multiwoven-integrations 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b4c9d23f9f2c64aa4e25911e6e399b2e316237902d742bb09cdc2fd4cb538666
4
- data.tar.gz: 3257d08bcdf074497cd26dca09b3d8367039cda5b1c6922dcc8d943a5e0d293d
3
+ metadata.gz: e8263bcb86ca81c8262642cd939c8079e671e7cb03a29a905c24f564888038c5
4
+ data.tar.gz: cf5ba4ee0ad1f25967592e72369b3e0452d09715a036b70685d48ee4f29568de
5
5
  SHA512:
6
- metadata.gz: 29ba8c2cc87689a8bf51ac6ea0e0ab439e83197cd1aaa81b390928cf088651a827de5ce66361f13a3cada6c8252fbe38b4ec5cd5ebd8f73e6d1ec44f7f2e16a0
7
- data.tar.gz: c4361564c16167cc5f2c59b24c9ff644c59ea5fd6c09979d17dbcb11adc63f526e82cbc00783c5f23621ce5339e9a8f82f023c516234bea4f98f0bb3dcbbd7a1
6
+ metadata.gz: a0c976f1fd891d8474dbc113bce838c0a566e0127e49829c61c37042b479b3303185193beb66a204a752a87f8a7ec82b512d51fba5437e5239e1fc8a889ddcb9
7
+ data.tar.gz: 0ecd0c38c46db77c5e6c9ea6d8ef94cfd3325f7cb9e5f1a2d86ae6d22c307305595cac210d5d6710843806d8c8891c1b04bc7cf663efcb7e5b42cd3bbe45e39f
data/README.md CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
43
43
  Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
44
44
  Anyone can build a connector with basic Ruby knowledge using the protocol.
45
45
 
46
+ ## Prerequisites
47
+
48
+ Before you begin the installation, ensure you have the following dependencies installed:
49
+
50
+ - **MySQL Client**
51
+ - Command: `brew install mysql-client`
52
+ - Description: Required for database interactions.
53
+
54
+ - **Zstandard (zstd)**
55
+ - Command: `brew install zstd`
56
+ - Description: Needed for data compression and decompression.
57
+
58
+ - **OpenSSL 3**
59
+ - Command: `brew install openssl@3`
60
+ - Description: Essential for secure communication.
61
+
62
+
46
63
  ### Installation
47
64
 
48
65
  Install the gem and add to the application's Gemfile by executing:
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.3.2"
5
+ VERSION = "0.3.3"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -4,13 +4,13 @@ module Multiwoven::Integrations::Source
4
4
  module AmazonS3
5
5
  include Multiwoven::Integrations::Core
6
6
  class Client < SourceConnector
7
- DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
8
-
7
+ @session_name = ""
9
8
  def check_connection(connection_config)
10
9
  connection_config = connection_config.with_indifferent_access
11
- auth_data = get_auth_data(connection_config)
12
- client = config_aws(auth_data, connection_config[:region])
13
- client.get_bucket_location({ bucket: connection_config[:bucket] })
10
+ @session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
11
+ conn = create_connection(connection_config)
12
+ path = build_path(connection_config)
13
+ get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
14
14
  ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
15
15
  rescue StandardError => e
16
16
  ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
@@ -18,17 +18,13 @@ module Multiwoven::Integrations::Source
18
18
 
19
19
  def discover(connection_config)
20
20
  connection_config = connection_config.with_indifferent_access
21
- auth_data = get_auth_data(connection_config)
22
- connection_config[:access_id] = auth_data.credentials.access_key_id
23
- connection_config[:secret_access] = auth_data.credentials.secret_access_key
24
- connection_config[:session_token] = auth_data.credentials.session_token
21
+ @session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
25
22
  conn = create_connection(connection_config)
26
23
  # If pulling from multiple files, all files must have the same schema
27
- path = build_path(connection_config[:path])
28
- full_path = "s3://#{connection_config[:bucket]}/#{path}*.#{connection_config[:file_type]}"
29
- records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
24
+ path = build_path(connection_config)
25
+ records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
30
26
  columns = build_discover_columns(records)
31
- streams = [Multiwoven::Integrations::Protocol::Stream.new(name: full_path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
+ streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
32
28
  catalog = Catalog.new(streams: streams)
33
29
  catalog.to_multiwoven_message
34
30
  rescue StandardError => e
@@ -37,10 +33,7 @@ module Multiwoven::Integrations::Source
37
33
 
38
34
  def read(sync_config)
39
35
  connection_config = sync_config.source.connection_specification.with_indifferent_access
40
- auth_data = get_auth_data(connection_config)
41
- connection_config[:access_id] = auth_data.credentials.access_key_id
42
- connection_config[:secret_access] = auth_data.credentials.secret_access_key
43
- connection_config[:session_token] = auth_data.credentials.session_token
36
+ @session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
44
37
  conn = create_connection(connection_config)
45
38
  query = sync_config.model.query
46
39
  query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
@@ -57,38 +50,47 @@ module Multiwoven::Integrations::Source
57
50
  private
58
51
 
59
52
  def get_auth_data(connection_config)
53
+ session = @session_name
54
+ @session_name = ""
60
55
  if connection_config[:auth_type] == "user"
61
56
  Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
62
57
  elsif connection_config[:auth_type] == "role"
63
58
  sts_client = Aws::STS::Client.new(region: connection_config[:region])
64
- session_name = "s3-check-connection"
65
- sts_client.assume_role({
66
- role_arn: connection_config[:arn],
67
- role_session_name: session_name
68
- })
59
+ resp = sts_client.assume_role({
60
+ role_arn: connection_config[:arn],
61
+ role_session_name: session
62
+ })
63
+ Aws::Credentials.new(
64
+ resp.credentials.access_key_id,
65
+ resp.credentials.secret_access_key,
66
+ resp.credentials.session_token
67
+ )
69
68
  end
70
69
  end
71
70
 
72
- # DuckDB
73
71
  def create_connection(connection_config)
72
+ # In the case when previewing a query
73
+ @session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
74
+ auth_data = get_auth_data(connection_config)
74
75
  conn = DuckDB::Database.open.connect
75
76
  # Set up S3 configuration
76
77
  secret_query = "
77
78
  CREATE SECRET amazons3_source (
78
79
  TYPE S3,
79
- KEY_ID '#{connection_config[:access_id]}',
80
- SECRET '#{connection_config[:secret_access]}',
80
+ KEY_ID '#{auth_data.credentials.access_key_id}',
81
+ SECRET '#{auth_data.credentials.secret_access_key}',
81
82
  REGION '#{connection_config[:region]}',
82
- SESSION_TOKEN '#{connection_config[:session_token]}'
83
+ SESSION_TOKEN '#{auth_data.credentials.session_token}'
83
84
  );
84
85
  "
85
86
  get_results(conn, secret_query)
86
87
  conn
87
88
  end
88
89
 
89
- def build_path(path)
90
- path = "#{path}/" if !path.to_s.strip.empty? && path[-1] != "/"
91
- path
90
+ def build_path(connection_config)
91
+ path = connection_config[:path]
92
+ path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
93
+ "s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
92
94
  end
93
95
 
94
96
  def get_results(conn, query)
@@ -132,41 +134,6 @@ module Multiwoven::Integrations::Source
132
134
  "boolean"
133
135
  end
134
136
  end
135
-
136
- # AWS SDK
137
- def config_aws(config, region)
138
- Aws.config.update({
139
- region: region,
140
- credentials: config
141
- })
142
- Aws::S3::Client.new
143
- end
144
-
145
- def build_select_content_options(config, query)
146
- config = config.with_indifferent_access
147
- bucket_name = config[:bucket]
148
- file_key = config[:file_key]
149
- file_type = config[:file_type]
150
- options = {
151
- bucket: bucket_name,
152
- key: file_key,
153
- expression_type: "SQL",
154
- expression: query,
155
- output_serialization: {
156
- json: {}
157
- }
158
- }
159
- if file_type == "parquet"
160
- options[:input_serialization] = {
161
- parquet: {}
162
- }
163
- elsif file_type == "csv"
164
- options[:input_serialization] = {
165
- csv: { file_header_info: "USE" }
166
- }
167
- end
168
- options
169
- end
170
137
  end
171
138
  end
172
139
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-27 00:00:00.000000000 Z
11
+ date: 2024-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport