multiwoven-integrations 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b4c9d23f9f2c64aa4e25911e6e399b2e316237902d742bb09cdc2fd4cb538666
4
- data.tar.gz: 3257d08bcdf074497cd26dca09b3d8367039cda5b1c6922dcc8d943a5e0d293d
3
+ metadata.gz: e8263bcb86ca81c8262642cd939c8079e671e7cb03a29a905c24f564888038c5
4
+ data.tar.gz: cf5ba4ee0ad1f25967592e72369b3e0452d09715a036b70685d48ee4f29568de
5
5
  SHA512:
6
- metadata.gz: 29ba8c2cc87689a8bf51ac6ea0e0ab439e83197cd1aaa81b390928cf088651a827de5ce66361f13a3cada6c8252fbe38b4ec5cd5ebd8f73e6d1ec44f7f2e16a0
7
- data.tar.gz: c4361564c16167cc5f2c59b24c9ff644c59ea5fd6c09979d17dbcb11adc63f526e82cbc00783c5f23621ce5339e9a8f82f023c516234bea4f98f0bb3dcbbd7a1
6
+ metadata.gz: a0c976f1fd891d8474dbc113bce838c0a566e0127e49829c61c37042b479b3303185193beb66a204a752a87f8a7ec82b512d51fba5437e5239e1fc8a889ddcb9
7
+ data.tar.gz: 0ecd0c38c46db77c5e6c9ea6d8ef94cfd3325f7cb9e5f1a2d86ae6d22c307305595cac210d5d6710843806d8c8891c1b04bc7cf663efcb7e5b42cd3bbe45e39f
data/README.md CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
43
43
  Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
44
44
  Anyone can build a connector with basic Ruby knowledge using the protocol.
45
45
 
46
+ ## Prerequisites
47
+
48
+ Before you begin the installation, ensure you have the following dependencies installed:
49
+
50
+ - **MySQL Client**
51
+ - Command: `brew install mysql-client`
52
+ - Description: Required for database interactions.
53
+
54
+ - **Zstandard (zstd)**
55
+ - Command: `brew install zstd`
56
+ - Description: Needed for data compression and decompression.
57
+
58
+ - **OpenSSL 3**
59
+ - Command: `brew install openssl@3`
60
+ - Description: Essential for secure communication.
61
+
62
+
46
63
  ### Installation
47
64
 
48
65
  Install the gem and add to the application's Gemfile by executing:
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.3.2"
5
+ VERSION = "0.3.3"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -4,13 +4,13 @@ module Multiwoven::Integrations::Source
4
4
  module AmazonS3
5
5
  include Multiwoven::Integrations::Core
6
6
  class Client < SourceConnector
7
- DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
8
-
7
+ @session_name = ""
9
8
  def check_connection(connection_config)
10
9
  connection_config = connection_config.with_indifferent_access
11
- auth_data = get_auth_data(connection_config)
12
- client = config_aws(auth_data, connection_config[:region])
13
- client.get_bucket_location({ bucket: connection_config[:bucket] })
10
+ @session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
11
+ conn = create_connection(connection_config)
12
+ path = build_path(connection_config)
13
+ get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
14
14
  ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
15
15
  rescue StandardError => e
16
16
  ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
@@ -18,17 +18,13 @@ module Multiwoven::Integrations::Source
18
18
 
19
19
  def discover(connection_config)
20
20
  connection_config = connection_config.with_indifferent_access
21
- auth_data = get_auth_data(connection_config)
22
- connection_config[:access_id] = auth_data.credentials.access_key_id
23
- connection_config[:secret_access] = auth_data.credentials.secret_access_key
24
- connection_config[:session_token] = auth_data.credentials.session_token
21
+ @session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
25
22
  conn = create_connection(connection_config)
26
23
  # If pulling from multiple files, all files must have the same schema
27
- path = build_path(connection_config[:path])
28
- full_path = "s3://#{connection_config[:bucket]}/#{path}*.#{connection_config[:file_type]}"
29
- records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
24
+ path = build_path(connection_config)
25
+ records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
30
26
  columns = build_discover_columns(records)
31
- streams = [Multiwoven::Integrations::Protocol::Stream.new(name: full_path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
+ streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
32
28
  catalog = Catalog.new(streams: streams)
33
29
  catalog.to_multiwoven_message
34
30
  rescue StandardError => e
@@ -37,10 +33,7 @@ module Multiwoven::Integrations::Source
37
33
 
38
34
  def read(sync_config)
39
35
  connection_config = sync_config.source.connection_specification.with_indifferent_access
40
- auth_data = get_auth_data(connection_config)
41
- connection_config[:access_id] = auth_data.credentials.access_key_id
42
- connection_config[:secret_access] = auth_data.credentials.secret_access_key
43
- connection_config[:session_token] = auth_data.credentials.session_token
36
+ @session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
44
37
  conn = create_connection(connection_config)
45
38
  query = sync_config.model.query
46
39
  query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
@@ -57,38 +50,47 @@ module Multiwoven::Integrations::Source
57
50
  private
58
51
 
59
52
  def get_auth_data(connection_config)
53
+ session = @session_name
54
+ @session_name = ""
60
55
  if connection_config[:auth_type] == "user"
61
56
  Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
62
57
  elsif connection_config[:auth_type] == "role"
63
58
  sts_client = Aws::STS::Client.new(region: connection_config[:region])
64
- session_name = "s3-check-connection"
65
- sts_client.assume_role({
66
- role_arn: connection_config[:arn],
67
- role_session_name: session_name
68
- })
59
+ resp = sts_client.assume_role({
60
+ role_arn: connection_config[:arn],
61
+ role_session_name: session
62
+ })
63
+ Aws::Credentials.new(
64
+ resp.credentials.access_key_id,
65
+ resp.credentials.secret_access_key,
66
+ resp.credentials.session_token
67
+ )
69
68
  end
70
69
  end
71
70
 
72
- # DuckDB
73
71
  def create_connection(connection_config)
72
+ # In the case when previewing a query
73
+ @session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
74
+ auth_data = get_auth_data(connection_config)
74
75
  conn = DuckDB::Database.open.connect
75
76
  # Set up S3 configuration
76
77
  secret_query = "
77
78
  CREATE SECRET amazons3_source (
78
79
  TYPE S3,
79
- KEY_ID '#{connection_config[:access_id]}',
80
- SECRET '#{connection_config[:secret_access]}',
80
+ KEY_ID '#{auth_data.credentials.access_key_id}',
81
+ SECRET '#{auth_data.credentials.secret_access_key}',
81
82
  REGION '#{connection_config[:region]}',
82
- SESSION_TOKEN '#{connection_config[:session_token]}'
83
+ SESSION_TOKEN '#{auth_data.credentials.session_token}'
83
84
  );
84
85
  "
85
86
  get_results(conn, secret_query)
86
87
  conn
87
88
  end
88
89
 
89
- def build_path(path)
90
- path = "#{path}/" if !path.to_s.strip.empty? && path[-1] != "/"
91
- path
90
+ def build_path(connection_config)
91
+ path = connection_config[:path]
92
+ path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
93
+ "s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
92
94
  end
93
95
 
94
96
  def get_results(conn, query)
@@ -132,41 +134,6 @@ module Multiwoven::Integrations::Source
132
134
  "boolean"
133
135
  end
134
136
  end
135
-
136
- # AWS SDK
137
- def config_aws(config, region)
138
- Aws.config.update({
139
- region: region,
140
- credentials: config
141
- })
142
- Aws::S3::Client.new
143
- end
144
-
145
- def build_select_content_options(config, query)
146
- config = config.with_indifferent_access
147
- bucket_name = config[:bucket]
148
- file_key = config[:file_key]
149
- file_type = config[:file_type]
150
- options = {
151
- bucket: bucket_name,
152
- key: file_key,
153
- expression_type: "SQL",
154
- expression: query,
155
- output_serialization: {
156
- json: {}
157
- }
158
- }
159
- if file_type == "parquet"
160
- options[:input_serialization] = {
161
- parquet: {}
162
- }
163
- elsif file_type == "csv"
164
- options[:input_serialization] = {
165
- csv: { file_header_info: "USE" }
166
- }
167
- end
168
- options
169
- end
170
137
  end
171
138
  end
172
139
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-27 00:00:00.000000000 Z
11
+ date: 2024-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport