multiwoven-integrations 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b4c9d23f9f2c64aa4e25911e6e399b2e316237902d742bb09cdc2fd4cb538666
4
- data.tar.gz: 3257d08bcdf074497cd26dca09b3d8367039cda5b1c6922dcc8d943a5e0d293d
3
+ metadata.gz: cb9f117bf574599b93ccfd2238f1629166c473184837ec48a16a447650b1dc54
4
+ data.tar.gz: efa39262c596d2b50bbd04d2d8a9d98889b708a9e2859e6ec3801b2c6eb79169
5
5
  SHA512:
6
- metadata.gz: 29ba8c2cc87689a8bf51ac6ea0e0ab439e83197cd1aaa81b390928cf088651a827de5ce66361f13a3cada6c8252fbe38b4ec5cd5ebd8f73e6d1ec44f7f2e16a0
7
- data.tar.gz: c4361564c16167cc5f2c59b24c9ff644c59ea5fd6c09979d17dbcb11adc63f526e82cbc00783c5f23621ce5339e9a8f82f023c516234bea4f98f0bb3dcbbd7a1
6
+ metadata.gz: 959b53319cb4581fd8aeb2731891f967031a23096c42e1e855fa79802bf81e799a746ba62c1c08f20484c3a443418d2175e1d9218abd28939eceaf89b6f87964
7
+ data.tar.gz: 22c706c8d47d9803cbdee5fa91a8f6862f7d0efb11c13466e2b0b0e07753a5e784e43443487a939a05f0cee0457bf310d12ce274e24ad322e5aa2cf8767c05ea
data/README.md CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
43
43
  Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
44
44
  Anyone can build a connector with basic Ruby knowledge using the protocol.
45
45
 
46
+ ## Prerequisites
47
+
48
+ Before you begin the installation, ensure you have the following dependencies installed:
49
+
50
+ - **MySQL Client**
51
+ - Command: `brew install mysql-client`
52
+ - Description: Required for database interactions.
53
+
54
+ - **Zstandard (zstd)**
55
+ - Command: `brew install zstd`
56
+ - Description: Needed for data compression and decompression.
57
+
58
+ - **OpenSSL 3**
59
+ - Command: `brew install openssl@3`
60
+ - Description: Essential for secure communication.
61
+
62
+
46
63
  ### Installation
47
64
 
48
65
  Install the gem and add to the application's Gemfile by executing:
@@ -34,6 +34,9 @@ module Multiwoven
34
34
  AIRTABLE_BASES_ENDPOINT = "https://api.airtable.com/v0/meta/bases"
35
35
  AIRTABLE_GET_BASE_SCHEMA_ENDPOINT = "https://api.airtable.com/v0/meta/bases/{baseId}/tables"
36
36
 
37
+ AWS_ACCESS_KEY_ID = ENV["AWS_ACCESS_KEY_ID"]
38
+ AWS_SECRET_ACCESS_KEY = ENV["AWS_SECRET_ACCESS_KEY"]
39
+
37
40
  # HTTP
38
41
  HTTP_GET = "GET"
39
42
  HTTP_POST = "POST"
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.3.2"
5
+ VERSION = "0.3.4"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -4,13 +4,13 @@ module Multiwoven::Integrations::Source
4
4
  module AmazonS3
5
5
  include Multiwoven::Integrations::Core
6
6
  class Client < SourceConnector
7
- DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
8
-
7
+ @session_name = ""
9
8
  def check_connection(connection_config)
10
9
  connection_config = connection_config.with_indifferent_access
11
- auth_data = get_auth_data(connection_config)
12
- client = config_aws(auth_data, connection_config[:region])
13
- client.get_bucket_location({ bucket: connection_config[:bucket] })
10
+ @session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
11
+ conn = create_connection(connection_config)
12
+ path = build_path(connection_config)
13
+ get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
14
14
  ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
15
15
  rescue StandardError => e
16
16
  ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
@@ -18,17 +18,13 @@ module Multiwoven::Integrations::Source
18
18
 
19
19
  def discover(connection_config)
20
20
  connection_config = connection_config.with_indifferent_access
21
- auth_data = get_auth_data(connection_config)
22
- connection_config[:access_id] = auth_data.credentials.access_key_id
23
- connection_config[:secret_access] = auth_data.credentials.secret_access_key
24
- connection_config[:session_token] = auth_data.credentials.session_token
21
+ @session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
25
22
  conn = create_connection(connection_config)
26
23
  # If pulling from multiple files, all files must have the same schema
27
- path = build_path(connection_config[:path])
28
- full_path = "s3://#{connection_config[:bucket]}/#{path}*.#{connection_config[:file_type]}"
29
- records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
24
+ path = build_path(connection_config)
25
+ records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
30
26
  columns = build_discover_columns(records)
31
- streams = [Multiwoven::Integrations::Protocol::Stream.new(name: full_path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
+ streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
32
28
  catalog = Catalog.new(streams: streams)
33
29
  catalog.to_multiwoven_message
34
30
  rescue StandardError => e
@@ -37,10 +33,7 @@ module Multiwoven::Integrations::Source
37
33
 
38
34
  def read(sync_config)
39
35
  connection_config = sync_config.source.connection_specification.with_indifferent_access
40
- auth_data = get_auth_data(connection_config)
41
- connection_config[:access_id] = auth_data.credentials.access_key_id
42
- connection_config[:secret_access] = auth_data.credentials.secret_access_key
43
- connection_config[:session_token] = auth_data.credentials.session_token
36
+ @session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
44
37
  conn = create_connection(connection_config)
45
38
  query = sync_config.model.query
46
39
  query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
@@ -57,38 +50,49 @@ module Multiwoven::Integrations::Source
57
50
  private
58
51
 
59
52
  def get_auth_data(connection_config)
53
+ session = @session_name.gsub(/\s+/, "-")
54
+ @session_name = ""
60
55
  if connection_config[:auth_type] == "user"
61
56
  Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
62
57
  elsif connection_config[:auth_type] == "role"
63
- sts_client = Aws::STS::Client.new(region: connection_config[:region])
64
- session_name = "s3-check-connection"
65
- sts_client.assume_role({
66
- role_arn: connection_config[:arn],
67
- role_session_name: session_name
68
- })
58
+ credentials = Aws::Credentials.new(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
59
+ sts_client = Aws::STS::Client.new(region: connection_config[:region], credentials: credentials)
60
+ resp = sts_client.assume_role({
61
+ role_arn: connection_config[:arn],
62
+ role_session_name: session,
63
+ external_id: connection_config[:external_id]
64
+ })
65
+ Aws::Credentials.new(
66
+ resp.credentials.access_key_id,
67
+ resp.credentials.secret_access_key,
68
+ resp.credentials.session_token
69
+ )
69
70
  end
70
71
  end
71
72
 
72
- # DuckDB
73
73
  def create_connection(connection_config)
74
+ # In the case when previewing a query
75
+ @session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
76
+ auth_data = get_auth_data(connection_config)
74
77
  conn = DuckDB::Database.open.connect
75
78
  # Set up S3 configuration
76
79
  secret_query = "
77
80
  CREATE SECRET amazons3_source (
78
81
  TYPE S3,
79
- KEY_ID '#{connection_config[:access_id]}',
80
- SECRET '#{connection_config[:secret_access]}',
82
+ KEY_ID '#{auth_data.credentials.access_key_id}',
83
+ SECRET '#{auth_data.credentials.secret_access_key}',
81
84
  REGION '#{connection_config[:region]}',
82
- SESSION_TOKEN '#{connection_config[:session_token]}'
85
+ SESSION_TOKEN '#{auth_data.credentials.session_token}'
83
86
  );
84
87
  "
85
88
  get_results(conn, secret_query)
86
89
  conn
87
90
  end
88
91
 
89
- def build_path(path)
90
- path = "#{path}/" if !path.to_s.strip.empty? && path[-1] != "/"
91
- path
92
+ def build_path(connection_config)
93
+ path = connection_config[:path]
94
+ path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
95
+ "s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
92
96
  end
93
97
 
94
98
  def get_results(conn, query)
@@ -132,41 +136,6 @@ module Multiwoven::Integrations::Source
132
136
  "boolean"
133
137
  end
134
138
  end
135
-
136
- # AWS SDK
137
- def config_aws(config, region)
138
- Aws.config.update({
139
- region: region,
140
- credentials: config
141
- })
142
- Aws::S3::Client.new
143
- end
144
-
145
- def build_select_content_options(config, query)
146
- config = config.with_indifferent_access
147
- bucket_name = config[:bucket]
148
- file_key = config[:file_key]
149
- file_type = config[:file_type]
150
- options = {
151
- bucket: bucket_name,
152
- key: file_key,
153
- expression_type: "SQL",
154
- expression: query,
155
- output_serialization: {
156
- json: {}
157
- }
158
- }
159
- if file_type == "parquet"
160
- options[:input_serialization] = {
161
- parquet: {}
162
- }
163
- elsif file_type == "csv"
164
- options[:input_serialization] = {
165
- csv: { file_header_info: "USE" }
166
- }
167
- end
168
- options
169
- end
170
139
  end
171
140
  end
172
141
  end
@@ -29,6 +29,7 @@
29
29
  "region",
30
30
  "bucket",
31
31
  "arn",
32
+ "external_id",
32
33
  "file_type"
33
34
  ]
34
35
  },
@@ -53,16 +54,22 @@
53
54
  "title": "IAM Role ARN",
54
55
  "order": 1
55
56
  },
57
+ "external_id": {
58
+ "type": "string",
59
+ "title": "External Id",
60
+ "description": "Unique ID that allows handshake between AWS accounts.",
61
+ "order": 2
62
+ },
56
63
  "access_id": {
57
64
  "type": "string",
58
65
  "title": "Access Id",
59
- "order": 2
66
+ "order": 3
60
67
  },
61
68
  "secret_access": {
62
69
  "type": "string",
63
70
  "title": "Secret Access",
64
71
  "multiwoven_secret": true,
65
- "order": 3
72
+ "order": 4
66
73
  },
67
74
  "region": {
68
75
  "description": "AWS region",
@@ -71,13 +78,13 @@
71
78
  ],
72
79
  "type": "string",
73
80
  "title": "Region",
74
- "order": 4
81
+ "order": 5
75
82
  },
76
83
  "bucket": {
77
84
  "description": "Bucket Name",
78
85
  "type": "string",
79
86
  "title": "Bucket",
80
- "order": 5
87
+ "order": 6
81
88
  },
82
89
  "path": {
83
90
  "description": "Path to csv or parquet files",
@@ -86,7 +93,7 @@
86
93
  ],
87
94
  "type": "string",
88
95
  "title": "Path",
89
- "order": 6
96
+ "order": 7
90
97
  },
91
98
  "file_type": {
92
99
  "description": "The type of file to read",
@@ -96,7 +103,7 @@
96
103
  "csv",
97
104
  "parquet"
98
105
  ],
99
- "order": 7
106
+ "order": 8
100
107
  }
101
108
  }
102
109
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-27 00:00:00.000000000 Z
11
+ date: 2024-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport