multiwoven-integrations 0.3.2 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b4c9d23f9f2c64aa4e25911e6e399b2e316237902d742bb09cdc2fd4cb538666
4
- data.tar.gz: 3257d08bcdf074497cd26dca09b3d8367039cda5b1c6922dcc8d943a5e0d293d
3
+ metadata.gz: cb9f117bf574599b93ccfd2238f1629166c473184837ec48a16a447650b1dc54
4
+ data.tar.gz: efa39262c596d2b50bbd04d2d8a9d98889b708a9e2859e6ec3801b2c6eb79169
5
5
  SHA512:
6
- metadata.gz: 29ba8c2cc87689a8bf51ac6ea0e0ab439e83197cd1aaa81b390928cf088651a827de5ce66361f13a3cada6c8252fbe38b4ec5cd5ebd8f73e6d1ec44f7f2e16a0
7
- data.tar.gz: c4361564c16167cc5f2c59b24c9ff644c59ea5fd6c09979d17dbcb11adc63f526e82cbc00783c5f23621ce5339e9a8f82f023c516234bea4f98f0bb3dcbbd7a1
6
+ metadata.gz: 959b53319cb4581fd8aeb2731891f967031a23096c42e1e855fa79802bf81e799a746ba62c1c08f20484c3a443418d2175e1d9218abd28939eceaf89b6f87964
7
+ data.tar.gz: 22c706c8d47d9803cbdee5fa91a8f6862f7d0efb11c13466e2b0b0e07753a5e784e43443487a939a05f0cee0457bf310d12ce274e24ad322e5aa2cf8767c05ea
data/README.md CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
43
43
  Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
44
44
  Anyone can build a connector with basic Ruby knowledge using the protocol.
45
45
 
46
+ ## Prerequisites
47
+
48
+ Before you begin the installation, ensure you have the following dependencies installed:
49
+
50
+ - **MySQL Client**
51
+ - Command: `brew install mysql-client`
52
+ - Description: Required for database interactions.
53
+
54
+ - **Zstandard (zstd)**
55
+ - Command: `brew install zstd`
56
+ - Description: Needed for data compression and decompression.
57
+
58
+ - **OpenSSL 3**
59
+ - Command: `brew install openssl@3`
60
+ - Description: Essential for secure communication.
61
+
62
+
46
63
  ### Installation
47
64
 
48
65
  Install the gem and add to the application's Gemfile by executing:
@@ -34,6 +34,9 @@ module Multiwoven
34
34
  AIRTABLE_BASES_ENDPOINT = "https://api.airtable.com/v0/meta/bases"
35
35
  AIRTABLE_GET_BASE_SCHEMA_ENDPOINT = "https://api.airtable.com/v0/meta/bases/{baseId}/tables"
36
36
 
37
+ AWS_ACCESS_KEY_ID = ENV["AWS_ACCESS_KEY_ID"]
38
+ AWS_SECRET_ACCESS_KEY = ENV["AWS_SECRET_ACCESS_KEY"]
39
+
37
40
  # HTTP
38
41
  HTTP_GET = "GET"
39
42
  HTTP_POST = "POST"
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.3.2"
5
+ VERSION = "0.3.4"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -4,13 +4,13 @@ module Multiwoven::Integrations::Source
4
4
  module AmazonS3
5
5
  include Multiwoven::Integrations::Core
6
6
  class Client < SourceConnector
7
- DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
8
-
7
+ @session_name = ""
9
8
  def check_connection(connection_config)
10
9
  connection_config = connection_config.with_indifferent_access
11
- auth_data = get_auth_data(connection_config)
12
- client = config_aws(auth_data, connection_config[:region])
13
- client.get_bucket_location({ bucket: connection_config[:bucket] })
10
+ @session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
11
+ conn = create_connection(connection_config)
12
+ path = build_path(connection_config)
13
+ get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
14
14
  ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
15
15
  rescue StandardError => e
16
16
  ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
@@ -18,17 +18,13 @@ module Multiwoven::Integrations::Source
18
18
 
19
19
  def discover(connection_config)
20
20
  connection_config = connection_config.with_indifferent_access
21
- auth_data = get_auth_data(connection_config)
22
- connection_config[:access_id] = auth_data.credentials.access_key_id
23
- connection_config[:secret_access] = auth_data.credentials.secret_access_key
24
- connection_config[:session_token] = auth_data.credentials.session_token
21
+ @session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
25
22
  conn = create_connection(connection_config)
26
23
  # If pulling from multiple files, all files must have the same schema
27
- path = build_path(connection_config[:path])
28
- full_path = "s3://#{connection_config[:bucket]}/#{path}*.#{connection_config[:file_type]}"
29
- records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
24
+ path = build_path(connection_config)
25
+ records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
30
26
  columns = build_discover_columns(records)
31
- streams = [Multiwoven::Integrations::Protocol::Stream.new(name: full_path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
+ streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
32
28
  catalog = Catalog.new(streams: streams)
33
29
  catalog.to_multiwoven_message
34
30
  rescue StandardError => e
@@ -37,10 +33,7 @@ module Multiwoven::Integrations::Source
37
33
 
38
34
  def read(sync_config)
39
35
  connection_config = sync_config.source.connection_specification.with_indifferent_access
40
- auth_data = get_auth_data(connection_config)
41
- connection_config[:access_id] = auth_data.credentials.access_key_id
42
- connection_config[:secret_access] = auth_data.credentials.secret_access_key
43
- connection_config[:session_token] = auth_data.credentials.session_token
36
+ @session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
44
37
  conn = create_connection(connection_config)
45
38
  query = sync_config.model.query
46
39
  query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
@@ -57,38 +50,49 @@ module Multiwoven::Integrations::Source
57
50
  private
58
51
 
59
52
  def get_auth_data(connection_config)
53
+ session = @session_name.gsub(/\s+/, "-")
54
+ @session_name = ""
60
55
  if connection_config[:auth_type] == "user"
61
56
  Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
62
57
  elsif connection_config[:auth_type] == "role"
63
- sts_client = Aws::STS::Client.new(region: connection_config[:region])
64
- session_name = "s3-check-connection"
65
- sts_client.assume_role({
66
- role_arn: connection_config[:arn],
67
- role_session_name: session_name
68
- })
58
+ credentials = Aws::Credentials.new(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
59
+ sts_client = Aws::STS::Client.new(region: connection_config[:region], credentials: credentials)
60
+ resp = sts_client.assume_role({
61
+ role_arn: connection_config[:arn],
62
+ role_session_name: session,
63
+ external_id: connection_config[:external_id]
64
+ })
65
+ Aws::Credentials.new(
66
+ resp.credentials.access_key_id,
67
+ resp.credentials.secret_access_key,
68
+ resp.credentials.session_token
69
+ )
69
70
  end
70
71
  end
71
72
 
72
- # DuckDB
73
73
  def create_connection(connection_config)
74
+ # In the case when previewing a query
75
+ @session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
76
+ auth_data = get_auth_data(connection_config)
74
77
  conn = DuckDB::Database.open.connect
75
78
  # Set up S3 configuration
76
79
  secret_query = "
77
80
  CREATE SECRET amazons3_source (
78
81
  TYPE S3,
79
- KEY_ID '#{connection_config[:access_id]}',
80
- SECRET '#{connection_config[:secret_access]}',
82
+ KEY_ID '#{auth_data.credentials.access_key_id}',
83
+ SECRET '#{auth_data.credentials.secret_access_key}',
81
84
  REGION '#{connection_config[:region]}',
82
- SESSION_TOKEN '#{connection_config[:session_token]}'
85
+ SESSION_TOKEN '#{auth_data.credentials.session_token}'
83
86
  );
84
87
  "
85
88
  get_results(conn, secret_query)
86
89
  conn
87
90
  end
88
91
 
89
- def build_path(path)
90
- path = "#{path}/" if !path.to_s.strip.empty? && path[-1] != "/"
91
- path
92
+ def build_path(connection_config)
93
+ path = connection_config[:path]
94
+ path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
95
+ "s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
92
96
  end
93
97
 
94
98
  def get_results(conn, query)
@@ -132,41 +136,6 @@ module Multiwoven::Integrations::Source
132
136
  "boolean"
133
137
  end
134
138
  end
135
-
136
- # AWS SDK
137
- def config_aws(config, region)
138
- Aws.config.update({
139
- region: region,
140
- credentials: config
141
- })
142
- Aws::S3::Client.new
143
- end
144
-
145
- def build_select_content_options(config, query)
146
- config = config.with_indifferent_access
147
- bucket_name = config[:bucket]
148
- file_key = config[:file_key]
149
- file_type = config[:file_type]
150
- options = {
151
- bucket: bucket_name,
152
- key: file_key,
153
- expression_type: "SQL",
154
- expression: query,
155
- output_serialization: {
156
- json: {}
157
- }
158
- }
159
- if file_type == "parquet"
160
- options[:input_serialization] = {
161
- parquet: {}
162
- }
163
- elsif file_type == "csv"
164
- options[:input_serialization] = {
165
- csv: { file_header_info: "USE" }
166
- }
167
- end
168
- options
169
- end
170
139
  end
171
140
  end
172
141
  end
@@ -29,6 +29,7 @@
29
29
  "region",
30
30
  "bucket",
31
31
  "arn",
32
+ "external_id",
32
33
  "file_type"
33
34
  ]
34
35
  },
@@ -53,16 +54,22 @@
53
54
  "title": "IAM Role ARN",
54
55
  "order": 1
55
56
  },
57
+ "external_id": {
58
+ "type": "string",
59
+ "title": "External Id",
60
+ "description": "Unique ID that allows handshake between AWS accounts.",
61
+ "order": 2
62
+ },
56
63
  "access_id": {
57
64
  "type": "string",
58
65
  "title": "Access Id",
59
- "order": 2
66
+ "order": 3
60
67
  },
61
68
  "secret_access": {
62
69
  "type": "string",
63
70
  "title": "Secret Access",
64
71
  "multiwoven_secret": true,
65
- "order": 3
72
+ "order": 4
66
73
  },
67
74
  "region": {
68
75
  "description": "AWS region",
@@ -71,13 +78,13 @@
71
78
  ],
72
79
  "type": "string",
73
80
  "title": "Region",
74
- "order": 4
81
+ "order": 5
75
82
  },
76
83
  "bucket": {
77
84
  "description": "Bucket Name",
78
85
  "type": "string",
79
86
  "title": "Bucket",
80
- "order": 5
87
+ "order": 6
81
88
  },
82
89
  "path": {
83
90
  "description": "Path to csv or parquet files",
@@ -86,7 +93,7 @@
86
93
  ],
87
94
  "type": "string",
88
95
  "title": "Path",
89
- "order": 6
96
+ "order": 7
90
97
  },
91
98
  "file_type": {
92
99
  "description": "The type of file to read",
@@ -96,7 +103,7 @@
96
103
  "csv",
97
104
  "parquet"
98
105
  ],
99
- "order": 7
106
+ "order": 8
100
107
  }
101
108
  }
102
109
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-27 00:00:00.000000000 Z
11
+ date: 2024-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport