multiwoven-integrations 0.3.2 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -0
- data/lib/multiwoven/integrations/core/constants.rb +3 -0
- data/lib/multiwoven/integrations/rollout.rb +1 -1
- data/lib/multiwoven/integrations/source/amazon_s3/client.rb +34 -65
- data/lib/multiwoven/integrations/source/amazon_s3/config/spec.json +13 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb9f117bf574599b93ccfd2238f1629166c473184837ec48a16a447650b1dc54
|
4
|
+
data.tar.gz: efa39262c596d2b50bbd04d2d8a9d98889b708a9e2859e6ec3801b2c6eb79169
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 959b53319cb4581fd8aeb2731891f967031a23096c42e1e855fa79802bf81e799a746ba62c1c08f20484c3a443418d2175e1d9218abd28939eceaf89b6f87964
|
7
|
+
data.tar.gz: 22c706c8d47d9803cbdee5fa91a8f6862f7d0efb11c13466e2b0b0e07753a5e784e43443487a939a05f0cee0457bf310d12ce274e24ad322e5aa2cf8767c05ea
|
data/README.md
CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
|
|
43
43
|
Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
|
44
44
|
Anyone can build a connector with basic Ruby knowledge using the protocol.
|
45
45
|
|
46
|
+
## Prerequisites
|
47
|
+
|
48
|
+
Before you begin the installation, ensure you have the following dependencies installed:
|
49
|
+
|
50
|
+
- **MySQL Client**
|
51
|
+
- Command: `brew install mysql-client`
|
52
|
+
- Description: Required for database interactions.
|
53
|
+
|
54
|
+
- **Zstandard (zstd)**
|
55
|
+
- Command: `brew install zstd`
|
56
|
+
- Description: Needed for data compression and decompression.
|
57
|
+
|
58
|
+
- **OpenSSL 3**
|
59
|
+
- Command: `brew install openssl@3`
|
60
|
+
- Description: Essential for secure communication.
|
61
|
+
|
62
|
+
|
46
63
|
### Installation
|
47
64
|
|
48
65
|
Install the gem and add to the application's Gemfile by executing:
|
@@ -34,6 +34,9 @@ module Multiwoven
|
|
34
34
|
AIRTABLE_BASES_ENDPOINT = "https://api.airtable.com/v0/meta/bases"
|
35
35
|
AIRTABLE_GET_BASE_SCHEMA_ENDPOINT = "https://api.airtable.com/v0/meta/bases/{baseId}/tables"
|
36
36
|
|
37
|
+
AWS_ACCESS_KEY_ID = ENV["AWS_ACCESS_KEY_ID"]
|
38
|
+
AWS_SECRET_ACCESS_KEY = ENV["AWS_SECRET_ACCESS_KEY"]
|
39
|
+
|
37
40
|
# HTTP
|
38
41
|
HTTP_GET = "GET"
|
39
42
|
HTTP_POST = "POST"
|
@@ -4,13 +4,13 @@ module Multiwoven::Integrations::Source
|
|
4
4
|
module AmazonS3
|
5
5
|
include Multiwoven::Integrations::Core
|
6
6
|
class Client < SourceConnector
|
7
|
-
|
8
|
-
|
7
|
+
@session_name = ""
|
9
8
|
def check_connection(connection_config)
|
10
9
|
connection_config = connection_config.with_indifferent_access
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
@session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
11
|
+
conn = create_connection(connection_config)
|
12
|
+
path = build_path(connection_config)
|
13
|
+
get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
14
14
|
ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
|
15
15
|
rescue StandardError => e
|
16
16
|
ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
|
@@ -18,17 +18,13 @@ module Multiwoven::Integrations::Source
|
|
18
18
|
|
19
19
|
def discover(connection_config)
|
20
20
|
connection_config = connection_config.with_indifferent_access
|
21
|
-
|
22
|
-
connection_config[:access_id] = auth_data.credentials.access_key_id
|
23
|
-
connection_config[:secret_access] = auth_data.credentials.secret_access_key
|
24
|
-
connection_config[:session_token] = auth_data.credentials.session_token
|
21
|
+
@session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
25
22
|
conn = create_connection(connection_config)
|
26
23
|
# If pulling from multiple files, all files must have the same schema
|
27
|
-
path = build_path(connection_config
|
28
|
-
|
29
|
-
records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
|
24
|
+
path = build_path(connection_config)
|
25
|
+
records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
30
26
|
columns = build_discover_columns(records)
|
31
|
-
streams = [Multiwoven::Integrations::Protocol::Stream.new(name:
|
27
|
+
streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
|
32
28
|
catalog = Catalog.new(streams: streams)
|
33
29
|
catalog.to_multiwoven_message
|
34
30
|
rescue StandardError => e
|
@@ -37,10 +33,7 @@ module Multiwoven::Integrations::Source
|
|
37
33
|
|
38
34
|
def read(sync_config)
|
39
35
|
connection_config = sync_config.source.connection_specification.with_indifferent_access
|
40
|
-
|
41
|
-
connection_config[:access_id] = auth_data.credentials.access_key_id
|
42
|
-
connection_config[:secret_access] = auth_data.credentials.secret_access_key
|
43
|
-
connection_config[:session_token] = auth_data.credentials.session_token
|
36
|
+
@session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
|
44
37
|
conn = create_connection(connection_config)
|
45
38
|
query = sync_config.model.query
|
46
39
|
query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
|
@@ -57,38 +50,49 @@ module Multiwoven::Integrations::Source
|
|
57
50
|
private
|
58
51
|
|
59
52
|
def get_auth_data(connection_config)
|
53
|
+
session = @session_name.gsub(/\s+/, "-")
|
54
|
+
@session_name = ""
|
60
55
|
if connection_config[:auth_type] == "user"
|
61
56
|
Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
|
62
57
|
elsif connection_config[:auth_type] == "role"
|
63
|
-
|
64
|
-
|
65
|
-
sts_client.assume_role({
|
66
|
-
|
67
|
-
|
68
|
-
|
58
|
+
credentials = Aws::Credentials.new(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
|
59
|
+
sts_client = Aws::STS::Client.new(region: connection_config[:region], credentials: credentials)
|
60
|
+
resp = sts_client.assume_role({
|
61
|
+
role_arn: connection_config[:arn],
|
62
|
+
role_session_name: session,
|
63
|
+
external_id: connection_config[:external_id]
|
64
|
+
})
|
65
|
+
Aws::Credentials.new(
|
66
|
+
resp.credentials.access_key_id,
|
67
|
+
resp.credentials.secret_access_key,
|
68
|
+
resp.credentials.session_token
|
69
|
+
)
|
69
70
|
end
|
70
71
|
end
|
71
72
|
|
72
|
-
# DuckDB
|
73
73
|
def create_connection(connection_config)
|
74
|
+
# In the case when previewing a query
|
75
|
+
@session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
|
76
|
+
auth_data = get_auth_data(connection_config)
|
74
77
|
conn = DuckDB::Database.open.connect
|
75
78
|
# Set up S3 configuration
|
76
79
|
secret_query = "
|
77
80
|
CREATE SECRET amazons3_source (
|
78
81
|
TYPE S3,
|
79
|
-
KEY_ID '#{
|
80
|
-
SECRET '#{
|
82
|
+
KEY_ID '#{auth_data.credentials.access_key_id}',
|
83
|
+
SECRET '#{auth_data.credentials.secret_access_key}',
|
81
84
|
REGION '#{connection_config[:region]}',
|
82
|
-
SESSION_TOKEN '#{
|
85
|
+
SESSION_TOKEN '#{auth_data.credentials.session_token}'
|
83
86
|
);
|
84
87
|
"
|
85
88
|
get_results(conn, secret_query)
|
86
89
|
conn
|
87
90
|
end
|
88
91
|
|
89
|
-
def build_path(
|
90
|
-
path =
|
91
|
-
path
|
92
|
+
def build_path(connection_config)
|
93
|
+
path = connection_config[:path]
|
94
|
+
path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
|
95
|
+
"s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
|
92
96
|
end
|
93
97
|
|
94
98
|
def get_results(conn, query)
|
@@ -132,41 +136,6 @@ module Multiwoven::Integrations::Source
|
|
132
136
|
"boolean"
|
133
137
|
end
|
134
138
|
end
|
135
|
-
|
136
|
-
# AWS SDK
|
137
|
-
def config_aws(config, region)
|
138
|
-
Aws.config.update({
|
139
|
-
region: region,
|
140
|
-
credentials: config
|
141
|
-
})
|
142
|
-
Aws::S3::Client.new
|
143
|
-
end
|
144
|
-
|
145
|
-
def build_select_content_options(config, query)
|
146
|
-
config = config.with_indifferent_access
|
147
|
-
bucket_name = config[:bucket]
|
148
|
-
file_key = config[:file_key]
|
149
|
-
file_type = config[:file_type]
|
150
|
-
options = {
|
151
|
-
bucket: bucket_name,
|
152
|
-
key: file_key,
|
153
|
-
expression_type: "SQL",
|
154
|
-
expression: query,
|
155
|
-
output_serialization: {
|
156
|
-
json: {}
|
157
|
-
}
|
158
|
-
}
|
159
|
-
if file_type == "parquet"
|
160
|
-
options[:input_serialization] = {
|
161
|
-
parquet: {}
|
162
|
-
}
|
163
|
-
elsif file_type == "csv"
|
164
|
-
options[:input_serialization] = {
|
165
|
-
csv: { file_header_info: "USE" }
|
166
|
-
}
|
167
|
-
end
|
168
|
-
options
|
169
|
-
end
|
170
139
|
end
|
171
140
|
end
|
172
141
|
end
|
@@ -29,6 +29,7 @@
|
|
29
29
|
"region",
|
30
30
|
"bucket",
|
31
31
|
"arn",
|
32
|
+
"external_id",
|
32
33
|
"file_type"
|
33
34
|
]
|
34
35
|
},
|
@@ -53,16 +54,22 @@
|
|
53
54
|
"title": "IAM Role ARN",
|
54
55
|
"order": 1
|
55
56
|
},
|
57
|
+
"external_id": {
|
58
|
+
"type": "string",
|
59
|
+
"title": "External Id",
|
60
|
+
"description": "Unique ID that allows handshake between AWS accounts.",
|
61
|
+
"order": 2
|
62
|
+
},
|
56
63
|
"access_id": {
|
57
64
|
"type": "string",
|
58
65
|
"title": "Access Id",
|
59
|
-
"order":
|
66
|
+
"order": 3
|
60
67
|
},
|
61
68
|
"secret_access": {
|
62
69
|
"type": "string",
|
63
70
|
"title": "Secret Access",
|
64
71
|
"multiwoven_secret": true,
|
65
|
-
"order":
|
72
|
+
"order": 4
|
66
73
|
},
|
67
74
|
"region": {
|
68
75
|
"description": "AWS region",
|
@@ -71,13 +78,13 @@
|
|
71
78
|
],
|
72
79
|
"type": "string",
|
73
80
|
"title": "Region",
|
74
|
-
"order":
|
81
|
+
"order": 5
|
75
82
|
},
|
76
83
|
"bucket": {
|
77
84
|
"description": "Bucket Name",
|
78
85
|
"type": "string",
|
79
86
|
"title": "Bucket",
|
80
|
-
"order":
|
87
|
+
"order": 6
|
81
88
|
},
|
82
89
|
"path": {
|
83
90
|
"description": "Path to csv or parquet files",
|
@@ -86,7 +93,7 @@
|
|
86
93
|
],
|
87
94
|
"type": "string",
|
88
95
|
"title": "Path",
|
89
|
-
"order":
|
96
|
+
"order": 7
|
90
97
|
},
|
91
98
|
"file_type": {
|
92
99
|
"description": "The type of file to read",
|
@@ -96,7 +103,7 @@
|
|
96
103
|
"csv",
|
97
104
|
"parquet"
|
98
105
|
],
|
99
|
-
"order":
|
106
|
+
"order": 8
|
100
107
|
}
|
101
108
|
}
|
102
109
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multiwoven-integrations
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Subin T P
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|