multiwoven-integrations 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -0
- data/lib/multiwoven/integrations/core/constants.rb +3 -0
- data/lib/multiwoven/integrations/rollout.rb +1 -1
- data/lib/multiwoven/integrations/source/amazon_s3/client.rb +34 -65
- data/lib/multiwoven/integrations/source/amazon_s3/config/spec.json +13 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb9f117bf574599b93ccfd2238f1629166c473184837ec48a16a447650b1dc54
|
4
|
+
data.tar.gz: efa39262c596d2b50bbd04d2d8a9d98889b708a9e2859e6ec3801b2c6eb79169
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 959b53319cb4581fd8aeb2731891f967031a23096c42e1e855fa79802bf81e799a746ba62c1c08f20484c3a443418d2175e1d9218abd28939eceaf89b6f87964
|
7
|
+
data.tar.gz: 22c706c8d47d9803cbdee5fa91a8f6862f7d0efb11c13466e2b0b0e07753a5e784e43443487a939a05f0cee0457bf310d12ce274e24ad322e5aa2cf8767c05ea
|
data/README.md
CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
|
|
43
43
|
Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
|
44
44
|
Anyone can build a connector with basic Ruby knowledge using the protocol.
|
45
45
|
|
46
|
+
## Prerequisites
|
47
|
+
|
48
|
+
Before you begin the installation, ensure you have the following dependencies installed:
|
49
|
+
|
50
|
+
- **MySQL Client**
|
51
|
+
- Command: `brew install mysql-client`
|
52
|
+
- Description: Required for database interactions.
|
53
|
+
|
54
|
+
- **Zstandard (zstd)**
|
55
|
+
- Command: `brew install zstd`
|
56
|
+
- Description: Needed for data compression and decompression.
|
57
|
+
|
58
|
+
- **OpenSSL 3**
|
59
|
+
- Command: `brew install openssl@3`
|
60
|
+
- Description: Essential for secure communication.
|
61
|
+
|
62
|
+
|
46
63
|
### Installation
|
47
64
|
|
48
65
|
Install the gem and add to the application's Gemfile by executing:
|
@@ -34,6 +34,9 @@ module Multiwoven
|
|
34
34
|
AIRTABLE_BASES_ENDPOINT = "https://api.airtable.com/v0/meta/bases"
|
35
35
|
AIRTABLE_GET_BASE_SCHEMA_ENDPOINT = "https://api.airtable.com/v0/meta/bases/{baseId}/tables"
|
36
36
|
|
37
|
+
AWS_ACCESS_KEY_ID = ENV["AWS_ACCESS_KEY_ID"]
|
38
|
+
AWS_SECRET_ACCESS_KEY = ENV["AWS_SECRET_ACCESS_KEY"]
|
39
|
+
|
37
40
|
# HTTP
|
38
41
|
HTTP_GET = "GET"
|
39
42
|
HTTP_POST = "POST"
|
@@ -4,13 +4,13 @@ module Multiwoven::Integrations::Source
|
|
4
4
|
module AmazonS3
|
5
5
|
include Multiwoven::Integrations::Core
|
6
6
|
class Client < SourceConnector
|
7
|
-
|
8
|
-
|
7
|
+
@session_name = ""
|
9
8
|
def check_connection(connection_config)
|
10
9
|
connection_config = connection_config.with_indifferent_access
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
@session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
11
|
+
conn = create_connection(connection_config)
|
12
|
+
path = build_path(connection_config)
|
13
|
+
get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
14
14
|
ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
|
15
15
|
rescue StandardError => e
|
16
16
|
ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
|
@@ -18,17 +18,13 @@ module Multiwoven::Integrations::Source
|
|
18
18
|
|
19
19
|
def discover(connection_config)
|
20
20
|
connection_config = connection_config.with_indifferent_access
|
21
|
-
|
22
|
-
connection_config[:access_id] = auth_data.credentials.access_key_id
|
23
|
-
connection_config[:secret_access] = auth_data.credentials.secret_access_key
|
24
|
-
connection_config[:session_token] = auth_data.credentials.session_token
|
21
|
+
@session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
25
22
|
conn = create_connection(connection_config)
|
26
23
|
# If pulling from multiple files, all files must have the same schema
|
27
|
-
path = build_path(connection_config
|
28
|
-
|
29
|
-
records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
|
24
|
+
path = build_path(connection_config)
|
25
|
+
records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
30
26
|
columns = build_discover_columns(records)
|
31
|
-
streams = [Multiwoven::Integrations::Protocol::Stream.new(name:
|
27
|
+
streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
|
32
28
|
catalog = Catalog.new(streams: streams)
|
33
29
|
catalog.to_multiwoven_message
|
34
30
|
rescue StandardError => e
|
@@ -37,10 +33,7 @@ module Multiwoven::Integrations::Source
|
|
37
33
|
|
38
34
|
def read(sync_config)
|
39
35
|
connection_config = sync_config.source.connection_specification.with_indifferent_access
|
40
|
-
|
41
|
-
connection_config[:access_id] = auth_data.credentials.access_key_id
|
42
|
-
connection_config[:secret_access] = auth_data.credentials.secret_access_key
|
43
|
-
connection_config[:session_token] = auth_data.credentials.session_token
|
36
|
+
@session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
|
44
37
|
conn = create_connection(connection_config)
|
45
38
|
query = sync_config.model.query
|
46
39
|
query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
|
@@ -57,38 +50,49 @@ module Multiwoven::Integrations::Source
|
|
57
50
|
private
|
58
51
|
|
59
52
|
def get_auth_data(connection_config)
|
53
|
+
session = @session_name.gsub(/\s+/, "-")
|
54
|
+
@session_name = ""
|
60
55
|
if connection_config[:auth_type] == "user"
|
61
56
|
Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
|
62
57
|
elsif connection_config[:auth_type] == "role"
|
63
|
-
|
64
|
-
|
65
|
-
sts_client.assume_role({
|
66
|
-
|
67
|
-
|
68
|
-
|
58
|
+
credentials = Aws::Credentials.new(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
|
59
|
+
sts_client = Aws::STS::Client.new(region: connection_config[:region], credentials: credentials)
|
60
|
+
resp = sts_client.assume_role({
|
61
|
+
role_arn: connection_config[:arn],
|
62
|
+
role_session_name: session,
|
63
|
+
external_id: connection_config[:external_id]
|
64
|
+
})
|
65
|
+
Aws::Credentials.new(
|
66
|
+
resp.credentials.access_key_id,
|
67
|
+
resp.credentials.secret_access_key,
|
68
|
+
resp.credentials.session_token
|
69
|
+
)
|
69
70
|
end
|
70
71
|
end
|
71
72
|
|
72
|
-
# DuckDB
|
73
73
|
def create_connection(connection_config)
|
74
|
+
# In the case when previewing a query
|
75
|
+
@session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
|
76
|
+
auth_data = get_auth_data(connection_config)
|
74
77
|
conn = DuckDB::Database.open.connect
|
75
78
|
# Set up S3 configuration
|
76
79
|
secret_query = "
|
77
80
|
CREATE SECRET amazons3_source (
|
78
81
|
TYPE S3,
|
79
|
-
KEY_ID '#{
|
80
|
-
SECRET '#{
|
82
|
+
KEY_ID '#{auth_data.credentials.access_key_id}',
|
83
|
+
SECRET '#{auth_data.credentials.secret_access_key}',
|
81
84
|
REGION '#{connection_config[:region]}',
|
82
|
-
SESSION_TOKEN '#{
|
85
|
+
SESSION_TOKEN '#{auth_data.credentials.session_token}'
|
83
86
|
);
|
84
87
|
"
|
85
88
|
get_results(conn, secret_query)
|
86
89
|
conn
|
87
90
|
end
|
88
91
|
|
89
|
-
def build_path(
|
90
|
-
path =
|
91
|
-
path
|
92
|
+
def build_path(connection_config)
|
93
|
+
path = connection_config[:path]
|
94
|
+
path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
|
95
|
+
"s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
|
92
96
|
end
|
93
97
|
|
94
98
|
def get_results(conn, query)
|
@@ -132,41 +136,6 @@ module Multiwoven::Integrations::Source
|
|
132
136
|
"boolean"
|
133
137
|
end
|
134
138
|
end
|
135
|
-
|
136
|
-
# AWS SDK
|
137
|
-
def config_aws(config, region)
|
138
|
-
Aws.config.update({
|
139
|
-
region: region,
|
140
|
-
credentials: config
|
141
|
-
})
|
142
|
-
Aws::S3::Client.new
|
143
|
-
end
|
144
|
-
|
145
|
-
def build_select_content_options(config, query)
|
146
|
-
config = config.with_indifferent_access
|
147
|
-
bucket_name = config[:bucket]
|
148
|
-
file_key = config[:file_key]
|
149
|
-
file_type = config[:file_type]
|
150
|
-
options = {
|
151
|
-
bucket: bucket_name,
|
152
|
-
key: file_key,
|
153
|
-
expression_type: "SQL",
|
154
|
-
expression: query,
|
155
|
-
output_serialization: {
|
156
|
-
json: {}
|
157
|
-
}
|
158
|
-
}
|
159
|
-
if file_type == "parquet"
|
160
|
-
options[:input_serialization] = {
|
161
|
-
parquet: {}
|
162
|
-
}
|
163
|
-
elsif file_type == "csv"
|
164
|
-
options[:input_serialization] = {
|
165
|
-
csv: { file_header_info: "USE" }
|
166
|
-
}
|
167
|
-
end
|
168
|
-
options
|
169
|
-
end
|
170
139
|
end
|
171
140
|
end
|
172
141
|
end
|
@@ -29,6 +29,7 @@
|
|
29
29
|
"region",
|
30
30
|
"bucket",
|
31
31
|
"arn",
|
32
|
+
"external_id",
|
32
33
|
"file_type"
|
33
34
|
]
|
34
35
|
},
|
@@ -53,16 +54,22 @@
|
|
53
54
|
"title": "IAM Role ARN",
|
54
55
|
"order": 1
|
55
56
|
},
|
57
|
+
"external_id": {
|
58
|
+
"type": "string",
|
59
|
+
"title": "External Id",
|
60
|
+
"description": "Unique ID that allows handshake between AWS accounts.",
|
61
|
+
"order": 2
|
62
|
+
},
|
56
63
|
"access_id": {
|
57
64
|
"type": "string",
|
58
65
|
"title": "Access Id",
|
59
|
-
"order":
|
66
|
+
"order": 3
|
60
67
|
},
|
61
68
|
"secret_access": {
|
62
69
|
"type": "string",
|
63
70
|
"title": "Secret Access",
|
64
71
|
"multiwoven_secret": true,
|
65
|
-
"order":
|
72
|
+
"order": 4
|
66
73
|
},
|
67
74
|
"region": {
|
68
75
|
"description": "AWS region",
|
@@ -71,13 +78,13 @@
|
|
71
78
|
],
|
72
79
|
"type": "string",
|
73
80
|
"title": "Region",
|
74
|
-
"order":
|
81
|
+
"order": 5
|
75
82
|
},
|
76
83
|
"bucket": {
|
77
84
|
"description": "Bucket Name",
|
78
85
|
"type": "string",
|
79
86
|
"title": "Bucket",
|
80
|
-
"order":
|
87
|
+
"order": 6
|
81
88
|
},
|
82
89
|
"path": {
|
83
90
|
"description": "Path to csv or parquet files",
|
@@ -86,7 +93,7 @@
|
|
86
93
|
],
|
87
94
|
"type": "string",
|
88
95
|
"title": "Path",
|
89
|
-
"order":
|
96
|
+
"order": 7
|
90
97
|
},
|
91
98
|
"file_type": {
|
92
99
|
"description": "The type of file to read",
|
@@ -96,7 +103,7 @@
|
|
96
103
|
"csv",
|
97
104
|
"parquet"
|
98
105
|
],
|
99
|
-
"order":
|
106
|
+
"order": 8
|
100
107
|
}
|
101
108
|
}
|
102
109
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multiwoven-integrations
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Subin T P
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|