multiwoven-integrations 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -0
- data/lib/multiwoven/integrations/rollout.rb +1 -1
- data/lib/multiwoven/integrations/source/amazon_s3/client.rb +31 -64
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8263bcb86ca81c8262642cd939c8079e671e7cb03a29a905c24f564888038c5
|
4
|
+
data.tar.gz: cf5ba4ee0ad1f25967592e72369b3e0452d09715a036b70685d48ee4f29568de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0c976f1fd891d8474dbc113bce838c0a566e0127e49829c61c37042b479b3303185193beb66a204a752a87f8a7ec82b512d51fba5437e5239e1fc8a889ddcb9
|
7
|
+
data.tar.gz: 0ecd0c38c46db77c5e6c9ea6d8ef94cfd3325f7cb9e5f1a2d86ae6d22c307305595cac210d5d6710843806d8c8891c1b04bc7cf663efcb7e5b42cd3bbe45e39f
|
data/README.md
CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
|
|
43
43
|
Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
|
44
44
|
Anyone can build a connector with basic Ruby knowledge using the protocol.
|
45
45
|
|
46
|
+
## Prerequisites
|
47
|
+
|
48
|
+
Before you begin the installation, ensure you have the following dependencies installed:
|
49
|
+
|
50
|
+
- **MySQL Client**
|
51
|
+
- Command: `brew install mysql-client`
|
52
|
+
- Description: Required for database interactions.
|
53
|
+
|
54
|
+
- **Zstandard (zstd)**
|
55
|
+
- Command: `brew install zstd`
|
56
|
+
- Description: Needed for data compression and decompression.
|
57
|
+
|
58
|
+
- **OpenSSL 3**
|
59
|
+
- Command: `brew install openssl@3`
|
60
|
+
- Description: Essential for secure communication.
|
61
|
+
|
62
|
+
|
46
63
|
### Installation
|
47
64
|
|
48
65
|
Install the gem and add to the application's Gemfile by executing:
|
@@ -4,13 +4,13 @@ module Multiwoven::Integrations::Source
|
|
4
4
|
module AmazonS3
|
5
5
|
include Multiwoven::Integrations::Core
|
6
6
|
class Client < SourceConnector
|
7
|
-
|
8
|
-
|
7
|
+
@session_name = ""
|
9
8
|
def check_connection(connection_config)
|
10
9
|
connection_config = connection_config.with_indifferent_access
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
@session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
11
|
+
conn = create_connection(connection_config)
|
12
|
+
path = build_path(connection_config)
|
13
|
+
get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
14
14
|
ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
|
15
15
|
rescue StandardError => e
|
16
16
|
ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
|
@@ -18,17 +18,13 @@ module Multiwoven::Integrations::Source
|
|
18
18
|
|
19
19
|
def discover(connection_config)
|
20
20
|
connection_config = connection_config.with_indifferent_access
|
21
|
-
|
22
|
-
connection_config[:access_id] = auth_data.credentials.access_key_id
|
23
|
-
connection_config[:secret_access] = auth_data.credentials.secret_access_key
|
24
|
-
connection_config[:session_token] = auth_data.credentials.session_token
|
21
|
+
@session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
25
22
|
conn = create_connection(connection_config)
|
26
23
|
# If pulling from multiple files, all files must have the same schema
|
27
|
-
path = build_path(connection_config
|
28
|
-
|
29
|
-
records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
|
24
|
+
path = build_path(connection_config)
|
25
|
+
records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
30
26
|
columns = build_discover_columns(records)
|
31
|
-
streams = [Multiwoven::Integrations::Protocol::Stream.new(name:
|
27
|
+
streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
|
32
28
|
catalog = Catalog.new(streams: streams)
|
33
29
|
catalog.to_multiwoven_message
|
34
30
|
rescue StandardError => e
|
@@ -37,10 +33,7 @@ module Multiwoven::Integrations::Source
|
|
37
33
|
|
38
34
|
def read(sync_config)
|
39
35
|
connection_config = sync_config.source.connection_specification.with_indifferent_access
|
40
|
-
|
41
|
-
connection_config[:access_id] = auth_data.credentials.access_key_id
|
42
|
-
connection_config[:secret_access] = auth_data.credentials.secret_access_key
|
43
|
-
connection_config[:session_token] = auth_data.credentials.session_token
|
36
|
+
@session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
|
44
37
|
conn = create_connection(connection_config)
|
45
38
|
query = sync_config.model.query
|
46
39
|
query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
|
@@ -57,38 +50,47 @@ module Multiwoven::Integrations::Source
|
|
57
50
|
private
|
58
51
|
|
59
52
|
def get_auth_data(connection_config)
|
53
|
+
session = @session_name
|
54
|
+
@session_name = ""
|
60
55
|
if connection_config[:auth_type] == "user"
|
61
56
|
Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
|
62
57
|
elsif connection_config[:auth_type] == "role"
|
63
58
|
sts_client = Aws::STS::Client.new(region: connection_config[:region])
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
59
|
+
resp = sts_client.assume_role({
|
60
|
+
role_arn: connection_config[:arn],
|
61
|
+
role_session_name: session
|
62
|
+
})
|
63
|
+
Aws::Credentials.new(
|
64
|
+
resp.credentials.access_key_id,
|
65
|
+
resp.credentials.secret_access_key,
|
66
|
+
resp.credentials.session_token
|
67
|
+
)
|
69
68
|
end
|
70
69
|
end
|
71
70
|
|
72
|
-
# DuckDB
|
73
71
|
def create_connection(connection_config)
|
72
|
+
# In the case when previewing a query
|
73
|
+
@session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
|
74
|
+
auth_data = get_auth_data(connection_config)
|
74
75
|
conn = DuckDB::Database.open.connect
|
75
76
|
# Set up S3 configuration
|
76
77
|
secret_query = "
|
77
78
|
CREATE SECRET amazons3_source (
|
78
79
|
TYPE S3,
|
79
|
-
KEY_ID '#{
|
80
|
-
SECRET '#{
|
80
|
+
KEY_ID '#{auth_data.credentials.access_key_id}',
|
81
|
+
SECRET '#{auth_data.credentials.secret_access_key}',
|
81
82
|
REGION '#{connection_config[:region]}',
|
82
|
-
SESSION_TOKEN '#{
|
83
|
+
SESSION_TOKEN '#{auth_data.credentials.session_token}'
|
83
84
|
);
|
84
85
|
"
|
85
86
|
get_results(conn, secret_query)
|
86
87
|
conn
|
87
88
|
end
|
88
89
|
|
89
|
-
def build_path(
|
90
|
-
path =
|
91
|
-
path
|
90
|
+
def build_path(connection_config)
|
91
|
+
path = connection_config[:path]
|
92
|
+
path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
|
93
|
+
"s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
|
92
94
|
end
|
93
95
|
|
94
96
|
def get_results(conn, query)
|
@@ -132,41 +134,6 @@ module Multiwoven::Integrations::Source
|
|
132
134
|
"boolean"
|
133
135
|
end
|
134
136
|
end
|
135
|
-
|
136
|
-
# AWS SDK
|
137
|
-
def config_aws(config, region)
|
138
|
-
Aws.config.update({
|
139
|
-
region: region,
|
140
|
-
credentials: config
|
141
|
-
})
|
142
|
-
Aws::S3::Client.new
|
143
|
-
end
|
144
|
-
|
145
|
-
def build_select_content_options(config, query)
|
146
|
-
config = config.with_indifferent_access
|
147
|
-
bucket_name = config[:bucket]
|
148
|
-
file_key = config[:file_key]
|
149
|
-
file_type = config[:file_type]
|
150
|
-
options = {
|
151
|
-
bucket: bucket_name,
|
152
|
-
key: file_key,
|
153
|
-
expression_type: "SQL",
|
154
|
-
expression: query,
|
155
|
-
output_serialization: {
|
156
|
-
json: {}
|
157
|
-
}
|
158
|
-
}
|
159
|
-
if file_type == "parquet"
|
160
|
-
options[:input_serialization] = {
|
161
|
-
parquet: {}
|
162
|
-
}
|
163
|
-
elsif file_type == "csv"
|
164
|
-
options[:input_serialization] = {
|
165
|
-
csv: { file_header_info: "USE" }
|
166
|
-
}
|
167
|
-
end
|
168
|
-
options
|
169
|
-
end
|
170
137
|
end
|
171
138
|
end
|
172
139
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multiwoven-integrations
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Subin T P
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|