multiwoven-integrations 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -0
- data/lib/multiwoven/integrations/rollout.rb +1 -1
- data/lib/multiwoven/integrations/source/amazon_s3/client.rb +31 -64
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8263bcb86ca81c8262642cd939c8079e671e7cb03a29a905c24f564888038c5
|
4
|
+
data.tar.gz: cf5ba4ee0ad1f25967592e72369b3e0452d09715a036b70685d48ee4f29568de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0c976f1fd891d8474dbc113bce838c0a566e0127e49829c61c37042b479b3303185193beb66a204a752a87f8a7ec82b512d51fba5437e5239e1fc8a889ddcb9
|
7
|
+
data.tar.gz: 0ecd0c38c46db77c5e6c9ea6d8ef94cfd3325f7cb9e5f1a2d86ae6d22c307305595cac210d5d6710843806d8c8891c1b04bc7cf663efcb7e5b42cd3bbe45e39f
|
data/README.md
CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
|
|
43
43
|
Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
|
44
44
|
Anyone can build a connector with basic Ruby knowledge using the protocol.
|
45
45
|
|
46
|
+
## Prerequisites
|
47
|
+
|
48
|
+
Before you begin the installation, ensure you have the following dependencies installed:
|
49
|
+
|
50
|
+
- **MySQL Client**
|
51
|
+
- Command: `brew install mysql-client`
|
52
|
+
- Description: Required for database interactions.
|
53
|
+
|
54
|
+
- **Zstandard (zstd)**
|
55
|
+
- Command: `brew install zstd`
|
56
|
+
- Description: Needed for data compression and decompression.
|
57
|
+
|
58
|
+
- **OpenSSL 3**
|
59
|
+
- Command: `brew install openssl@3`
|
60
|
+
- Description: Essential for secure communication.
|
61
|
+
|
62
|
+
|
46
63
|
### Installation
|
47
64
|
|
48
65
|
Install the gem and add to the application's Gemfile by executing:
|
@@ -4,13 +4,13 @@ module Multiwoven::Integrations::Source
|
|
4
4
|
module AmazonS3
|
5
5
|
include Multiwoven::Integrations::Core
|
6
6
|
class Client < SourceConnector
|
7
|
-
|
8
|
-
|
7
|
+
@session_name = ""
|
9
8
|
def check_connection(connection_config)
|
10
9
|
connection_config = connection_config.with_indifferent_access
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
@session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
11
|
+
conn = create_connection(connection_config)
|
12
|
+
path = build_path(connection_config)
|
13
|
+
get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
14
14
|
ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
|
15
15
|
rescue StandardError => e
|
16
16
|
ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
|
@@ -18,17 +18,13 @@ module Multiwoven::Integrations::Source
|
|
18
18
|
|
19
19
|
def discover(connection_config)
|
20
20
|
connection_config = connection_config.with_indifferent_access
|
21
|
-
|
22
|
-
connection_config[:access_id] = auth_data.credentials.access_key_id
|
23
|
-
connection_config[:secret_access] = auth_data.credentials.secret_access_key
|
24
|
-
connection_config[:session_token] = auth_data.credentials.session_token
|
21
|
+
@session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
25
22
|
conn = create_connection(connection_config)
|
26
23
|
# If pulling from multiple files, all files must have the same schema
|
27
|
-
path = build_path(connection_config
|
28
|
-
|
29
|
-
records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
|
24
|
+
path = build_path(connection_config)
|
25
|
+
records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
30
26
|
columns = build_discover_columns(records)
|
31
|
-
streams = [Multiwoven::Integrations::Protocol::Stream.new(name:
|
27
|
+
streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
|
32
28
|
catalog = Catalog.new(streams: streams)
|
33
29
|
catalog.to_multiwoven_message
|
34
30
|
rescue StandardError => e
|
@@ -37,10 +33,7 @@ module Multiwoven::Integrations::Source
|
|
37
33
|
|
38
34
|
def read(sync_config)
|
39
35
|
connection_config = sync_config.source.connection_specification.with_indifferent_access
|
40
|
-
|
41
|
-
connection_config[:access_id] = auth_data.credentials.access_key_id
|
42
|
-
connection_config[:secret_access] = auth_data.credentials.secret_access_key
|
43
|
-
connection_config[:session_token] = auth_data.credentials.session_token
|
36
|
+
@session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
|
44
37
|
conn = create_connection(connection_config)
|
45
38
|
query = sync_config.model.query
|
46
39
|
query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
|
@@ -57,38 +50,47 @@ module Multiwoven::Integrations::Source
|
|
57
50
|
private
|
58
51
|
|
59
52
|
def get_auth_data(connection_config)
|
53
|
+
session = @session_name
|
54
|
+
@session_name = ""
|
60
55
|
if connection_config[:auth_type] == "user"
|
61
56
|
Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
|
62
57
|
elsif connection_config[:auth_type] == "role"
|
63
58
|
sts_client = Aws::STS::Client.new(region: connection_config[:region])
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
59
|
+
resp = sts_client.assume_role({
|
60
|
+
role_arn: connection_config[:arn],
|
61
|
+
role_session_name: session
|
62
|
+
})
|
63
|
+
Aws::Credentials.new(
|
64
|
+
resp.credentials.access_key_id,
|
65
|
+
resp.credentials.secret_access_key,
|
66
|
+
resp.credentials.session_token
|
67
|
+
)
|
69
68
|
end
|
70
69
|
end
|
71
70
|
|
72
|
-
# DuckDB
|
73
71
|
def create_connection(connection_config)
|
72
|
+
# In the case when previewing a query
|
73
|
+
@session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
|
74
|
+
auth_data = get_auth_data(connection_config)
|
74
75
|
conn = DuckDB::Database.open.connect
|
75
76
|
# Set up S3 configuration
|
76
77
|
secret_query = "
|
77
78
|
CREATE SECRET amazons3_source (
|
78
79
|
TYPE S3,
|
79
|
-
KEY_ID '#{
|
80
|
-
SECRET '#{
|
80
|
+
KEY_ID '#{auth_data.credentials.access_key_id}',
|
81
|
+
SECRET '#{auth_data.credentials.secret_access_key}',
|
81
82
|
REGION '#{connection_config[:region]}',
|
82
|
-
SESSION_TOKEN '#{
|
83
|
+
SESSION_TOKEN '#{auth_data.credentials.session_token}'
|
83
84
|
);
|
84
85
|
"
|
85
86
|
get_results(conn, secret_query)
|
86
87
|
conn
|
87
88
|
end
|
88
89
|
|
89
|
-
def build_path(
|
90
|
-
path =
|
91
|
-
path
|
90
|
+
def build_path(connection_config)
|
91
|
+
path = connection_config[:path]
|
92
|
+
path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
|
93
|
+
"s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
|
92
94
|
end
|
93
95
|
|
94
96
|
def get_results(conn, query)
|
@@ -132,41 +134,6 @@ module Multiwoven::Integrations::Source
|
|
132
134
|
"boolean"
|
133
135
|
end
|
134
136
|
end
|
135
|
-
|
136
|
-
# AWS SDK
|
137
|
-
def config_aws(config, region)
|
138
|
-
Aws.config.update({
|
139
|
-
region: region,
|
140
|
-
credentials: config
|
141
|
-
})
|
142
|
-
Aws::S3::Client.new
|
143
|
-
end
|
144
|
-
|
145
|
-
def build_select_content_options(config, query)
|
146
|
-
config = config.with_indifferent_access
|
147
|
-
bucket_name = config[:bucket]
|
148
|
-
file_key = config[:file_key]
|
149
|
-
file_type = config[:file_type]
|
150
|
-
options = {
|
151
|
-
bucket: bucket_name,
|
152
|
-
key: file_key,
|
153
|
-
expression_type: "SQL",
|
154
|
-
expression: query,
|
155
|
-
output_serialization: {
|
156
|
-
json: {}
|
157
|
-
}
|
158
|
-
}
|
159
|
-
if file_type == "parquet"
|
160
|
-
options[:input_serialization] = {
|
161
|
-
parquet: {}
|
162
|
-
}
|
163
|
-
elsif file_type == "csv"
|
164
|
-
options[:input_serialization] = {
|
165
|
-
csv: { file_header_info: "USE" }
|
166
|
-
}
|
167
|
-
end
|
168
|
-
options
|
169
|
-
end
|
170
137
|
end
|
171
138
|
end
|
172
139
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multiwoven-integrations
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Subin T P
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|