multiwoven-integrations 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -0
- data/lib/multiwoven/integrations/rollout.rb +1 -1
- data/lib/multiwoven/integrations/source/amazon_s3/client.rb +40 -52
- data/lib/multiwoven/integrations/source/amazon_s3/config/spec.json +63 -11
- data/lib/multiwoven/integrations.rb +1 -0
- data/multiwoven-integrations.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8263bcb86ca81c8262642cd939c8079e671e7cb03a29a905c24f564888038c5
|
4
|
+
data.tar.gz: cf5ba4ee0ad1f25967592e72369b3e0452d09715a036b70685d48ee4f29568de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0c976f1fd891d8474dbc113bce838c0a566e0127e49829c61c37042b479b3303185193beb66a204a752a87f8a7ec82b512d51fba5437e5239e1fc8a889ddcb9
|
7
|
+
data.tar.gz: 0ecd0c38c46db77c5e6c9ea6d8ef94cfd3325f7cb9e5f1a2d86ae6d22c307305595cac210d5d6710843806d8c8891c1b04bc7cf663efcb7e5b42cd3bbe45e39f
|
data/README.md
CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
|
|
43
43
|
Multiwoven protocol is an open source standard for moving data from data sources to any third-party destinations.
|
44
44
|
Anyone with basic Ruby knowledge can build a connector using the protocol.
|
45
45
|
|
46
|
+
## Prerequisites
|
47
|
+
|
48
|
+
Before you begin the installation, ensure you have the following dependencies installed:
|
49
|
+
|
50
|
+
- **MySQL Client**
|
51
|
+
- Command: `brew install mysql-client`
|
52
|
+
- Description: Required for database interactions.
|
53
|
+
|
54
|
+
- **Zstandard (zstd)**
|
55
|
+
- Command: `brew install zstd`
|
56
|
+
- Description: Needed for data compression and decompression.
|
57
|
+
|
58
|
+
- **OpenSSL 3**
|
59
|
+
- Command: `brew install openssl@3`
|
60
|
+
- Description: Essential for secure communication.
|
61
|
+
|
62
|
+
|
46
63
|
### Installation
|
47
64
|
|
48
65
|
Install the gem and add to the application's Gemfile by executing:
|
@@ -4,12 +4,13 @@ module Multiwoven::Integrations::Source
|
|
4
4
|
module AmazonS3
|
5
5
|
include Multiwoven::Integrations::Core
|
6
6
|
class Client < SourceConnector
|
7
|
-
|
8
|
-
|
7
|
+
@session_name = ""
|
9
8
|
def check_connection(connection_config)
|
10
9
|
connection_config = connection_config.with_indifferent_access
|
11
|
-
|
12
|
-
|
10
|
+
@session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
11
|
+
conn = create_connection(connection_config)
|
12
|
+
path = build_path(connection_config)
|
13
|
+
get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
13
14
|
ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
|
14
15
|
rescue StandardError => e
|
15
16
|
ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
|
@@ -17,13 +18,13 @@ module Multiwoven::Integrations::Source
|
|
17
18
|
|
18
19
|
def discover(connection_config)
|
19
20
|
connection_config = connection_config.with_indifferent_access
|
21
|
+
@session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
|
20
22
|
conn = create_connection(connection_config)
|
21
23
|
# If pulling from multiple files, all files must have the same schema
|
22
|
-
path = build_path(connection_config
|
23
|
-
|
24
|
-
records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
|
24
|
+
path = build_path(connection_config)
|
25
|
+
records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
|
25
26
|
columns = build_discover_columns(records)
|
26
|
-
streams = [Multiwoven::Integrations::Protocol::Stream.new(name:
|
27
|
+
streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
|
27
28
|
catalog = Catalog.new(streams: streams)
|
28
29
|
catalog.to_multiwoven_message
|
29
30
|
rescue StandardError => e
|
@@ -32,6 +33,7 @@ module Multiwoven::Integrations::Source
|
|
32
33
|
|
33
34
|
def read(sync_config)
|
34
35
|
connection_config = sync_config.source.connection_specification.with_indifferent_access
|
36
|
+
@session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
|
35
37
|
conn = create_connection(connection_config)
|
36
38
|
query = sync_config.model.query
|
37
39
|
query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
|
@@ -47,25 +49,48 @@ module Multiwoven::Integrations::Source
|
|
47
49
|
|
48
50
|
private
|
49
51
|
|
50
|
-
|
52
|
+
def get_auth_data(connection_config)
|
53
|
+
session = @session_name
|
54
|
+
@session_name = ""
|
55
|
+
if connection_config[:auth_type] == "user"
|
56
|
+
Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
|
57
|
+
elsif connection_config[:auth_type] == "role"
|
58
|
+
sts_client = Aws::STS::Client.new(region: connection_config[:region])
|
59
|
+
resp = sts_client.assume_role({
|
60
|
+
role_arn: connection_config[:arn],
|
61
|
+
role_session_name: session
|
62
|
+
})
|
63
|
+
Aws::Credentials.new(
|
64
|
+
resp.credentials.access_key_id,
|
65
|
+
resp.credentials.secret_access_key,
|
66
|
+
resp.credentials.session_token
|
67
|
+
)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
51
71
|
def create_connection(connection_config)
|
72
|
+
# In the case when previewing a query
|
73
|
+
@session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
|
74
|
+
auth_data = get_auth_data(connection_config)
|
52
75
|
conn = DuckDB::Database.open.connect
|
53
76
|
# Set up S3 configuration
|
54
77
|
secret_query = "
|
55
78
|
CREATE SECRET amazons3_source (
|
56
79
|
TYPE S3,
|
57
|
-
KEY_ID '#{
|
58
|
-
SECRET '#{
|
59
|
-
REGION '#{connection_config[:region]}'
|
80
|
+
KEY_ID '#{auth_data.credentials.access_key_id}',
|
81
|
+
SECRET '#{auth_data.credentials.secret_access_key}',
|
82
|
+
REGION '#{connection_config[:region]}',
|
83
|
+
SESSION_TOKEN '#{auth_data.credentials.session_token}'
|
60
84
|
);
|
61
85
|
"
|
62
86
|
get_results(conn, secret_query)
|
63
87
|
conn
|
64
88
|
end
|
65
89
|
|
66
|
-
def build_path(
|
67
|
-
path =
|
68
|
-
path
|
90
|
+
def build_path(connection_config)
|
91
|
+
path = connection_config[:path]
|
92
|
+
path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
|
93
|
+
"s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
|
69
94
|
end
|
70
95
|
|
71
96
|
def get_results(conn, query)
|
@@ -109,43 +134,6 @@ module Multiwoven::Integrations::Source
|
|
109
134
|
"boolean"
|
110
135
|
end
|
111
136
|
end
|
112
|
-
|
113
|
-
# AWS SDK
|
114
|
-
def config_aws(config)
|
115
|
-
config = config.with_indifferent_access
|
116
|
-
Aws.config.update({
|
117
|
-
region: config[:region],
|
118
|
-
credentials: Aws::Credentials.new(config[:access_id], config[:secret_access])
|
119
|
-
})
|
120
|
-
config.with_indifferent_access
|
121
|
-
Aws::S3::Client.new
|
122
|
-
end
|
123
|
-
|
124
|
-
def build_select_content_options(config, query)
|
125
|
-
config = config.with_indifferent_access
|
126
|
-
bucket_name = config[:bucket]
|
127
|
-
file_key = config[:file_key]
|
128
|
-
file_type = config[:file_type]
|
129
|
-
options = {
|
130
|
-
bucket: bucket_name,
|
131
|
-
key: file_key,
|
132
|
-
expression_type: "SQL",
|
133
|
-
expression: query,
|
134
|
-
output_serialization: {
|
135
|
-
json: {}
|
136
|
-
}
|
137
|
-
}
|
138
|
-
if file_type == "parquet"
|
139
|
-
options[:input_serialization] = {
|
140
|
-
parquet: {}
|
141
|
-
}
|
142
|
-
elsif file_type == "csv"
|
143
|
-
options[:input_serialization] = {
|
144
|
-
csv: { file_header_info: "USE" }
|
145
|
-
}
|
146
|
-
end
|
147
|
-
options
|
148
|
-
end
|
149
137
|
end
|
150
138
|
end
|
151
139
|
end
|
@@ -6,13 +6,51 @@
|
|
6
6
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
7
7
|
"title": "AmazonS3",
|
8
8
|
"type": "object",
|
9
|
-
"
|
9
|
+
"if": {
|
10
|
+
"properties": {
|
11
|
+
"auth_type": {
|
12
|
+
"enum": ["user"]
|
13
|
+
}
|
14
|
+
}
|
15
|
+
},
|
16
|
+
"then": {
|
17
|
+
"required": [
|
18
|
+
"auth_type",
|
19
|
+
"region",
|
20
|
+
"bucket",
|
21
|
+
"access_id",
|
22
|
+
"secret_access",
|
23
|
+
"file_type"
|
24
|
+
]
|
25
|
+
},
|
26
|
+
"else": {
|
27
|
+
"required": [
|
28
|
+
"auth_type",
|
29
|
+
"region",
|
30
|
+
"bucket",
|
31
|
+
"arn",
|
32
|
+
"file_type"
|
33
|
+
]
|
34
|
+
},
|
10
35
|
"properties": {
|
11
|
-
"
|
12
|
-
"
|
13
|
-
"examples": ["us-east-2"],
|
36
|
+
"auth_type": {
|
37
|
+
"title": "Authentication type",
|
14
38
|
"type": "string",
|
15
|
-
"
|
39
|
+
"default": "user",
|
40
|
+
"description": "Authenticate either by using an IAM User (Access Key ID & Secret Access Key) or an IAM Role (ARN)",
|
41
|
+
"enum": [
|
42
|
+
"user",
|
43
|
+
"role"
|
44
|
+
],
|
45
|
+
"enumNames": [
|
46
|
+
"IAM User",
|
47
|
+
"IAM Role"
|
48
|
+
],
|
49
|
+
"order": 0
|
50
|
+
},
|
51
|
+
"arn": {
|
52
|
+
"type": "string",
|
53
|
+
"title": "IAM Role ARN",
|
16
54
|
"order": 1
|
17
55
|
},
|
18
56
|
"access_id": {
|
@@ -26,26 +64,40 @@
|
|
26
64
|
"multiwoven_secret": true,
|
27
65
|
"order": 3
|
28
66
|
},
|
67
|
+
"region": {
|
68
|
+
"description": "AWS region",
|
69
|
+
"examples": [
|
70
|
+
"us-east-2"
|
71
|
+
],
|
72
|
+
"type": "string",
|
73
|
+
"title": "Region",
|
74
|
+
"order": 4
|
75
|
+
},
|
29
76
|
"bucket": {
|
30
77
|
"description": "Bucket Name",
|
31
78
|
"type": "string",
|
32
79
|
"title": "Bucket",
|
33
|
-
"order":
|
80
|
+
"order": 5
|
34
81
|
},
|
35
82
|
"path": {
|
36
83
|
"description": "Path to csv or parquet files",
|
37
|
-
"examples": [
|
84
|
+
"examples": [
|
85
|
+
"/path/to/files"
|
86
|
+
],
|
38
87
|
"type": "string",
|
39
88
|
"title": "Path",
|
40
|
-
"order":
|
89
|
+
"order": 6
|
41
90
|
},
|
42
91
|
"file_type": {
|
43
92
|
"description": "The type of file to read",
|
44
93
|
"type": "string",
|
45
94
|
"title": "File Type",
|
46
|
-
"enum": [
|
47
|
-
|
95
|
+
"enum": [
|
96
|
+
"csv",
|
97
|
+
"parquet"
|
98
|
+
],
|
99
|
+
"order": 7
|
48
100
|
}
|
49
101
|
}
|
50
102
|
}
|
51
|
-
}
|
103
|
+
}
|
@@ -37,6 +37,7 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.add_runtime_dependency "async-websocket"
|
38
38
|
spec.add_runtime_dependency "aws-sdk-athena"
|
39
39
|
spec.add_runtime_dependency "aws-sdk-s3"
|
40
|
+
spec.add_runtime_dependency "aws-sdk-sts"
|
40
41
|
spec.add_runtime_dependency "csv"
|
41
42
|
spec.add_runtime_dependency "dry-schema"
|
42
43
|
spec.add_runtime_dependency "dry-struct"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multiwoven-integrations
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Subin T P
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: aws-sdk-sts
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: csv
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|