multiwoven-integrations 0.1.76 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/multiwoven/integrations/core/source_connector.rb +6 -0
- data/lib/multiwoven/integrations/rollout.rb +2 -1
- data/lib/multiwoven/integrations/source/amazon_s3/client.rb +151 -0
- data/lib/multiwoven/integrations/source/amazon_s3/config/meta.json +15 -0
- data/lib/multiwoven/integrations/source/amazon_s3/config/spec.json +51 -0
- data/lib/multiwoven/integrations/source/amazon_s3/icon.svg +34 -0
- data/lib/multiwoven/integrations.rb +3 -0
- data/multiwoven-integrations.gemspec +2 -0
- metadata +34 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91cf72711d231942521cb83dd735b0a4b9067bd30d2aaeecf2be27d69d163da2
|
4
|
+
data.tar.gz: 5f2cc42a72e86cb0d8d1ed590c6fc337aa4539892305fc4f0502c74c7bb5dfa7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 392ac958aeb012d4e1bf48daadbe168cf8e0cabd095fd79d155ed8fa4cc14cd8967974d9ee0085e4cf36ac9b78111d09da6e631fe21f97fd5be2136200fd56af
|
7
|
+
data.tar.gz: e3e76a87549ad98f9bed153746035fc1c6672ec30754eb1943a54d43cf2dbe253cbc3bb4235c4b11c9b98544856ff432cbd067af36bada6a12422d835426acca
|
@@ -13,6 +13,12 @@ module Multiwoven
|
|
13
13
|
|
14
14
|
private
|
15
15
|
|
16
|
+
# This needs to be implemented as private method
|
17
|
+
# In every source connector. This will be used for model preview
|
18
|
+
def create_connection(connector_config)
|
19
|
+
# return a connection to the client's source
|
20
|
+
end
|
21
|
+
|
16
22
|
# This needs to be implemented as private method
|
17
23
|
# In every source connector. This will be used for model preview
|
18
24
|
def query(connection, query)
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Multiwoven
|
4
4
|
module Integrations
|
5
|
-
VERSION = "0.1.76"
|
5
|
+
VERSION = "0.2.0"
|
6
6
|
|
7
7
|
ENABLED_SOURCES = %w[
|
8
8
|
Snowflake
|
@@ -13,6 +13,7 @@ module Multiwoven
|
|
13
13
|
SalesforceConsumerGoodsCloud
|
14
14
|
AwsAthena
|
15
15
|
Clickhouse
|
16
|
+
AmazonS3
|
16
17
|
].freeze
|
17
18
|
|
18
19
|
ENABLED_DESTINATIONS = %w[
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multiwoven::Integrations::Source
|
4
|
+
module AmazonS3
|
5
|
+
include Multiwoven::Integrations::Core
|
6
|
+
class Client < SourceConnector
|
7
|
+
DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
|
8
|
+
|
9
|
+
def check_connection(connection_config)
|
10
|
+
connection_config = connection_config.with_indifferent_access
|
11
|
+
client = config_aws(connection_config)
|
12
|
+
client.get_bucket_policy_status({ bucket: connection_config[:bucket] })
|
13
|
+
ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
|
14
|
+
rescue StandardError => e
|
15
|
+
ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
|
16
|
+
end
|
17
|
+
|
18
|
+
def discover(connection_config)
|
19
|
+
connection_config = connection_config.with_indifferent_access
|
20
|
+
conn = create_connection(connection_config)
|
21
|
+
# If pulling from multiple files, all files must have the same schema
|
22
|
+
path = build_path(connection_config[:path])
|
23
|
+
full_path = "s3://#{connection_config[:bucket]}/#{path}*.#{connection_config[:file_type]}"
|
24
|
+
records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
|
25
|
+
columns = build_discover_columns(records)
|
26
|
+
streams = [Multiwoven::Integrations::Protocol::Stream.new(name: full_path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
|
27
|
+
catalog = Catalog.new(streams: streams)
|
28
|
+
catalog.to_multiwoven_message
|
29
|
+
rescue StandardError => e
|
30
|
+
handle_exception(e, { context: "AMAZONS3:DISCOVER:EXCEPTION", type: "error" })
|
31
|
+
end
|
32
|
+
|
33
|
+
def read(sync_config)
|
34
|
+
connection_config = sync_config.source.connection_specification.with_indifferent_access
|
35
|
+
conn = create_connection(connection_config)
|
36
|
+
query = sync_config.model.query
|
37
|
+
query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
|
38
|
+
query(conn, query)
|
39
|
+
rescue StandardError => e
|
40
|
+
handle_exception(e, {
|
41
|
+
context: "AMAZONS3:READ:EXCEPTION",
|
42
|
+
type: "error",
|
43
|
+
sync_id: sync_config.sync_id,
|
44
|
+
sync_run_id: sync_config.sync_run_id
|
45
|
+
})
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
# DuckDB
|
51
|
+
def create_connection(connection_config)
|
52
|
+
conn = DuckDB::Database.open.connect
|
53
|
+
# Set up S3 configuration
|
54
|
+
secret_query = "
|
55
|
+
CREATE SECRET amazons3_source (
|
56
|
+
TYPE S3,
|
57
|
+
KEY_ID '#{connection_config[:access_id]}',
|
58
|
+
SECRET '#{connection_config[:secret_access]}',
|
59
|
+
REGION '#{connection_config[:region]}'
|
60
|
+
);
|
61
|
+
"
|
62
|
+
get_results(conn, secret_query)
|
63
|
+
conn
|
64
|
+
end
|
65
|
+
|
66
|
+
def build_path(path)
|
67
|
+
path = "#{path}/" if !path.to_s.strip.empty? && path[-1] != "/"
|
68
|
+
path
|
69
|
+
end
|
70
|
+
|
71
|
+
def get_results(conn, query)
|
72
|
+
results = conn.query(query)
|
73
|
+
hash_array_values(results)
|
74
|
+
end
|
75
|
+
|
76
|
+
def query(conn, query)
|
77
|
+
records = get_results(conn, query)
|
78
|
+
records.map do |row|
|
79
|
+
RecordMessage.new(data: row, emitted_at: Time.now.to_i).to_multiwoven_message
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def hash_array_values(describe)
|
84
|
+
keys = describe.columns.map(&:name)
|
85
|
+
describe.map do |row|
|
86
|
+
Hash[keys.zip(row)]
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def build_discover_columns(describe_results)
|
91
|
+
describe_results.map do |row|
|
92
|
+
type = column_schema_helper(row["column_type"])
|
93
|
+
{
|
94
|
+
column_name: row["column_name"],
|
95
|
+
type: type
|
96
|
+
}
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def column_schema_helper(column_type)
|
101
|
+
case column_type
|
102
|
+
when "VARCHAR", "BIT", "DATE", "TIME", "TIMESTAMP", "UUID"
|
103
|
+
"string"
|
104
|
+
when "DOUBLE"
|
105
|
+
"number"
|
106
|
+
when "BIGINT", "HUGEINT", "INTEGER", "SMALLINT"
|
107
|
+
"integer"
|
108
|
+
when "BOOLEAN"
|
109
|
+
"boolean"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# AWS SDK
|
114
|
+
def config_aws(config)
|
115
|
+
config = config.with_indifferent_access
|
116
|
+
Aws.config.update({
|
117
|
+
region: config[:region],
|
118
|
+
credentials: Aws::Credentials.new(config[:access_id], config[:secret_access])
|
119
|
+
})
|
120
|
+
config.with_indifferent_access
|
121
|
+
Aws::S3::Client.new
|
122
|
+
end
|
123
|
+
|
124
|
+
def build_select_content_options(config, query)
|
125
|
+
config = config.with_indifferent_access
|
126
|
+
bucket_name = config[:bucket]
|
127
|
+
file_key = config[:file_key]
|
128
|
+
file_type = config[:file_type]
|
129
|
+
options = {
|
130
|
+
bucket: bucket_name,
|
131
|
+
key: file_key,
|
132
|
+
expression_type: "SQL",
|
133
|
+
expression: query,
|
134
|
+
output_serialization: {
|
135
|
+
json: {}
|
136
|
+
}
|
137
|
+
}
|
138
|
+
if file_type == "parquet"
|
139
|
+
options[:input_serialization] = {
|
140
|
+
parquet: {}
|
141
|
+
}
|
142
|
+
elsif file_type == "csv"
|
143
|
+
options[:input_serialization] = {
|
144
|
+
csv: { file_header_info: "USE" }
|
145
|
+
}
|
146
|
+
end
|
147
|
+
options
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
{
|
2
|
+
"data": {
|
3
|
+
"name": "AmazonS3",
|
4
|
+
"title": "Amazon S3",
|
5
|
+
"connector_type": "source",
|
6
|
+
"category": "Data Lake",
|
7
|
+
"documentation_url": "https://docs.mutliwoven.com",
|
8
|
+
"github_issue_label": "source-amazons3",
|
9
|
+
"icon": "icon.svg",
|
10
|
+
"license": "MIT",
|
11
|
+
"release_stage": "alpha",
|
12
|
+
"support_level": "community",
|
13
|
+
"tags": ["language:ruby", "multiwoven"]
|
14
|
+
}
|
15
|
+
}
|
@@ -0,0 +1,51 @@
|
|
1
|
+
{
|
2
|
+
"documentation_url": "https://docs.multiwoven.com/integrations/sources/amazons3",
|
3
|
+
"stream_type": "dynamic",
|
4
|
+
"connector_query_type": "raw_sql",
|
5
|
+
"connection_specification": {
|
6
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
7
|
+
"title": "AmazonS3",
|
8
|
+
"type": "object",
|
9
|
+
"required": ["region", "bucket", "access_id", "secret_access", "file_type"],
|
10
|
+
"properties": {
|
11
|
+
"region": {
|
12
|
+
"description": "AWS region",
|
13
|
+
"examples": ["us-east-2"],
|
14
|
+
"type": "string",
|
15
|
+
"title": "Region",
|
16
|
+
"order": 1
|
17
|
+
},
|
18
|
+
"access_id": {
|
19
|
+
"type": "string",
|
20
|
+
"title": "Access Id",
|
21
|
+
"order": 2
|
22
|
+
},
|
23
|
+
"secret_access": {
|
24
|
+
"type": "string",
|
25
|
+
"title": "Secret Access",
|
26
|
+
"multiwoven_secret": true,
|
27
|
+
"order": 3
|
28
|
+
},
|
29
|
+
"bucket": {
|
30
|
+
"description": "Bucket Name",
|
31
|
+
"type": "string",
|
32
|
+
"title": "Bucket",
|
33
|
+
"order": 4
|
34
|
+
},
|
35
|
+
"path": {
|
36
|
+
"description": "Path to csv or parquet files",
|
37
|
+
"examples": ["/path/to/files"],
|
38
|
+
"type": "string",
|
39
|
+
"title": "Path",
|
40
|
+
"order": 5
|
41
|
+
},
|
42
|
+
"file_type": {
|
43
|
+
"description": "The type of file to read",
|
44
|
+
"type": "string",
|
45
|
+
"title": "File Type",
|
46
|
+
"enum": ["csv", "parquet"],
|
47
|
+
"order": 6
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="428" height="512" viewBox="0 0 428 512">
|
2
|
+
<defs>
|
3
|
+
<style>
|
4
|
+
.cls-1 {
|
5
|
+
fill: #e25444;
|
6
|
+
}
|
7
|
+
|
8
|
+
.cls-1, .cls-2, .cls-3 {
|
9
|
+
fill-rule: evenodd;
|
10
|
+
}
|
11
|
+
|
12
|
+
.cls-2 {
|
13
|
+
fill: #7b1d13;
|
14
|
+
}
|
15
|
+
|
16
|
+
.cls-3 {
|
17
|
+
fill: #58150d;
|
18
|
+
}
|
19
|
+
</style>
|
20
|
+
</defs>
|
21
|
+
<path class="cls-1" d="M378,99L295,257l83,158,34-19V118Z"/>
|
22
|
+
<path class="cls-2" d="M378,99L212,118,127.5,257,212,396l166,19V99Z"/>
|
23
|
+
<path class="cls-3" d="M43,99L16,111V403l27,12L212,257Z"/>
|
24
|
+
<path class="cls-1" d="M42.637,98.667l169.587,47.111V372.444L42.637,415.111V98.667Z"/>
|
25
|
+
<path class="cls-3" d="M212.313,170.667l-72.008-11.556,72.008-81.778,71.83,81.778Z"/>
|
26
|
+
<path class="cls-3" d="M284.143,159.111l-71.919,11.733-71.919-11.733V77.333"/>
|
27
|
+
<path class="cls-3" d="M212.313,342.222l-72.008,13.334,72.008,70.222,71.83-70.222Z"/>
|
28
|
+
<path class="cls-2" d="M212,16L140,54V159l72.224-20.333Z"/>
|
29
|
+
<path class="cls-2" d="M212.224,196.444l-71.919,7.823V309.105l71.919,8.228V196.444Z"/>
|
30
|
+
<path class="cls-2" d="M212.224,373.333L140.305,355.3V458.363L212.224,496V373.333Z"/>
|
31
|
+
<path class="cls-1" d="M284.143,355.3l-71.919,18.038V496l71.919-37.637V355.3Z"/>
|
32
|
+
<path class="cls-1" d="M212.224,196.444l71.919,7.823V309.105l-71.919,8.228V196.444Z"/>
|
33
|
+
<path class="cls-1" d="M212,16l72,38V159l-72-20V16Z"/>
|
34
|
+
</svg>
|
@@ -27,6 +27,8 @@ require "zip"
|
|
27
27
|
require "zendesk_api"
|
28
28
|
require "faraday"
|
29
29
|
require "base64"
|
30
|
+
require "aws-sdk-s3"
|
31
|
+
require "duckdb"
|
30
32
|
require "iterable-api-client"
|
31
33
|
|
32
34
|
# Service
|
@@ -55,6 +57,7 @@ require_relative "integrations/source/databricks/client"
|
|
55
57
|
require_relative "integrations/source/salesforce_consumer_goods_cloud/client"
|
56
58
|
require_relative "integrations/source/aws_athena/client"
|
57
59
|
require_relative "integrations/source/clickhouse/client"
|
60
|
+
require_relative "integrations/source/amazon_s3/client"
|
58
61
|
|
59
62
|
# Destination
|
60
63
|
require_relative "integrations/destination/klaviyo/client"
|
@@ -36,10 +36,12 @@ Gem::Specification.new do |spec|
|
|
36
36
|
spec.add_runtime_dependency "activesupport"
|
37
37
|
spec.add_runtime_dependency "async-websocket"
|
38
38
|
spec.add_runtime_dependency "aws-sdk-athena"
|
39
|
+
spec.add_runtime_dependency "aws-sdk-s3"
|
39
40
|
spec.add_runtime_dependency "csv"
|
40
41
|
spec.add_runtime_dependency "dry-schema"
|
41
42
|
spec.add_runtime_dependency "dry-struct"
|
42
43
|
spec.add_runtime_dependency "dry-types"
|
44
|
+
spec.add_runtime_dependency "duckdb"
|
43
45
|
spec.add_runtime_dependency "git"
|
44
46
|
spec.add_runtime_dependency "google-apis-sheets_v4"
|
45
47
|
spec.add_runtime_dependency "google-cloud-bigquery"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multiwoven-integrations
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.76
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Subin T P
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
11
|
+
date: 2024-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: aws-sdk-s3
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: csv
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,20 @@ dependencies:
|
|
108
122
|
- - ">="
|
109
123
|
- !ruby/object:Gem::Version
|
110
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: duckdb
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: git
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -519,6 +547,10 @@ files:
|
|
519
547
|
- lib/multiwoven/integrations/protocol/protocol.rb
|
520
548
|
- lib/multiwoven/integrations/rollout.rb
|
521
549
|
- lib/multiwoven/integrations/service.rb
|
550
|
+
- lib/multiwoven/integrations/source/amazon_s3/client.rb
|
551
|
+
- lib/multiwoven/integrations/source/amazon_s3/config/meta.json
|
552
|
+
- lib/multiwoven/integrations/source/amazon_s3/config/spec.json
|
553
|
+
- lib/multiwoven/integrations/source/amazon_s3/icon.svg
|
522
554
|
- lib/multiwoven/integrations/source/aws_athena/client.rb
|
523
555
|
- lib/multiwoven/integrations/source/aws_athena/config/meta.json
|
524
556
|
- lib/multiwoven/integrations/source/aws_athena/config/spec.json
|