multiwoven-integrations 0.1.76 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 489b7124c7814169b6c17fbd7de9cbde740e2f23f4ce0e0f0f74ddf6ba1b73c8
4
- data.tar.gz: 74fb01f7fa855194d7585df7ab7c79afd8a84d811be3989c30b53e3232174c9b
3
+ metadata.gz: 91cf72711d231942521cb83dd735b0a4b9067bd30d2aaeecf2be27d69d163da2
4
+ data.tar.gz: 5f2cc42a72e86cb0d8d1ed590c6fc337aa4539892305fc4f0502c74c7bb5dfa7
5
5
  SHA512:
6
- metadata.gz: 470ab95e9f07707468d2baa2f414436545ffa6f51d456ee2b498ae3b88e71df0fd3e73fadf8ca2d7429238a4b8676272bbd9cec81aa762745c571625b7be2791
7
- data.tar.gz: 30cf3c3609382f06a4d7509e2923902011dbe85f0ff23aecd8ae5ec392289de759388a438389ca3bc22aeb68ce507b0aa9ef0c8131a9d0fa5db47cde381dfff1
6
+ metadata.gz: 392ac958aeb012d4e1bf48daadbe168cf8e0cabd095fd79d155ed8fa4cc14cd8967974d9ee0085e4cf36ac9b78111d09da6e631fe21f97fd5be2136200fd56af
7
+ data.tar.gz: e3e76a87549ad98f9bed153746035fc1c6672ec30754eb1943a54d43cf2dbe253cbc3bb4235c4b11c9b98544856ff432cbd067af36bada6a12422d835426acca
@@ -13,6 +13,12 @@ module Multiwoven
13
13
 
14
14
  private
15
15
 
16
+ # This needs to be implemented as private method
17
+ # In every source connector. This will be used for model preview
18
+ def create_connection(connector_config)
19
+ # Base stub with an empty body (returns nil); each source connector overrides it to return a connection to the client's source
20
+ end
21
+
16
22
  # This needs to be implemented as private method
17
23
  # In every source connector. This will be used for model preview
18
24
  def query(connection, query)
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.1.76"
5
+ VERSION = "0.2.0"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -13,6 +13,7 @@ module Multiwoven
13
13
  SalesforceConsumerGoodsCloud
14
14
  AwsAthena
15
15
  Clickhouse
16
+ AmazonS3
16
17
  ].freeze
17
18
 
18
19
  ENABLED_DESTINATIONS = %w[
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Multiwoven::Integrations::Source
4
+ module AmazonS3
5
+ include Multiwoven::Integrations::Core
6
+ class Client < SourceConnector
7
+ DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
8
+
9
# Checks that the configured credentials can reach the configured bucket.
#
# HeadBucket is used instead of GetBucketPolicyStatus: the latter fails with
# NoSuchBucketPolicy for every bucket that has no bucket policy attached, which
# reported reachable buckets as failed connections.
#
# @param connection_config [Hash] connector settings; :bucket is read here and
#   the whole hash is handed to config_aws to build the client
# @return the multiwoven message of a ConnectionStatus: "succeeded" when the
#   request goes through, otherwise "failed" carrying the error text
def check_connection(connection_config)
  connection_config = connection_config.with_indifferent_access
  client = config_aws(connection_config)
  client.head_bucket(bucket: connection_config[:bucket])
  ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
rescue StandardError => e
  # HEAD responses carry no body, so the error text can be blank; fall back to
  # the error class name so the failure is still identifiable.
  reason = e.message.to_s.strip.empty? ? e.class.name : e.message
  ConnectionStatus.new(status: ConnectionStatusType["failed"], message: reason).to_multiwoven_message
end
17
+
18
# Builds the catalog for the configured bucket and path.
#
# Opens a connection through create_connection, runs a DESCRIBE over the glob
# "s3://<bucket>/<path>*.<file_type>" and wraps the resulting columns in a
# single "fetch" stream named after that glob. Any StandardError is passed to
# handle_exception.
#
# @param connection_config [Hash] connector settings (:bucket, :path,
#   :file_type are read here; the whole hash goes to create_connection)
# @return the multiwoven message of the Catalog
def discover(connection_config)
  settings = connection_config.with_indifferent_access
  duckdb = create_connection(settings)
  # Every file matched by the glob is expected to share one schema.
  prefix = build_path(settings[:path])
  full_path = "s3://#{settings[:bucket]}/#{prefix}*.#{settings[:file_type]}"
  described = get_results(duckdb, "DESCRIBE SELECT * FROM '#{full_path}';")
  columns = build_discover_columns(described)
  stream = Multiwoven::Integrations::Protocol::Stream.new(
    name: full_path,
    action: StreamAction["fetch"],
    json_schema: convert_to_json_schema(columns)
  )
  Catalog.new(streams: [stream]).to_multiwoven_message
rescue StandardError => e
  handle_exception(e, { context: "AMAZONS3:DISCOVER:EXCEPTION", type: "error" })
end
32
+
33
# Runs the model's query against the source and returns the record messages.
#
# The query is wrapped with batched_query unless both limit and offset are nil.
# Any StandardError is passed to handle_exception together with the sync ids.
#
# @param sync_config sync configuration; source.connection_specification,
#   model.query, limit, offset, sync_id and sync_run_id are read from it
# @return whatever the private query method returns for the final SQL
def read(sync_config)
  settings = sync_config.source.connection_specification.with_indifferent_access
  duckdb = create_connection(settings)
  sql = sync_config.model.query
  paginated = !(sync_config.limit.nil? && sync_config.offset.nil?)
  sql = batched_query(sql, sync_config.limit, sync_config.offset) if paginated
  query(duckdb, sql)
rescue StandardError => e
  error_details = {
    context: "AMAZONS3:READ:EXCEPTION",
    type: "error",
    sync_id: sync_config.sync_id,
    sync_run_id: sync_config.sync_run_id
  }
  handle_exception(e, error_details)
end
47
+
48
+ private
49
+
50
+ # DuckDB
51
# Opens a DuckDB connection and registers the S3 credentials as a secret.
#
# The credentials end up inside SQL string literals, so every single quote in
# them is doubled first; otherwise a quote in a value would terminate the
# literal and break (or alter) the CREATE SECRET statement.
#
# @param connection_config [Hash] connector settings; :access_id,
#   :secret_access and :region are read
# @return the DuckDB connection with the secret registered
def create_connection(connection_config)
  conn = DuckDB::Database.open.connect
  quote = ->(value) { value.to_s.gsub("'", "''") }
  # Set up S3 configuration
  secret_query = <<~SQL
    CREATE SECRET amazons3_source (
      TYPE S3,
      KEY_ID '#{quote.call(connection_config[:access_id])}',
      SECRET '#{quote.call(connection_config[:secret_access])}',
      REGION '#{quote.call(connection_config[:region])}'
    );
  SQL
  get_results(conn, secret_query)
  conn
end
65
+
66
# Ensures a non-blank path ends with "/".
#
# @param path [String, nil] key prefix from the connector settings
# @return the path unchanged when it is nil, blank or already ends with "/",
#   otherwise a new string with "/" appended
def build_path(path)
  return path if path.to_s.strip.empty?
  return path if path[-1] == "/"

  "#{path}/"
end
70
+
71
# Runs a statement on the connection and converts the result with
# hash_array_values.
#
# @param conn connection object responding to #query
# @param query [String] SQL to execute
# @return the value returned by hash_array_values for the query result
def get_results(conn, query)
  hash_array_values(conn.query(query))
end
75
+
76
# Executes the SQL and wraps each resulting row in a record message.
#
# @param conn connection object passed on to get_results
# @param query [String] SQL to execute
# @return [Array] one multiwoven message per row, each built from a
#   RecordMessage stamped with the current epoch seconds
def query(conn, query)
  get_results(conn, query).map do |record|
    message = RecordMessage.new(data: record, emitted_at: Time.now.to_i)
    message.to_multiwoven_message
  end
end
82
+
83
# Converts a query result into an array of hashes keyed by column name.
#
# @param describe result object exposing #columns (each responding to #name)
#   and #map over its rows
# @return [Array<Hash>] one hash per row, pairing column names with row values
def hash_array_values(describe)
  column_names = describe.columns.map { |column| column.name }
  describe.map { |values| column_names.zip(values).to_h }
end
89
+
90
# Maps DESCRIBE rows to the column descriptors used for the stream schema.
#
# @param describe_results [Array<Hash>] rows with "column_name" and
#   "column_type" keys
# @return [Array<Hash>] hashes with :column_name and :type, where :type is the
#   result of column_schema_helper for the row's column type
def build_discover_columns(describe_results)
  describe_results.map do |described|
    {
      column_name: described["column_name"],
      type: column_schema_helper(described["column_type"])
    }
  end
end
99
+
100
# Maps a DuckDB column type name to a JSON-schema type name.
#
# The previous version returned nil for every type it did not list (FLOAT,
# DECIMAL(18,3), TINYINT, unsigned integers, BLOB, ...), which left the
# column without a usable schema type. All previously handled types keep
# their mapping; anything else now falls back to "string".
#
# @param column_type [String, nil] type name, optionally with a
#   parenthesised precision/scale such as "DECIMAL(18,3)"
# @return [String] "number", "integer", "boolean" or "string"
def column_schema_helper(column_type)
  # Drop any "(precision, scale)" suffix so only the base type name is matched.
  base_type = column_type.to_s.split("(", 2).first.to_s.strip.upcase
  case base_type
  when "DOUBLE", "FLOAT", "REAL", "DECIMAL", "NUMERIC"
    "number"
  when "BIGINT", "HUGEINT", "INTEGER", "SMALLINT", "TINYINT",
       "UBIGINT", "UHUGEINT", "UINTEGER", "USMALLINT", "UTINYINT"
    "integer"
  when "BOOLEAN"
    "boolean"
  else
    # VARCHAR, BIT, DATE, TIME, TIMESTAMP, UUID and any unrecognised type.
    "string"
  end
end
112
+
113
+ # AWS SDK
114
# Builds an S3 client for the given connector settings.
#
# Region and credentials are passed to the client itself instead of being
# written into the process-wide Aws.config, so one connector's credentials no
# longer leak into every other AWS client created in the same process.
#
# @param config [Hash] connector settings; :region, :access_id and
#   :secret_access are read
# @return [Aws::S3::Client] client bound to that region and those credentials
def config_aws(config)
  config = config.with_indifferent_access
  Aws::S3::Client.new(
    region: config[:region],
    credentials: Aws::Credentials.new(config[:access_id], config[:secret_access])
  )
end
123
+
124
# Assembles the option hash for an SQL select over a single S3 object.
#
# @param config [Hash] connector settings; :bucket, :file_key and :file_type
#   are read
# @param query [String] SQL expression to place in the options
# @return [Hash] options with JSON output serialization; :input_serialization
#   is added only when the file type is "parquet" or "csv"
def build_select_content_options(config, query)
  settings = config.with_indifferent_access
  select_options = {
    bucket: settings[:bucket],
    key: settings[:file_key],
    expression_type: "SQL",
    expression: query,
    output_serialization: { json: {} }
  }
  input_format =
    case settings[:file_type]
    when "parquet" then { parquet: {} }
    when "csv" then { csv: { file_header_info: "USE" } }
    end
  select_options[:input_serialization] = input_format if input_format
  select_options
end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,15 @@
1
+ {
2
+ "data": {
3
+ "name": "AmazonS3",
4
+ "title": "Amazon S3",
5
+ "connector_type": "source",
6
+ "category": "Data Lake",
7
+ "documentation_url": "https://docs.multiwoven.com",
8
+ "github_issue_label": "source-amazons3",
9
+ "icon": "icon.svg",
10
+ "license": "MIT",
11
+ "release_stage": "alpha",
12
+ "support_level": "community",
13
+ "tags": ["language:ruby", "multiwoven"]
14
+ }
15
+ }
@@ -0,0 +1,51 @@
1
+ {
2
+ "documentation_url": "https://docs.multiwoven.com/integrations/sources/amazons3",
3
+ "stream_type": "dynamic",
4
+ "connector_query_type": "raw_sql",
5
+ "connection_specification": {
6
+ "$schema": "http://json-schema.org/draft-07/schema#",
7
+ "title": "AmazonS3",
8
+ "type": "object",
9
+ "required": ["region", "bucket", "access_id", "secret_access", "file_type"],
10
+ "properties": {
11
+ "region": {
12
+ "description": "AWS region",
13
+ "examples": ["us-east-2"],
14
+ "type": "string",
15
+ "title": "Region",
16
+ "order": 1
17
+ },
18
+ "access_id": {
19
+ "type": "string",
20
+ "title": "Access Id",
21
+ "order": 2
22
+ },
23
+ "secret_access": {
24
+ "type": "string",
25
+ "title": "Secret Access",
26
+ "multiwoven_secret": true,
27
+ "order": 3
28
+ },
29
+ "bucket": {
30
+ "description": "Bucket Name",
31
+ "type": "string",
32
+ "title": "Bucket",
33
+ "order": 4
34
+ },
35
+ "path": {
36
+ "description": "Path to csv or parquet files",
37
+ "examples": ["/path/to/files"],
38
+ "type": "string",
39
+ "title": "Path",
40
+ "order": 5
41
+ },
42
+ "file_type": {
43
+ "description": "The type of file to read",
44
+ "type": "string",
45
+ "title": "File Type",
46
+ "enum": ["csv", "parquet"],
47
+ "order": 6
48
+ }
49
+ }
50
+ }
51
+ }
@@ -0,0 +1,34 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" width="428" height="512" viewBox="0 0 428 512">
2
+ <defs>
3
+ <style>
4
+ .cls-1 {
5
+ fill: #e25444;
6
+ }
7
+
8
+ .cls-1, .cls-2, .cls-3 {
9
+ fill-rule: evenodd;
10
+ }
11
+
12
+ .cls-2 {
13
+ fill: #7b1d13;
14
+ }
15
+
16
+ .cls-3 {
17
+ fill: #58150d;
18
+ }
19
+ </style>
20
+ </defs>
21
+ <path class="cls-1" d="M378,99L295,257l83,158,34-19V118Z"/>
22
+ <path class="cls-2" d="M378,99L212,118,127.5,257,212,396l166,19V99Z"/>
23
+ <path class="cls-3" d="M43,99L16,111V403l27,12L212,257Z"/>
24
+ <path class="cls-1" d="M42.637,98.667l169.587,47.111V372.444L42.637,415.111V98.667Z"/>
25
+ <path class="cls-3" d="M212.313,170.667l-72.008-11.556,72.008-81.778,71.83,81.778Z"/>
26
+ <path class="cls-3" d="M284.143,159.111l-71.919,11.733-71.919-11.733V77.333"/>
27
+ <path class="cls-3" d="M212.313,342.222l-72.008,13.334,72.008,70.222,71.83-70.222Z"/>
28
+ <path class="cls-2" d="M212,16L140,54V159l72.224-20.333Z"/>
29
+ <path class="cls-2" d="M212.224,196.444l-71.919,7.823V309.105l71.919,8.228V196.444Z"/>
30
+ <path class="cls-2" d="M212.224,373.333L140.305,355.3V458.363L212.224,496V373.333Z"/>
31
+ <path class="cls-1" d="M284.143,355.3l-71.919,18.038V496l71.919-37.637V355.3Z"/>
32
+ <path class="cls-1" d="M212.224,196.444l71.919,7.823V309.105l-71.919,8.228V196.444Z"/>
33
+ <path class="cls-1" d="M212,16l72,38V159l-72-20V16Z"/>
34
+ </svg>
@@ -27,6 +27,8 @@ require "zip"
27
27
  require "zendesk_api"
28
28
  require "faraday"
29
29
  require "base64"
30
+ require "aws-sdk-s3"
31
+ require "duckdb"
30
32
  require "iterable-api-client"
31
33
 
32
34
  # Service
@@ -55,6 +57,7 @@ require_relative "integrations/source/databricks/client"
55
57
  require_relative "integrations/source/salesforce_consumer_goods_cloud/client"
56
58
  require_relative "integrations/source/aws_athena/client"
57
59
  require_relative "integrations/source/clickhouse/client"
60
+ require_relative "integrations/source/amazon_s3/client"
58
61
 
59
62
  # Destination
60
63
  require_relative "integrations/destination/klaviyo/client"
@@ -36,10 +36,12 @@ Gem::Specification.new do |spec|
36
36
  spec.add_runtime_dependency "activesupport"
37
37
  spec.add_runtime_dependency "async-websocket"
38
38
  spec.add_runtime_dependency "aws-sdk-athena"
39
+ spec.add_runtime_dependency "aws-sdk-s3"
39
40
  spec.add_runtime_dependency "csv"
40
41
  spec.add_runtime_dependency "dry-schema"
41
42
  spec.add_runtime_dependency "dry-struct"
42
43
  spec.add_runtime_dependency "dry-types"
44
+ spec.add_runtime_dependency "duckdb"
43
45
  spec.add_runtime_dependency "git"
44
46
  spec.add_runtime_dependency "google-apis-sheets_v4"
45
47
  spec.add_runtime_dependency "google-cloud-bigquery"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.76
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-13 00:00:00.000000000 Z
11
+ date: 2024-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: aws-sdk-s3
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: csv
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +122,20 @@ dependencies:
108
122
  - - ">="
109
123
  - !ruby/object:Gem::Version
110
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: duckdb
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
111
139
  - !ruby/object:Gem::Dependency
112
140
  name: git
113
141
  requirement: !ruby/object:Gem::Requirement
@@ -519,6 +547,10 @@ files:
519
547
  - lib/multiwoven/integrations/protocol/protocol.rb
520
548
  - lib/multiwoven/integrations/rollout.rb
521
549
  - lib/multiwoven/integrations/service.rb
550
+ - lib/multiwoven/integrations/source/amazon_s3/client.rb
551
+ - lib/multiwoven/integrations/source/amazon_s3/config/meta.json
552
+ - lib/multiwoven/integrations/source/amazon_s3/config/spec.json
553
+ - lib/multiwoven/integrations/source/amazon_s3/icon.svg
522
554
  - lib/multiwoven/integrations/source/aws_athena/client.rb
523
555
  - lib/multiwoven/integrations/source/aws_athena/config/meta.json
524
556
  - lib/multiwoven/integrations/source/aws_athena/config/spec.json