multiwoven-integrations 0.1.76 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 489b7124c7814169b6c17fbd7de9cbde740e2f23f4ce0e0f0f74ddf6ba1b73c8
4
- data.tar.gz: 74fb01f7fa855194d7585df7ab7c79afd8a84d811be3989c30b53e3232174c9b
3
+ metadata.gz: 91cf72711d231942521cb83dd735b0a4b9067bd30d2aaeecf2be27d69d163da2
4
+ data.tar.gz: 5f2cc42a72e86cb0d8d1ed590c6fc337aa4539892305fc4f0502c74c7bb5dfa7
5
5
  SHA512:
6
- metadata.gz: 470ab95e9f07707468d2baa2f414436545ffa6f51d456ee2b498ae3b88e71df0fd3e73fadf8ca2d7429238a4b8676272bbd9cec81aa762745c571625b7be2791
7
- data.tar.gz: 30cf3c3609382f06a4d7509e2923902011dbe85f0ff23aecd8ae5ec392289de759388a438389ca3bc22aeb68ce507b0aa9ef0c8131a9d0fa5db47cde381dfff1
6
+ metadata.gz: 392ac958aeb012d4e1bf48daadbe168cf8e0cabd095fd79d155ed8fa4cc14cd8967974d9ee0085e4cf36ac9b78111d09da6e631fe21f97fd5be2136200fd56af
7
+ data.tar.gz: e3e76a87549ad98f9bed153746035fc1c6672ec30754eb1943a54d43cf2dbe253cbc3bb4235c4b11c9b98544856ff432cbd067af36bada6a12422d835426acca
@@ -13,6 +13,12 @@ module Multiwoven
13
13
 
14
14
  private
15
15
 
16
+ # This needs to be implemented as a private method
17
+ # in every source connector. This will be used for model preview.
18
+ def create_connection(connector_config)
19
+ # return a connection to the client's source
20
+ end
21
+
16
22
  # This needs to be implemented as a private method
17
23
  # in every source connector. This will be used for model preview.
18
24
  def query(connection, query)
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.1.76"
5
+ VERSION = "0.2.0"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -13,6 +13,7 @@ module Multiwoven
13
13
  SalesforceConsumerGoodsCloud
14
14
  AwsAthena
15
15
  Clickhouse
16
+ AmazonS3
16
17
  ].freeze
17
18
 
18
19
  ENABLED_DESTINATIONS = %w[
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Multiwoven::Integrations::Source
4
+ module AmazonS3
5
+ include Multiwoven::Integrations::Core
6
+ class Client < SourceConnector
7
+ DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
8
+
9
+ def check_connection(connection_config)
10
+ connection_config = connection_config.with_indifferent_access
11
+ client = config_aws(connection_config)
12
+ client.get_bucket_policy_status({ bucket: connection_config[:bucket] })
13
+ ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
14
+ rescue StandardError => e
15
+ ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
16
+ end
17
+
18
+ def discover(connection_config)
19
+ connection_config = connection_config.with_indifferent_access
20
+ conn = create_connection(connection_config)
21
+ # If pulling from multiple files, all files must have the same schema
22
+ path = build_path(connection_config[:path])
23
+ full_path = "s3://#{connection_config[:bucket]}/#{path}*.#{connection_config[:file_type]}"
24
+ records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
25
+ columns = build_discover_columns(records)
26
+ streams = [Multiwoven::Integrations::Protocol::Stream.new(name: full_path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
+ catalog = Catalog.new(streams: streams)
28
+ catalog.to_multiwoven_message
29
+ rescue StandardError => e
30
+ handle_exception(e, { context: "AMAZONS3:DISCOVER:EXCEPTION", type: "error" })
31
+ end
32
+
33
+ def read(sync_config)
34
+ connection_config = sync_config.source.connection_specification.with_indifferent_access
35
+ conn = create_connection(connection_config)
36
+ query = sync_config.model.query
37
+ query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
38
+ query(conn, query)
39
+ rescue StandardError => e
40
+ handle_exception(e, {
41
+ context: "AMAZONS3:READ:EXCEPTION",
42
+ type: "error",
43
+ sync_id: sync_config.sync_id,
44
+ sync_run_id: sync_config.sync_run_id
45
+ })
46
+ end
47
+
48
+ private
49
+
50
+ # DuckDB
51
+ def create_connection(connection_config)
52
+ conn = DuckDB::Database.open.connect
53
+ # Set up S3 configuration
54
+ secret_query = "
55
+ CREATE SECRET amazons3_source (
56
+ TYPE S3,
57
+ KEY_ID '#{connection_config[:access_id]}',
58
+ SECRET '#{connection_config[:secret_access]}',
59
+ REGION '#{connection_config[:region]}'
60
+ );
61
+ "
62
+ get_results(conn, secret_query)
63
+ conn
64
+ end
65
+
66
+ def build_path(path)
67
+ path = "#{path}/" if !path.to_s.strip.empty? && path[-1] != "/"
68
+ path
69
+ end
70
+
71
+ def get_results(conn, query)
72
+ results = conn.query(query)
73
+ hash_array_values(results)
74
+ end
75
+
76
+ def query(conn, query)
77
+ records = get_results(conn, query)
78
+ records.map do |row|
79
+ RecordMessage.new(data: row, emitted_at: Time.now.to_i).to_multiwoven_message
80
+ end
81
+ end
82
+
83
+ def hash_array_values(describe)
84
+ keys = describe.columns.map(&:name)
85
+ describe.map do |row|
86
+ Hash[keys.zip(row)]
87
+ end
88
+ end
89
+
90
+ def build_discover_columns(describe_results)
91
+ describe_results.map do |row|
92
+ type = column_schema_helper(row["column_type"])
93
+ {
94
+ column_name: row["column_name"],
95
+ type: type
96
+ }
97
+ end
98
+ end
99
+
100
+ def column_schema_helper(column_type)
101
+ case column_type
102
+ when "VARCHAR", "BIT", "DATE", "TIME", "TIMESTAMP", "UUID"
103
+ "string"
104
+ when "DOUBLE"
105
+ "number"
106
+ when "BIGINT", "HUGEINT", "INTEGER", "SMALLINT"
107
+ "integer"
108
+ when "BOOLEAN"
109
+ "boolean"
110
+ end
111
+ end
112
+
113
+ # AWS SDK
114
+ def config_aws(config)
115
+ config = config.with_indifferent_access
116
+ Aws.config.update({
117
+ region: config[:region],
118
+ credentials: Aws::Credentials.new(config[:access_id], config[:secret_access])
119
+ })
120
+ config.with_indifferent_access
121
+ Aws::S3::Client.new
122
+ end
123
+
124
+ def build_select_content_options(config, query)
125
+ config = config.with_indifferent_access
126
+ bucket_name = config[:bucket]
127
+ file_key = config[:file_key]
128
+ file_type = config[:file_type]
129
+ options = {
130
+ bucket: bucket_name,
131
+ key: file_key,
132
+ expression_type: "SQL",
133
+ expression: query,
134
+ output_serialization: {
135
+ json: {}
136
+ }
137
+ }
138
+ if file_type == "parquet"
139
+ options[:input_serialization] = {
140
+ parquet: {}
141
+ }
142
+ elsif file_type == "csv"
143
+ options[:input_serialization] = {
144
+ csv: { file_header_info: "USE" }
145
+ }
146
+ end
147
+ options
148
+ end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,15 @@
1
+ {
2
+ "data": {
3
+ "name": "AmazonS3",
4
+ "title": "Amazon S3",
5
+ "connector_type": "source",
6
+ "category": "Data Lake",
7
+ "documentation_url": "https://docs.multiwoven.com",
8
+ "github_issue_label": "source-amazons3",
9
+ "icon": "icon.svg",
10
+ "license": "MIT",
11
+ "release_stage": "alpha",
12
+ "support_level": "community",
13
+ "tags": ["language:ruby", "multiwoven"]
14
+ }
15
+ }
@@ -0,0 +1,51 @@
1
+ {
2
+ "documentation_url": "https://docs.multiwoven.com/integrations/sources/amazons3",
3
+ "stream_type": "dynamic",
4
+ "connector_query_type": "raw_sql",
5
+ "connection_specification": {
6
+ "$schema": "http://json-schema.org/draft-07/schema#",
7
+ "title": "AmazonS3",
8
+ "type": "object",
9
+ "required": ["region", "bucket", "access_id", "secret_access", "file_type"],
10
+ "properties": {
11
+ "region": {
12
+ "description": "AWS region",
13
+ "examples": ["us-east-2"],
14
+ "type": "string",
15
+ "title": "Region",
16
+ "order": 1
17
+ },
18
+ "access_id": {
19
+ "type": "string",
20
+ "title": "Access Id",
21
+ "order": 2
22
+ },
23
+ "secret_access": {
24
+ "type": "string",
25
+ "title": "Secret Access",
26
+ "multiwoven_secret": true,
27
+ "order": 3
28
+ },
29
+ "bucket": {
30
+ "description": "Bucket Name",
31
+ "type": "string",
32
+ "title": "Bucket",
33
+ "order": 4
34
+ },
35
+ "path": {
36
+ "description": "Path to csv or parquet files",
37
+ "examples": ["/path/to/files"],
38
+ "type": "string",
39
+ "title": "Path",
40
+ "order": 5
41
+ },
42
+ "file_type": {
43
+ "description": "The type of file to read",
44
+ "type": "string",
45
+ "title": "File Type",
46
+ "enum": ["csv", "parquet"],
47
+ "order": 6
48
+ }
49
+ }
50
+ }
51
+ }
@@ -0,0 +1,34 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" width="428" height="512" viewBox="0 0 428 512">
2
+ <defs>
3
+ <style>
4
+ .cls-1 {
5
+ fill: #e25444;
6
+ }
7
+
8
+ .cls-1, .cls-2, .cls-3 {
9
+ fill-rule: evenodd;
10
+ }
11
+
12
+ .cls-2 {
13
+ fill: #7b1d13;
14
+ }
15
+
16
+ .cls-3 {
17
+ fill: #58150d;
18
+ }
19
+ </style>
20
+ </defs>
21
+ <path class="cls-1" d="M378,99L295,257l83,158,34-19V118Z"/>
22
+ <path class="cls-2" d="M378,99L212,118,127.5,257,212,396l166,19V99Z"/>
23
+ <path class="cls-3" d="M43,99L16,111V403l27,12L212,257Z"/>
24
+ <path class="cls-1" d="M42.637,98.667l169.587,47.111V372.444L42.637,415.111V98.667Z"/>
25
+ <path class="cls-3" d="M212.313,170.667l-72.008-11.556,72.008-81.778,71.83,81.778Z"/>
26
+ <path class="cls-3" d="M284.143,159.111l-71.919,11.733-71.919-11.733V77.333"/>
27
+ <path class="cls-3" d="M212.313,342.222l-72.008,13.334,72.008,70.222,71.83-70.222Z"/>
28
+ <path class="cls-2" d="M212,16L140,54V159l72.224-20.333Z"/>
29
+ <path class="cls-2" d="M212.224,196.444l-71.919,7.823V309.105l71.919,8.228V196.444Z"/>
30
+ <path class="cls-2" d="M212.224,373.333L140.305,355.3V458.363L212.224,496V373.333Z"/>
31
+ <path class="cls-1" d="M284.143,355.3l-71.919,18.038V496l71.919-37.637V355.3Z"/>
32
+ <path class="cls-1" d="M212.224,196.444l71.919,7.823V309.105l-71.919,8.228V196.444Z"/>
33
+ <path class="cls-1" d="M212,16l72,38V159l-72-20V16Z"/>
34
+ </svg>
@@ -27,6 +27,8 @@ require "zip"
27
27
  require "zendesk_api"
28
28
  require "faraday"
29
29
  require "base64"
30
+ require "aws-sdk-s3"
31
+ require "duckdb"
30
32
  require "iterable-api-client"
31
33
 
32
34
  # Service
@@ -55,6 +57,7 @@ require_relative "integrations/source/databricks/client"
55
57
  require_relative "integrations/source/salesforce_consumer_goods_cloud/client"
56
58
  require_relative "integrations/source/aws_athena/client"
57
59
  require_relative "integrations/source/clickhouse/client"
60
+ require_relative "integrations/source/amazon_s3/client"
58
61
 
59
62
  # Destination
60
63
  require_relative "integrations/destination/klaviyo/client"
@@ -36,10 +36,12 @@ Gem::Specification.new do |spec|
36
36
  spec.add_runtime_dependency "activesupport"
37
37
  spec.add_runtime_dependency "async-websocket"
38
38
  spec.add_runtime_dependency "aws-sdk-athena"
39
+ spec.add_runtime_dependency "aws-sdk-s3"
39
40
  spec.add_runtime_dependency "csv"
40
41
  spec.add_runtime_dependency "dry-schema"
41
42
  spec.add_runtime_dependency "dry-struct"
42
43
  spec.add_runtime_dependency "dry-types"
44
+ spec.add_runtime_dependency "duckdb"
43
45
  spec.add_runtime_dependency "git"
44
46
  spec.add_runtime_dependency "google-apis-sheets_v4"
45
47
  spec.add_runtime_dependency "google-cloud-bigquery"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.76
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-13 00:00:00.000000000 Z
11
+ date: 2024-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: aws-sdk-s3
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: csv
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +122,20 @@ dependencies:
108
122
  - - ">="
109
123
  - !ruby/object:Gem::Version
110
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: duckdb
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
111
139
  - !ruby/object:Gem::Dependency
112
140
  name: git
113
141
  requirement: !ruby/object:Gem::Requirement
@@ -519,6 +547,10 @@ files:
519
547
  - lib/multiwoven/integrations/protocol/protocol.rb
520
548
  - lib/multiwoven/integrations/rollout.rb
521
549
  - lib/multiwoven/integrations/service.rb
550
+ - lib/multiwoven/integrations/source/amazon_s3/client.rb
551
+ - lib/multiwoven/integrations/source/amazon_s3/config/meta.json
552
+ - lib/multiwoven/integrations/source/amazon_s3/config/spec.json
553
+ - lib/multiwoven/integrations/source/amazon_s3/icon.svg
522
554
  - lib/multiwoven/integrations/source/aws_athena/client.rb
523
555
  - lib/multiwoven/integrations/source/aws_athena/config/meta.json
524
556
  - lib/multiwoven/integrations/source/aws_athena/config/spec.json