multiwoven-integrations 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f9a7e28f8f3f1a559e5bb4feaec6a030201d447e1f1c7e90e8e8ced256274d5c
4
- data.tar.gz: a16b584bbc112998e54b9a51785498411e98cb8156116f66e082afd55c1c4916
3
+ metadata.gz: e8263bcb86ca81c8262642cd939c8079e671e7cb03a29a905c24f564888038c5
4
+ data.tar.gz: cf5ba4ee0ad1f25967592e72369b3e0452d09715a036b70685d48ee4f29568de
5
5
  SHA512:
6
- metadata.gz: f2e3b44f30db035ac8b87f1cc2ffbecc34aef53630ff7c5757193affaebc0749c827368c72cdb04444a00c0abc800c6c2b0cef97aa2484b532662292e8b0ccd0
7
- data.tar.gz: 0f2650b28d03b4118dcb86d3533cadf6fec2bcb54c0b54f349e79aa7f276b50c9a574e6b3ef87c2b19dda810b18cefcf00dfb470706753e49f5e35c0a5d858a4
6
+ metadata.gz: a0c976f1fd891d8474dbc113bce838c0a566e0127e49829c61c37042b479b3303185193beb66a204a752a87f8a7ec82b512d51fba5437e5239e1fc8a889ddcb9
7
+ data.tar.gz: 0ecd0c38c46db77c5e6c9ea6d8ef94cfd3325f7cb9e5f1a2d86ae6d22c307305595cac210d5d6710843806d8c8891c1b04bc7cf663efcb7e5b42cd3bbe45e39f
data/README.md CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
43
43
  Multiwoven protocol is an open-source standard for moving data from data sources to any third-party destinations.
44
44
  Anyone can build a connector with basic Ruby knowledge using the protocol.
45
45
 
46
+ ## Prerequisites
47
+
48
+ Before you begin the installation, ensure you have the following dependencies installed:
49
+
50
+ - **MySQL Client**
51
+ - Command: `brew install mysql-client`
52
+ - Description: Required for database interactions.
53
+
54
+ - **Zstandard (zstd)**
55
+ - Command: `brew install zstd`
56
+ - Description: Needed for data compression and decompression.
57
+
58
+ - **OpenSSL 3**
59
+ - Command: `brew install openssl@3`
60
+ - Description: Essential for secure communication.
61
+
62
+
46
63
  ### Installation
47
64
 
48
65
  Install the gem and add to the application's Gemfile by executing:
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.3.1"
5
+ VERSION = "0.3.3"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -4,12 +4,13 @@ module Multiwoven::Integrations::Source
4
4
  module AmazonS3
5
5
  include Multiwoven::Integrations::Core
6
6
  class Client < SourceConnector
7
- DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
8
-
7
+ @session_name = ""
9
8
  def check_connection(connection_config)
10
9
  connection_config = connection_config.with_indifferent_access
11
- client = config_aws(connection_config)
12
- client.get_bucket_policy_status({ bucket: connection_config[:bucket] })
10
+ @session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
11
+ conn = create_connection(connection_config)
12
+ path = build_path(connection_config)
13
+ get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
13
14
  ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
14
15
  rescue StandardError => e
15
16
  ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
@@ -17,13 +18,13 @@ module Multiwoven::Integrations::Source
17
18
 
18
19
  def discover(connection_config)
19
20
  connection_config = connection_config.with_indifferent_access
21
+ @session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
20
22
  conn = create_connection(connection_config)
21
23
  # If pulling from multiple files, all files must have the same schema
22
- path = build_path(connection_config[:path])
23
- full_path = "s3://#{connection_config[:bucket]}/#{path}*.#{connection_config[:file_type]}"
24
- records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
24
+ path = build_path(connection_config)
25
+ records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
25
26
  columns = build_discover_columns(records)
26
- streams = [Multiwoven::Integrations::Protocol::Stream.new(name: full_path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
+ streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
28
  catalog = Catalog.new(streams: streams)
28
29
  catalog.to_multiwoven_message
29
30
  rescue StandardError => e
@@ -32,6 +33,7 @@ module Multiwoven::Integrations::Source
32
33
 
33
34
  def read(sync_config)
34
35
  connection_config = sync_config.source.connection_specification.with_indifferent_access
36
+ @session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
35
37
  conn = create_connection(connection_config)
36
38
  query = sync_config.model.query
37
39
  query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
@@ -47,25 +49,48 @@ module Multiwoven::Integrations::Source
47
49
 
48
50
  private
49
51
 
50
- # DuckDB
52
+ def get_auth_data(connection_config)
53
+ session = @session_name
54
+ @session_name = ""
55
+ if connection_config[:auth_type] == "user"
56
+ Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
57
+ elsif connection_config[:auth_type] == "role"
58
+ sts_client = Aws::STS::Client.new(region: connection_config[:region])
59
+ resp = sts_client.assume_role({
60
+ role_arn: connection_config[:arn],
61
+ role_session_name: session
62
+ })
63
+ Aws::Credentials.new(
64
+ resp.credentials.access_key_id,
65
+ resp.credentials.secret_access_key,
66
+ resp.credentials.session_token
67
+ )
68
+ end
69
+ end
70
+
51
71
  def create_connection(connection_config)
72
+ # In the case when previewing a query
73
+ @session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
74
+ auth_data = get_auth_data(connection_config)
52
75
  conn = DuckDB::Database.open.connect
53
76
  # Set up S3 configuration
54
77
  secret_query = "
55
78
  CREATE SECRET amazons3_source (
56
79
  TYPE S3,
57
- KEY_ID '#{connection_config[:access_id]}',
58
- SECRET '#{connection_config[:secret_access]}',
59
- REGION '#{connection_config[:region]}'
80
+ KEY_ID '#{auth_data.credentials.access_key_id}',
81
+ SECRET '#{auth_data.credentials.secret_access_key}',
82
+ REGION '#{connection_config[:region]}',
83
+ SESSION_TOKEN '#{auth_data.credentials.session_token}'
60
84
  );
61
85
  "
62
86
  get_results(conn, secret_query)
63
87
  conn
64
88
  end
65
89
 
66
- def build_path(path)
67
- path = "#{path}/" if !path.to_s.strip.empty? && path[-1] != "/"
68
- path
90
+ def build_path(connection_config)
91
+ path = connection_config[:path]
92
+ path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
93
+ "s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
69
94
  end
70
95
 
71
96
  def get_results(conn, query)
@@ -109,43 +134,6 @@ module Multiwoven::Integrations::Source
109
134
  "boolean"
110
135
  end
111
136
  end
112
-
113
- # AWS SDK
114
- def config_aws(config)
115
- config = config.with_indifferent_access
116
- Aws.config.update({
117
- region: config[:region],
118
- credentials: Aws::Credentials.new(config[:access_id], config[:secret_access])
119
- })
120
- config.with_indifferent_access
121
- Aws::S3::Client.new
122
- end
123
-
124
- def build_select_content_options(config, query)
125
- config = config.with_indifferent_access
126
- bucket_name = config[:bucket]
127
- file_key = config[:file_key]
128
- file_type = config[:file_type]
129
- options = {
130
- bucket: bucket_name,
131
- key: file_key,
132
- expression_type: "SQL",
133
- expression: query,
134
- output_serialization: {
135
- json: {}
136
- }
137
- }
138
- if file_type == "parquet"
139
- options[:input_serialization] = {
140
- parquet: {}
141
- }
142
- elsif file_type == "csv"
143
- options[:input_serialization] = {
144
- csv: { file_header_info: "USE" }
145
- }
146
- end
147
- options
148
- end
149
137
  end
150
138
  end
151
139
  end
@@ -6,13 +6,51 @@
6
6
  "$schema": "http://json-schema.org/draft-07/schema#",
7
7
  "title": "AmazonS3",
8
8
  "type": "object",
9
- "required": ["region", "bucket", "access_id", "secret_access", "file_type"],
9
+ "if": {
10
+ "properties": {
11
+ "auth_type": {
12
+ "enum": ["user"]
13
+ }
14
+ }
15
+ },
16
+ "then": {
17
+ "required": [
18
+ "auth_type",
19
+ "region",
20
+ "bucket",
21
+ "access_id",
22
+ "secret_access",
23
+ "file_type"
24
+ ]
25
+ },
26
+ "else": {
27
+ "required": [
28
+ "auth_type",
29
+ "region",
30
+ "bucket",
31
+ "arn",
32
+ "file_type"
33
+ ]
34
+ },
10
35
  "properties": {
11
- "region": {
12
- "description": "AWS region",
13
- "examples": ["us-east-2"],
36
+ "auth_type": {
37
+ "title": "Authentication type",
14
38
  "type": "string",
15
- "title": "Region",
39
+ "default": "user",
40
+ "description": "Authenticate either by using an IAM User (Access Key ID & Secret Access Key) or an IAM Role (ARN)",
41
+ "enum": [
42
+ "user",
43
+ "role"
44
+ ],
45
+ "enumNames": [
46
+ "IAM User",
47
+ "IAM Role"
48
+ ],
49
+ "order": 0
50
+ },
51
+ "arn": {
52
+ "type": "string",
53
+ "title": "IAM Role ARN",
16
54
  "order": 1
17
55
  },
18
56
  "access_id": {
@@ -26,26 +64,40 @@
26
64
  "multiwoven_secret": true,
27
65
  "order": 3
28
66
  },
67
+ "region": {
68
+ "description": "AWS region",
69
+ "examples": [
70
+ "us-east-2"
71
+ ],
72
+ "type": "string",
73
+ "title": "Region",
74
+ "order": 4
75
+ },
29
76
  "bucket": {
30
77
  "description": "Bucket Name",
31
78
  "type": "string",
32
79
  "title": "Bucket",
33
- "order": 4
80
+ "order": 5
34
81
  },
35
82
  "path": {
36
83
  "description": "Path to csv or parquet files",
37
- "examples": ["/path/to/files"],
84
+ "examples": [
85
+ "/path/to/files"
86
+ ],
38
87
  "type": "string",
39
88
  "title": "Path",
40
- "order": 5
89
+ "order": 6
41
90
  },
42
91
  "file_type": {
43
92
  "description": "The type of file to read",
44
93
  "type": "string",
45
94
  "title": "File Type",
46
- "enum": ["csv", "parquet"],
47
- "order": 6
95
+ "enum": [
96
+ "csv",
97
+ "parquet"
98
+ ],
99
+ "order": 7
48
100
  }
49
101
  }
50
102
  }
51
- }
103
+ }
@@ -30,6 +30,7 @@ require "base64"
30
30
  require "aws-sdk-s3"
31
31
  require "duckdb"
32
32
  require "iterable-api-client"
33
+ require "aws-sdk-sts"
33
34
 
34
35
  # Service
35
36
  require_relative "integrations/config"
@@ -37,6 +37,7 @@ Gem::Specification.new do |spec|
37
37
  spec.add_runtime_dependency "async-websocket"
38
38
  spec.add_runtime_dependency "aws-sdk-athena"
39
39
  spec.add_runtime_dependency "aws-sdk-s3"
40
+ spec.add_runtime_dependency "aws-sdk-sts"
40
41
  spec.add_runtime_dependency "csv"
41
42
  spec.add_runtime_dependency "dry-schema"
42
43
  spec.add_runtime_dependency "dry-struct"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-26 00:00:00.000000000 Z
11
+ date: 2024-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: aws-sdk-sts
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: csv
71
85
  requirement: !ruby/object:Gem::Requirement