multiwoven-integrations 0.3.1 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f9a7e28f8f3f1a559e5bb4feaec6a030201d447e1f1c7e90e8e8ced256274d5c
4
- data.tar.gz: a16b584bbc112998e54b9a51785498411e98cb8156116f66e082afd55c1c4916
3
+ metadata.gz: e8263bcb86ca81c8262642cd939c8079e671e7cb03a29a905c24f564888038c5
4
+ data.tar.gz: cf5ba4ee0ad1f25967592e72369b3e0452d09715a036b70685d48ee4f29568de
5
5
  SHA512:
6
- metadata.gz: f2e3b44f30db035ac8b87f1cc2ffbecc34aef53630ff7c5757193affaebc0749c827368c72cdb04444a00c0abc800c6c2b0cef97aa2484b532662292e8b0ccd0
7
- data.tar.gz: 0f2650b28d03b4118dcb86d3533cadf6fec2bcb54c0b54f349e79aa7f276b50c9a574e6b3ef87c2b19dda810b18cefcf00dfb470706753e49f5e35c0a5d858a4
6
+ metadata.gz: a0c976f1fd891d8474dbc113bce838c0a566e0127e49829c61c37042b479b3303185193beb66a204a752a87f8a7ec82b512d51fba5437e5239e1fc8a889ddcb9
7
+ data.tar.gz: 0ecd0c38c46db77c5e6c9ea6d8ef94cfd3325f7cb9e5f1a2d86ae6d22c307305595cac210d5d6710843806d8c8891c1b04bc7cf663efcb7e5b42cd3bbe45e39f
data/README.md CHANGED
@@ -43,6 +43,23 @@ Multiwoven integrations is the collection of connectors built on top of [Multiwo
43
43
  Multiwoven protocol is an open source standard for moving data from data sources to any third-party destination.
44
44
  Anyone can build a connector with basic Ruby knowledge using the protocol.
45
45
 
46
+ ## Prerequisites
47
+
48
+ Before you begin the installation, ensure you have the following dependencies installed:
49
+
50
+ - **MySQL Client**
51
+ - Command: `brew install mysql-client`
52
+ - Description: Required for database interactions.
53
+
54
+ - **Zstandard (zstd)**
55
+ - Command: `brew install zstd`
56
+ - Description: Needed for data compression and decompression.
57
+
58
+ - **OpenSSL 3**
59
+ - Command: `brew install openssl@3`
60
+ - Description: Essential for secure communication.
61
+
62
+
46
63
  ### Installation
47
64
 
48
65
  Install the gem and add to the application's Gemfile by executing:
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Multiwoven
4
4
  module Integrations
5
- VERSION = "0.3.1"
5
+ VERSION = "0.3.3"
6
6
 
7
7
  ENABLED_SOURCES = %w[
8
8
  Snowflake
@@ -4,12 +4,13 @@ module Multiwoven::Integrations::Source
4
4
  module AmazonS3
5
5
  include Multiwoven::Integrations::Core
6
6
  class Client < SourceConnector
7
- DISCOVER_QUERY = "SELECT * FROM S3Object LIMIT 1;"
8
-
7
+ @session_name = ""
9
8
  def check_connection(connection_config)
10
9
  connection_config = connection_config.with_indifferent_access
11
- client = config_aws(connection_config)
12
- client.get_bucket_policy_status({ bucket: connection_config[:bucket] })
10
+ @session_name = "connection-#{connection_config[:region]}-#{connection_config[:bucket]}"
11
+ conn = create_connection(connection_config)
12
+ path = build_path(connection_config)
13
+ get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
13
14
  ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_multiwoven_message
14
15
  rescue StandardError => e
15
16
  ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_multiwoven_message
@@ -17,13 +18,13 @@ module Multiwoven::Integrations::Source
17
18
 
18
19
  def discover(connection_config)
19
20
  connection_config = connection_config.with_indifferent_access
21
+ @session_name = "discover-#{connection_config[:region]}-#{connection_config[:bucket]}"
20
22
  conn = create_connection(connection_config)
21
23
  # If pulling from multiple files, all files must have the same schema
22
- path = build_path(connection_config[:path])
23
- full_path = "s3://#{connection_config[:bucket]}/#{path}*.#{connection_config[:file_type]}"
24
- records = get_results(conn, "DESCRIBE SELECT * FROM '#{full_path}';")
24
+ path = build_path(connection_config)
25
+ records = get_results(conn, "DESCRIBE SELECT * FROM '#{path}';")
25
26
  columns = build_discover_columns(records)
26
- streams = [Multiwoven::Integrations::Protocol::Stream.new(name: full_path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
+ streams = [Multiwoven::Integrations::Protocol::Stream.new(name: path, action: StreamAction["fetch"], json_schema: convert_to_json_schema(columns))]
27
28
  catalog = Catalog.new(streams: streams)
28
29
  catalog.to_multiwoven_message
29
30
  rescue StandardError => e
@@ -32,6 +33,7 @@ module Multiwoven::Integrations::Source
32
33
 
33
34
  def read(sync_config)
34
35
  connection_config = sync_config.source.connection_specification.with_indifferent_access
36
+ @session_name = "#{sync_config.sync_id}-#{sync_config.source.name}-#{sync_config.destination.name}"
35
37
  conn = create_connection(connection_config)
36
38
  query = sync_config.model.query
37
39
  query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?
@@ -47,25 +49,48 @@ module Multiwoven::Integrations::Source
47
49
 
48
50
  private
49
51
 
50
- # DuckDB
52
+ def get_auth_data(connection_config)
53
+ session = @session_name
54
+ @session_name = ""
55
+ if connection_config[:auth_type] == "user"
56
+ Aws::Credentials.new(connection_config[:access_id], connection_config[:secret_access])
57
+ elsif connection_config[:auth_type] == "role"
58
+ sts_client = Aws::STS::Client.new(region: connection_config[:region])
59
+ resp = sts_client.assume_role({
60
+ role_arn: connection_config[:arn],
61
+ role_session_name: session
62
+ })
63
+ Aws::Credentials.new(
64
+ resp.credentials.access_key_id,
65
+ resp.credentials.secret_access_key,
66
+ resp.credentials.session_token
67
+ )
68
+ end
69
+ end
70
+
51
71
  def create_connection(connection_config)
72
+ # In the case when previewing a query
73
+ @session_name = "preview-#{connection_config[:region]}-#{connection_config[:bucket]}" if @session_name.to_s.empty?
74
+ auth_data = get_auth_data(connection_config)
52
75
  conn = DuckDB::Database.open.connect
53
76
  # Set up S3 configuration
54
77
  secret_query = "
55
78
  CREATE SECRET amazons3_source (
56
79
  TYPE S3,
57
- KEY_ID '#{connection_config[:access_id]}',
58
- SECRET '#{connection_config[:secret_access]}',
59
- REGION '#{connection_config[:region]}'
80
+ KEY_ID '#{auth_data.credentials.access_key_id}',
81
+ SECRET '#{auth_data.credentials.secret_access_key}',
82
+ REGION '#{connection_config[:region]}',
83
+ SESSION_TOKEN '#{auth_data.credentials.session_token}'
60
84
  );
61
85
  "
62
86
  get_results(conn, secret_query)
63
87
  conn
64
88
  end
65
89
 
66
- def build_path(path)
67
- path = "#{path}/" if !path.to_s.strip.empty? && path[-1] != "/"
68
- path
90
+ def build_path(connection_config)
91
+ path = connection_config[:path]
92
+ path = "#{path}/" if path.to_s.strip.empty? || path[-1] != "/"
93
+ "s3://#{connection_config[:bucket]}#{path}*.#{connection_config[:file_type]}"
69
94
  end
70
95
 
71
96
  def get_results(conn, query)
@@ -109,43 +134,6 @@ module Multiwoven::Integrations::Source
109
134
  "boolean"
110
135
  end
111
136
  end
112
-
113
- # AWS SDK
114
- def config_aws(config)
115
- config = config.with_indifferent_access
116
- Aws.config.update({
117
- region: config[:region],
118
- credentials: Aws::Credentials.new(config[:access_id], config[:secret_access])
119
- })
120
- config.with_indifferent_access
121
- Aws::S3::Client.new
122
- end
123
-
124
- def build_select_content_options(config, query)
125
- config = config.with_indifferent_access
126
- bucket_name = config[:bucket]
127
- file_key = config[:file_key]
128
- file_type = config[:file_type]
129
- options = {
130
- bucket: bucket_name,
131
- key: file_key,
132
- expression_type: "SQL",
133
- expression: query,
134
- output_serialization: {
135
- json: {}
136
- }
137
- }
138
- if file_type == "parquet"
139
- options[:input_serialization] = {
140
- parquet: {}
141
- }
142
- elsif file_type == "csv"
143
- options[:input_serialization] = {
144
- csv: { file_header_info: "USE" }
145
- }
146
- end
147
- options
148
- end
149
137
  end
150
138
  end
151
139
  end
@@ -6,13 +6,51 @@
6
6
  "$schema": "http://json-schema.org/draft-07/schema#",
7
7
  "title": "AmazonS3",
8
8
  "type": "object",
9
- "required": ["region", "bucket", "access_id", "secret_access", "file_type"],
9
+ "if": {
10
+ "properties": {
11
+ "auth_type": {
12
+ "enum": ["user"]
13
+ }
14
+ }
15
+ },
16
+ "then": {
17
+ "required": [
18
+ "auth_type",
19
+ "region",
20
+ "bucket",
21
+ "access_id",
22
+ "secret_access",
23
+ "file_type"
24
+ ]
25
+ },
26
+ "else": {
27
+ "required": [
28
+ "auth_type",
29
+ "region",
30
+ "bucket",
31
+ "arn",
32
+ "file_type"
33
+ ]
34
+ },
10
35
  "properties": {
11
- "region": {
12
- "description": "AWS region",
13
- "examples": ["us-east-2"],
36
+ "auth_type": {
37
+ "title": "Authentication type",
14
38
  "type": "string",
15
- "title": "Region",
39
+ "default": "user",
40
+ "description": "Authenticate either by using an IAM User (Access Key ID & Secret Access Key) or an IAM Role (ARN)",
41
+ "enum": [
42
+ "user",
43
+ "role"
44
+ ],
45
+ "enumNames": [
46
+ "IAM User",
47
+ "IAM Role"
48
+ ],
49
+ "order": 0
50
+ },
51
+ "arn": {
52
+ "type": "string",
53
+ "title": "IAM Role ARN",
16
54
  "order": 1
17
55
  },
18
56
  "access_id": {
@@ -26,26 +64,40 @@
26
64
  "multiwoven_secret": true,
27
65
  "order": 3
28
66
  },
67
+ "region": {
68
+ "description": "AWS region",
69
+ "examples": [
70
+ "us-east-2"
71
+ ],
72
+ "type": "string",
73
+ "title": "Region",
74
+ "order": 4
75
+ },
29
76
  "bucket": {
30
77
  "description": "Bucket Name",
31
78
  "type": "string",
32
79
  "title": "Bucket",
33
- "order": 4
80
+ "order": 5
34
81
  },
35
82
  "path": {
36
83
  "description": "Path to csv or parquet files",
37
- "examples": ["/path/to/files"],
84
+ "examples": [
85
+ "/path/to/files"
86
+ ],
38
87
  "type": "string",
39
88
  "title": "Path",
40
- "order": 5
89
+ "order": 6
41
90
  },
42
91
  "file_type": {
43
92
  "description": "The type of file to read",
44
93
  "type": "string",
45
94
  "title": "File Type",
46
- "enum": ["csv", "parquet"],
47
- "order": 6
95
+ "enum": [
96
+ "csv",
97
+ "parquet"
98
+ ],
99
+ "order": 7
48
100
  }
49
101
  }
50
102
  }
51
- }
103
+ }
@@ -30,6 +30,7 @@ require "base64"
30
30
  require "aws-sdk-s3"
31
31
  require "duckdb"
32
32
  require "iterable-api-client"
33
+ require "aws-sdk-sts"
33
34
 
34
35
  # Service
35
36
  require_relative "integrations/config"
@@ -37,6 +37,7 @@ Gem::Specification.new do |spec|
37
37
  spec.add_runtime_dependency "async-websocket"
38
38
  spec.add_runtime_dependency "aws-sdk-athena"
39
39
  spec.add_runtime_dependency "aws-sdk-s3"
40
+ spec.add_runtime_dependency "aws-sdk-sts"
40
41
  spec.add_runtime_dependency "csv"
41
42
  spec.add_runtime_dependency "dry-schema"
42
43
  spec.add_runtime_dependency "dry-struct"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multiwoven-integrations
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Subin T P
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-26 00:00:00.000000000 Z
11
+ date: 2024-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: aws-sdk-sts
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: csv
71
85
  requirement: !ruby/object:Gem::Requirement