remi 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +11 -11
- data/features/s3_file_target_job.feature +10 -0
- data/features/step_definitions/remi_step.rb +26 -0
- data/jobs/s3_file_target_job.rb +23 -0
- data/lib/remi/data_subject.rb +5 -1
- data/lib/remi/data_subjects/none.rb +3 -3
- data/lib/remi/data_subjects/s3_file.rb +199 -12
- data/lib/remi/data_subjects/salesforce.rb +2 -2
- data/lib/remi/data_subjects/salesforce_soap.rb +3 -1
- data/lib/remi/data_subjects/sftp_file.rb +77 -62
- data/lib/remi/job/transform.rb +14 -0
- data/lib/remi/source_to_target_map/map.rb +4 -1
- data/lib/remi/transform.rb +3 -0
- data/lib/remi/version.rb +1 -1
- data/spec/data_subjects/none_spec.rb +5 -1
- data/spec/data_subjects/s3_file_spec.rb +12 -3
- data/spec/data_subjects/salesforce_soap_spec.rb +20 -3
- data/spec/data_subjects/sftp_file_spec.rb +37 -22
- data/spec/job/transform_spec.rb +84 -0
- data/spec/source_to_target_map_spec.rb +30 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 051d6add4664343ee59a6c722a2abadc15ea4377
|
4
|
+
data.tar.gz: f7f438b794a08948617b767dfca83e58533300ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d0e46a405da1e48dc0b82afe9c350b26f0b436fe99aa1cdf9500d035ec5a01612257c679338063920d370ead1e09c68ec35f7de9d732338417874a396ed8634c
|
7
|
+
data.tar.gz: d39aaad0be382a3f70359c03eeaa1b13ef9a594967cd029014da40ca2882cffe53a81a534cdf4c8221399fb10636e5c1304ac591e6a0ef589ecd1d0fab1b07f2
|
data/Gemfile
CHANGED
@@ -7,4 +7,4 @@ gem 'daru', '0.1.4.1', git: 'git@github.com:inside-track/daru.git', branch: '0.1
|
|
7
7
|
gem 'restforce', '~> 2.1'
|
8
8
|
gem 'salesforce_bulk_api', git: 'git@github.com:inside-track/salesforce_bulk_api.git', branch: 'master'
|
9
9
|
gem 'soapforce', '~> 0.5'
|
10
|
-
gem 'aws-sdk', '~> 2.
|
10
|
+
gem 'aws-sdk', '~> 2.10'
|
data/Gemfile.lock
CHANGED
@@ -18,7 +18,7 @@ GIT
|
|
18
18
|
PATH
|
19
19
|
remote: .
|
20
20
|
specs:
|
21
|
-
remi (0.3.
|
21
|
+
remi (0.3.3)
|
22
22
|
activesupport (~> 4.2)
|
23
23
|
bond (~> 0.5)
|
24
24
|
cucumber (~> 2.1)
|
@@ -43,12 +43,14 @@ GEM
|
|
43
43
|
akami (1.3.1)
|
44
44
|
gyoku (>= 0.4.0)
|
45
45
|
nokogiri
|
46
|
-
aws-sdk (2.3
|
47
|
-
aws-sdk-resources (= 2.3
|
48
|
-
aws-sdk-core (2.3
|
46
|
+
aws-sdk (2.10.3)
|
47
|
+
aws-sdk-resources (= 2.10.3)
|
48
|
+
aws-sdk-core (2.10.3)
|
49
|
+
aws-sigv4 (~> 1.0)
|
49
50
|
jmespath (~> 1.0)
|
50
|
-
aws-sdk-resources (2.3
|
51
|
-
aws-sdk-core (= 2.3
|
51
|
+
aws-sdk-resources (2.10.3)
|
52
|
+
aws-sdk-core (= 2.10.3)
|
53
|
+
aws-sigv4 (1.0.0)
|
52
54
|
backports (3.6.8)
|
53
55
|
bond (0.5.1)
|
54
56
|
builder (3.2.2)
|
@@ -104,10 +106,8 @@ GEM
|
|
104
106
|
mimemagic (~> 0.3)
|
105
107
|
multi_json (~> 1.11)
|
106
108
|
rbczmq (~> 1.7)
|
107
|
-
jmespath (1.
|
108
|
-
json_pure (>= 1.8.1)
|
109
|
+
jmespath (1.3.1)
|
109
110
|
json (1.8.3)
|
110
|
-
json_pure (1.8.3)
|
111
111
|
jwt (1.5.6)
|
112
112
|
little-plugger (1.1.4)
|
113
113
|
logging (2.1.0)
|
@@ -187,7 +187,7 @@ PLATFORMS
|
|
187
187
|
ruby
|
188
188
|
|
189
189
|
DEPENDENCIES
|
190
|
-
aws-sdk (~> 2.
|
190
|
+
aws-sdk (~> 2.10)
|
191
191
|
daru (= 0.1.4.1)!
|
192
192
|
github-markup (~> 1.4)
|
193
193
|
google-api-client (~> 0.9)
|
@@ -200,4 +200,4 @@ DEPENDENCIES
|
|
200
200
|
yard (~> 0.9)
|
201
201
|
|
202
202
|
BUNDLED WITH
|
203
|
-
1.
|
203
|
+
1.15.1
|
@@ -0,0 +1,10 @@
|
|
1
|
+
Feature: Tests targets that are S3 Files.
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'S3 File Target'
|
5
|
+
And the job target 'Some File'
|
6
|
+
|
7
|
+
Scenario: Defining the remote path.
|
8
|
+
Given the target 'Some File'
|
9
|
+
Then the file is uploaded to the S3 bucket "the-big-one"
|
10
|
+
And the file is uploaded to the remote path "some_file_*Today: %Y%m%d*.csv"
|
@@ -69,6 +69,14 @@ Then /^the file is uploaded to the remote path "([^"]+)"$/ do |remote_path|
|
|
69
69
|
expect(@brt.target.data_subject.loaders.map(&:remote_path)).to include expected_path
|
70
70
|
end
|
71
71
|
|
72
|
+
Then /^the file is uploaded to the S3 bucket "([^"]+)"$/ do |bucket_name|
|
73
|
+
expected_bucket_name = Remi::Testing::BusinessRules::ParseFormula.parse(bucket_name)
|
74
|
+
bucket_names = @brt.target.data_subject.loaders.map do |loader|
|
75
|
+
loader.bucket_name if loader.respond_to? :bucket_name
|
76
|
+
end
|
77
|
+
expect(bucket_names).to include expected_bucket_name
|
78
|
+
end
|
79
|
+
|
72
80
|
## CSV Options
|
73
81
|
|
74
82
|
Given /^the (source|target) file is delimited with a (\w+)$/ do |st, delimiter|
|
@@ -124,6 +132,16 @@ Given /^the (source|target) file contains all of the following headers in this o
|
|
124
132
|
expect(@brt.send(st.to_sym).data_subject.df.vectors.to_a).to eq @brt.send(st.to_sym).fields.field_names
|
125
133
|
end
|
126
134
|
|
135
|
+
Given /^the (source|target) file contains all of the following headers in no particular order:$/ do |st, table|
|
136
|
+
table.rows.each do |row|
|
137
|
+
field = row.first
|
138
|
+
step "the #{st} field '#{field}'"
|
139
|
+
end
|
140
|
+
|
141
|
+
@brt.run_transforms if st == 'target'
|
142
|
+
expect(@brt.send(st.to_sym).data_subject.df.vectors.to_a).to match_array @brt.send(st.to_sym).fields.field_names
|
143
|
+
end
|
144
|
+
|
127
145
|
### Source
|
128
146
|
|
129
147
|
Given /^the source '([[:alnum:]\s\-_]+)'$/ do |arg|
|
@@ -260,6 +278,7 @@ Then /^the target field '([^']+)' has the label '([^']+)'$/ do |target_field, la
|
|
260
278
|
data_field = @brt.targets.fields.next
|
261
279
|
expect(data_field.metadata[:label]).to eq label
|
262
280
|
expect(data_field.name).to eq target_field
|
281
|
+
|
263
282
|
end
|
264
283
|
|
265
284
|
Then /^the target field '([^']+)' is copied from the source field$/ do |target_field|
|
@@ -780,3 +799,10 @@ Then /^the target '([[:alnum:]\s\-_]+)' has (\d+) record(?:s|) where '([[:alnum:
|
|
780
799
|
@brt.run_transforms
|
781
800
|
expect(@brt.targets[target_name].where_between(field_name, low_value, high_value).size).to eq nrecords.to_i
|
782
801
|
end
|
802
|
+
|
803
|
+
Then /^the target field '([^']+)' (?:has|is set to) the multiline value$/ do |target_field, value|
|
804
|
+
step "the target field '#{target_field}'"
|
805
|
+
@brt.run_transforms
|
806
|
+
target_name, target_field_name = @brt.targets.parse_full_field(target_field)
|
807
|
+
expect(@brt.targets[target_name].fields[target_field_name].value).to eq Remi::Testing::BusinessRules::ParseFormula.parse(value)
|
808
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'all_jobs_shared'
|
2
|
+
require 'aws-sdk'
|
3
|
+
|
4
|
+
class S3FileTargetJob < Remi::Job
|
5
|
+
target :some_file do
|
6
|
+
encoder Remi::Encoder::CsvFile.new
|
7
|
+
loader Remi::Loader::S3File.new(
|
8
|
+
credentials: {
|
9
|
+
aws_access_key_id: 'blort',
|
10
|
+
aws_secret_access_key: 'blerg',
|
11
|
+
region: 'us-west-2'
|
12
|
+
},
|
13
|
+
kms_opt: {
|
14
|
+
ciphertext: 'blergity'
|
15
|
+
},
|
16
|
+
bucket: 'the-big-one',
|
17
|
+
remote_path: "some_file_#{DateTime.current.strftime('%Y%m%d')}.csv"
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
transform :main do
|
22
|
+
end
|
23
|
+
end
|
data/lib/remi/data_subject.rb
CHANGED
@@ -93,7 +93,11 @@ module Remi
|
|
93
93
|
sttm = SourceToTargetMap.new(df, source_metadata: fields)
|
94
94
|
fields.keys.each do |field|
|
95
95
|
next unless (types.size == 0 || types.include?(fields[field][:type])) && df.vectors.include?(field)
|
96
|
-
|
96
|
+
begin
|
97
|
+
sttm.source(field).target(field).transform(Remi::Transform::EnforceType.new).execute
|
98
|
+
rescue StandardError => err
|
99
|
+
raise ArgumentError, "Field '#{field}': #{err.message}"
|
100
|
+
end
|
97
101
|
end
|
98
102
|
|
99
103
|
self
|
@@ -8,10 +8,10 @@ module Remi
|
|
8
8
|
end
|
9
9
|
|
10
10
|
|
11
|
-
# The None Parser just returns
|
11
|
+
# The None Parser just returns an empty dataframe if it's not given any data
|
12
12
|
class Parser::None < Parser
|
13
|
-
def parse(data)
|
14
|
-
data
|
13
|
+
def parse(data=nil)
|
14
|
+
data || Remi::DataFrame::Daru.new([], order: fields.keys)
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -1,13 +1,59 @@
|
|
1
1
|
module Remi
|
2
2
|
|
3
|
+
module DataSubject::S3File
|
4
|
+
attr_accessor :region
|
5
|
+
attr_accessor :aws_credentials
|
6
|
+
|
7
|
+
def init_aws_credentials(credentials)
|
8
|
+
@aws_credentials = Aws::Credentials.new(
|
9
|
+
credentials.fetch(:aws_access_key_id, ENV['AWS_ACCESS_KEY_ID']),
|
10
|
+
credentials.fetch(:aws_secret_access_key, ENV['AWS_SECRET_ACCESS_KEY'])
|
11
|
+
)
|
12
|
+
end
|
13
|
+
|
14
|
+
def s3
|
15
|
+
@s3 ||= Aws::S3::Resource.new(
|
16
|
+
credentials: aws_credentials,
|
17
|
+
region: region
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
def encrypt_args
|
22
|
+
@kms_args || {}
|
23
|
+
end
|
24
|
+
|
25
|
+
def init_kms(opt)
|
26
|
+
return nil unless opt
|
27
|
+
|
28
|
+
kms = Aws::KMS::Client.new(
|
29
|
+
region: @region,
|
30
|
+
credentials: @aws_credentials
|
31
|
+
)
|
32
|
+
|
33
|
+
ciphertext = opt.fetch(:ciphertext)
|
34
|
+
algorithm = opt.fetch(:algorithm, 'AES256')
|
35
|
+
key = kms.decrypt(ciphertext_blob: Base64.decode64(ciphertext)).plaintext
|
36
|
+
|
37
|
+
@kms_args = {
|
38
|
+
sse_customer_algorithm: algorithm,
|
39
|
+
sse_customer_key: key
|
40
|
+
}
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
3
44
|
# S3 File extractor
|
4
45
|
# Used to extract files from Amazon S3
|
5
46
|
#
|
6
|
-
# @example
|
47
|
+
# @example Standard use
|
7
48
|
#
|
8
49
|
# class MyJob < Remi::Job
|
9
50
|
# source :some_file do
|
10
51
|
# extractor Remi::Extractor::S3File.new(
|
52
|
+
# credentials: {
|
53
|
+
# aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
|
54
|
+
# aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
55
|
+
# region: 'us-west-2'
|
56
|
+
# },
|
11
57
|
# bucket: 'my-awesome-bucket',
|
12
58
|
# remote_path: 'some_file-',
|
13
59
|
# most_recent_only: true
|
@@ -28,9 +74,40 @@ module Remi
|
|
28
74
|
# # 0 1 Albert
|
29
75
|
# # 1 2 Betsy
|
30
76
|
# # 2 3 Camu
|
77
|
+
#
|
78
|
+
# @example Using AWS KMS
|
79
|
+
# To use AWS KMS, supply a :ciphertext and optional :algorithm (default is AES256).
|
80
|
+
# The encrypted key stored in the ciphertext must be the same as that used when the file was written.
|
81
|
+
#
|
82
|
+
# class MyJob < Remi::Job
|
83
|
+
# source :some_file do
|
84
|
+
# extractor Remi::Extractor::S3File.new(
|
85
|
+
# credentials: {
|
86
|
+
# aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
|
87
|
+
# aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
88
|
+
# region: 'us-west-2'
|
89
|
+
# },
|
90
|
+
# bucket: 'my-awesome-bucket',
|
91
|
+
# remote_path: 'some_file-',
|
92
|
+
# most_recent_only: true,
|
93
|
+
# kms_opt: {
|
94
|
+
# ciphertext: '<base64-encoded ciphertext>'
|
95
|
+
# }
|
96
|
+
# )
|
97
|
+
# parser Remi::Parser::CsvFile.new(
|
98
|
+
# csv_options: {
|
99
|
+
# headers: true,
|
100
|
+
# col_sep: '|'
|
101
|
+
# }
|
102
|
+
# )
|
103
|
+
# end
|
104
|
+
# end
|
31
105
|
class Extractor::S3File < Extractor::FileSystem
|
106
|
+
include Remi::DataSubject::S3File
|
32
107
|
|
33
|
-
# @param
|
108
|
+
# @param bucket [String] Name of S3 bucket containing the files
|
109
|
+
# @param kms_opt [Hash] Hash containing AWS KMS options
|
110
|
+
# @param credentials [Hash] Hash containing AWS credentials (:aws_access_key_id, :aws_secret_access_key, :region); missing keys fall back to ENV['AWS_ACCESS_KEY_ID'], ENV['AWS_SECRET_ACCESS_KEY'], and 'us-west-2' respectively
|
34
111
|
def initialize(*args, **kargs, &block)
|
35
112
|
super
|
36
113
|
init_s3_file(*args, **kargs, &block)
|
@@ -39,10 +116,12 @@ module Remi
|
|
39
116
|
# Called to extract files from the source filesystem.
|
40
117
|
# @return [Array<String>] An array of paths to a local copy of the files extracted
|
41
118
|
def extract
|
119
|
+
init_kms(@kms_opt)
|
120
|
+
|
42
121
|
entries.map do |entry|
|
43
122
|
local_file = File.join(@local_path, entry.name)
|
44
123
|
logger.info "Downloading #{entry.pathname} from S3 to #{local_file}"
|
45
|
-
File.open(local_file, 'wb') { |file| entry.raw.get(response_target: file) }
|
124
|
+
File.open(local_file, 'wb') { |file| entry.raw.get({ response_target: file }.merge(encrypt_args)) }
|
46
125
|
local_file
|
47
126
|
end
|
48
127
|
end
|
@@ -55,7 +134,7 @@ module Remi
|
|
55
134
|
# @return [Array<Extractor::FileSystemEntry>] List of objects in the bucket/prefix
|
56
135
|
def all_entries!
|
57
136
|
# S3 does not track anything like a create time, so use last modified for both
|
58
|
-
bucket.objects(prefix: @remote_path.to_s).map do |entry|
|
137
|
+
s3.bucket(@bucket_name).objects(prefix: @remote_path.to_s).map do |entry|
|
59
138
|
Extractor::FileSystemEntry.new(
|
60
139
|
pathname: entry.key,
|
61
140
|
create_time: entry.last_modified,
|
@@ -65,20 +144,128 @@ module Remi
|
|
65
144
|
end
|
66
145
|
end
|
67
146
|
|
68
|
-
# @return [Aws::S3::Client] The S3 client used
|
69
|
-
def s3_client
|
70
|
-
@s3_client ||= Aws::S3::Client.new
|
71
|
-
end
|
72
|
-
|
73
147
|
private
|
74
148
|
|
75
|
-
def init_s3_file(*args, bucket:, **kargs)
|
149
|
+
def init_s3_file(*args, credentials: {}, bucket:, kms_opt: nil, **kargs)
|
150
|
+
@region = credentials.fetch(:region, 'us-west-2')
|
151
|
+
@kms_opt = kms_opt
|
152
|
+
init_aws_credentials(credentials)
|
153
|
+
|
76
154
|
@bucket_name = bucket
|
77
155
|
end
|
156
|
+
end
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
# S3 File loader
|
161
|
+
# Used to post files to Amazon S3
|
162
|
+
#
|
163
|
+
# @example Standard use
|
164
|
+
#
|
165
|
+
# class MyJob < Remi::Job
|
166
|
+
# target :some_file do
|
167
|
+
# encoder Remi::Encoder::CsvFile.new
|
168
|
+
# loader Remi::Loader::S3File.new(
|
169
|
+
# credentials: {
|
170
|
+
# aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
|
171
|
+
# aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
172
|
+
# region: 'us-west-2'
|
173
|
+
# },
|
174
|
+
# bucket: 'itk-de-archive',
|
175
|
+
# remote_path: 'awesome.csv'
|
176
|
+
# )
|
177
|
+
# end
|
178
|
+
# end
|
179
|
+
#
|
180
|
+
# job = MyJob.new
|
181
|
+
# job.some_file.df = Daru::DataFrame.new(
|
182
|
+
# {
|
183
|
+
# numbers: [1,2,3],
|
184
|
+
# words: ['one', 'two', 'three']
|
185
|
+
# }
|
186
|
+
# )
|
187
|
+
# job.some_file.load
|
188
|
+
#
|
189
|
+
# @example Using AWS KMS
|
190
|
+
# To use AWS KMS, supply a :ciphertext and optional :algorithm (default is AES256).
|
191
|
+
# The encrypted key stored in the ciphertext must be the same as that used for reading the file.
|
192
|
+
#
|
193
|
+
# class MyJob < Remi::Job
|
194
|
+
# target :some_file do
|
195
|
+
# encoder Remi::Encoder::CsvFile.new
|
196
|
+
# loader Remi::Loader::S3File.new(
|
197
|
+
# credentials: {
|
198
|
+
# aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
|
199
|
+
# aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
200
|
+
# region: 'us-west-2'
|
201
|
+
# },
|
202
|
+
# bucket: 'itk-de-archive',
|
203
|
+
# remote_path: 'awesome.csv',
|
204
|
+
# kms_opt: {
|
205
|
+
# ciphertext: '<base64-encoded ciphertext>'
|
206
|
+
# }
|
207
|
+
# )
|
208
|
+
# end
|
209
|
+
# end
|
210
|
+
#
|
211
|
+
# @example Generating a ciphertext
|
212
|
+
# A ciphertext can be generated using the AWS SDK
|
213
|
+
#
|
214
|
+
# require 'aws-sdk'
|
215
|
+
# require 'base64'
|
216
|
+
#
|
217
|
+
# aws_credentials = Aws::Credentials.new(
|
218
|
+
# ENV['AWS_ACCESS_KEY_ID'],
|
219
|
+
# ENV['AWS_SECRET_ACCESS_KEY']
|
220
|
+
# )
|
221
|
+
#
|
222
|
+
# kms = Aws::KMS::Client.new(
|
223
|
+
# region: 'us-west-2',
|
224
|
+
# credentials: aws_credentials
|
225
|
+
# )
|
226
|
+
#
|
227
|
+
# # See AWS docs for creating keys: http://docs.aws.amazon.com/kms/latest/developerguide/create-keys.html
|
228
|
+
# data_key = kms.generate_data_key(
|
229
|
+
# key_id: 'alias/alias-of-kms-key',
|
230
|
+
# key_spec: 'AES_256'
|
231
|
+
# )
|
232
|
+
#
|
233
|
+
# ciphertext = Base64.strict_encode64(data_key.ciphertext_blob)
|
234
|
+
# #=> "AQIDAHjmmRVcBAdMHsA9VUoJKgbW8niK2qL1qPcQ2OWEUlh5XAFw0vfl+QIgawB8cbAZ2OqXAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMIUIFFh++2w4d9al7AgEQgDvSRXQCOPLSMOjRS/lM5uxuyRV47qInlKKBIezIaYzXuFu1sRU+L46HqRyS0XqR4flFJ/fc8yEj3pU1UA=="
|
235
|
+
class Loader::S3File < Loader
|
236
|
+
include Remi::DataSubject::S3File
|
237
|
+
|
238
|
+
# @param bucket [String] Name of S3 bucket containing the files
|
239
|
+
# @param kms_opt [Hash] Hash containing AWS KMS options
|
240
|
+
# @param credentials [Hash] Hash containing AWS credentials (:aws_access_key_id, :aws_secret_access_key, :region); missing keys fall back to ENV['AWS_ACCESS_KEY_ID'], ENV['AWS_SECRET_ACCESS_KEY'], and 'us-west-2' respectively
|
241
|
+
def initialize(*args, **kargs, &block)
|
242
|
+
super
|
243
|
+
init_s3_loader(*args, **kargs, &block)
|
244
|
+
end
|
245
|
+
|
246
|
+
attr_reader :remote_path
|
247
|
+
attr_reader :bucket_name
|
78
248
|
|
79
|
-
|
80
|
-
|
249
|
+
# Copies data to S3
|
250
|
+
# @param data [Object] The path to the file in the temporary work location
|
251
|
+
# @return [true] On success
|
252
|
+
def load(data)
|
253
|
+
init_kms(@kms_opt)
|
254
|
+
|
255
|
+
@logger.info "Writing file #{data} to S3 #{@bucket_name} as #{@remote_path}"
|
256
|
+
s3.bucket(@bucket_name).object(@remote_path).upload_file(data, encrypt_args)
|
257
|
+
true
|
81
258
|
end
|
82
259
|
|
260
|
+
private
|
261
|
+
|
262
|
+
def init_s3_loader(*args, credentials:{}, bucket:, remote_path:, kms_opt: nil, **kargs, &block)
|
263
|
+
@region = credentials.fetch(:region, 'us-west-2')
|
264
|
+
@kms_opt = kms_opt
|
265
|
+
init_aws_credentials(credentials)
|
266
|
+
|
267
|
+
@bucket_name = bucket
|
268
|
+
@remote_path = remote_path
|
269
|
+
end
|
83
270
|
end
|
84
271
|
end
|
@@ -185,11 +185,11 @@ module Remi
|
|
185
185
|
if @operation == :update
|
186
186
|
Remi::SfBulkHelper::SfBulkUpdate.update(restforce_client, @sfo, data, batch_size: @batch_size, logger: logger)
|
187
187
|
elsif @operation == :create
|
188
|
-
Remi::SfBulkHelper::SfBulkCreate.create(restforce_client, @sfo, data, batch_size: @batch_size, logger: logger)
|
188
|
+
Remi::SfBulkHelper::SfBulkCreate.create(restforce_client, @sfo, data, batch_size: @batch_size, max_attempts: 1, logger: logger)
|
189
189
|
elsif @operation == :upsert
|
190
190
|
Remi::SfBulkHelper::SfBulkUpsert.upsert(restforce_client, @sfo, data, batch_size: @batch_size, external_id: @external_id, logger: logger)
|
191
191
|
elsif @operation == :delete
|
192
|
-
Remi::SfBulkHelper::SfBulkDelete.
|
192
|
+
Remi::SfBulkHelper::SfBulkDelete.delete(restforce_client, @sfo, data, batch_size: @batch_size, logger: logger)
|
193
193
|
else
|
194
194
|
raise ArgumentError, "Unknown operation: #{@operation}"
|
195
195
|
end
|
@@ -79,7 +79,9 @@ module Remi
|
|
79
79
|
end
|
80
80
|
|
81
81
|
merge_id = Array(row.delete(@merge_id_field))
|
82
|
-
|
82
|
+
merge_row = row.select { |_, v| !v.blank? }
|
83
|
+
logger.info "Merging Id #{merge_id} into #{merge_row}"
|
84
|
+
soapforce_client.merge!(@sfo, merge_row, merge_id)
|
83
85
|
end
|
84
86
|
else
|
85
87
|
raise ArgumentError, "Unknown soap operation: #{@operation}"
|
@@ -1,4 +1,44 @@
|
|
1
1
|
module Remi
|
2
|
+
module DataSubject::SftpFile
|
3
|
+
|
4
|
+
attr_reader :sftp_session
|
5
|
+
|
6
|
+
def sftp_retry(&block)
|
7
|
+
tries ||= @retries
|
8
|
+
|
9
|
+
block.call
|
10
|
+
rescue StandardError => err
|
11
|
+
if (tries -= 1) > 0
|
12
|
+
logger.error "Error: #{err.message}"
|
13
|
+
logger.error "Will retry #{tries} more times"
|
14
|
+
sleep(1)
|
15
|
+
retry
|
16
|
+
else
|
17
|
+
raise err
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def begin_connection
|
22
|
+
sftp_retry do
|
23
|
+
Timeout.timeout(@timeout) do
|
24
|
+
@ssh_session = Net::SSH.start(@host, @username, password: @password, port: @port, number_of_password_prompts: 0)
|
25
|
+
@sftp_session = Net::SFTP::Session.new(@ssh_session)
|
26
|
+
@sftp_session.connect!
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def end_connection
|
32
|
+
@sftp_session.close_channel unless @sftp_session.nil?
|
33
|
+
@ssh_session.close unless @ssh_session.nil?
|
34
|
+
|
35
|
+
Timeout.timeout(@timeout) do
|
36
|
+
sleep 1 until (@sftp_session.nil? || @sftp_session.closed?) && (@ssh_session.nil? || @ssh_session.closed?)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
|
2
42
|
|
3
43
|
# Sftp File extractor
|
4
44
|
# Used to extract files from an SFTP server
|
@@ -35,13 +75,15 @@ module Remi
|
|
35
75
|
# # 1 2 Betsy
|
36
76
|
# # 2 3 Camu
|
37
77
|
class Extractor::SftpFile < Extractor::FileSystem
|
38
|
-
|
78
|
+
include DataSubject::SftpFile
|
39
79
|
|
40
80
|
# @param credentials [Hash] Options hash containing login credentials
|
41
81
|
# @param credentials [String] :host SFTP host (e.g., coolserver.com)
|
42
82
|
# @param credentials [String] :username SFTP username
|
43
83
|
# @param credentials [String] :password SFTP password (optional; nil when omitted)
|
44
84
|
# @param credentials [String] :port SFTP port (default: 22)
|
85
|
+
# @param retries [Integer] Maximum number of attempts for a connection or operation before the error is re-raised (default: 3)
|
86
|
+
# @param timeout [Integer] Number of seconds to wait for establishing/closing a connection (default: 30)
|
45
87
|
def initialize(*args, **kargs, &block)
|
46
88
|
super
|
47
89
|
init_sftp_extractor(*args, **kargs)
|
@@ -55,15 +97,16 @@ module Remi
|
|
55
97
|
# Called to extract files from the source filesystem.
|
56
98
|
# @return [Array<String>] An array of paths to a local copy of the files extracted
|
57
99
|
def extract
|
58
|
-
|
59
|
-
entries.map do |entry|
|
60
|
-
local_file = File.join(@local_path, entry.name)
|
61
|
-
logger.info "Downloading #{entry.name} to #{local_file}"
|
62
|
-
retry_download { sftp.download!(File.join(@remote_path, entry.name), local_file) }
|
63
|
-
local_file
|
100
|
+
begin_connection
|
64
101
|
|
65
|
-
|
102
|
+
entries.map do |entry|
|
103
|
+
local_file = File.join(@local_path, entry.name)
|
104
|
+
logger.info "Downloading #{entry.name} to #{local_file}"
|
105
|
+
sftp_retry { sftp_session.download!(File.join(@remote_path, entry.name), local_file) }
|
106
|
+
local_file
|
66
107
|
end
|
108
|
+
ensure
|
109
|
+
end_connection
|
67
110
|
end
|
68
111
|
|
69
112
|
# @return [Array<Extractor::FileSystemEntry>] (Memoized) list of objects in the bucket/prefix
|
@@ -73,8 +116,7 @@ module Remi
|
|
73
116
|
|
74
117
|
# @return [Array<Extractor::FileSystemEntry>] (Memoized) list of objects in the bucket/prefix
|
75
118
|
def all_entries!
|
76
|
-
|
77
|
-
sftp_entries.map do |entry|
|
119
|
+
sftp_session.dir.entries(@remote_path).map do |entry|
|
78
120
|
# Early versions of the protocol don't support create time, fake it with modified time?
|
79
121
|
FileSystemEntry.new(
|
80
122
|
pathname: File.join(@remote_path, entry.name),
|
@@ -87,33 +129,13 @@ module Remi
|
|
87
129
|
|
88
130
|
private
|
89
131
|
|
90
|
-
def init_sftp_extractor(*args, credentials:, **kargs)
|
132
|
+
def init_sftp_extractor(*args, credentials:, retries: 3, timeout: 30, **kargs)
|
91
133
|
@host = credentials.fetch(:host)
|
92
134
|
@username = credentials.fetch(:username)
|
93
|
-
@password = credentials.fetch(:password)
|
135
|
+
@password = credentials.fetch(:password, nil)
|
94
136
|
@port = credentials.fetch(:port, '22')
|
95
|
-
|
96
|
-
|
97
|
-
def connection(&block)
|
98
|
-
result = nil
|
99
|
-
Net::SFTP.start(@host, @username, password: @password, port: @port) do |sftp|
|
100
|
-
result = yield sftp
|
101
|
-
end
|
102
|
-
result
|
103
|
-
end
|
104
|
-
|
105
|
-
def retry_download(&block)
|
106
|
-
1.upto(N_RETRY).each do |itry|
|
107
|
-
begin
|
108
|
-
block.call
|
109
|
-
break
|
110
|
-
rescue RuntimeError => err
|
111
|
-
raise err unless itry < N_RETRY
|
112
|
-
logger.error "Download failed with error: #{err.message}"
|
113
|
-
logger.error "Retry attempt #{itry}/#{N_RETRY-1}"
|
114
|
-
sleep(1)
|
115
|
-
end
|
116
|
-
end
|
137
|
+
@retries = retries
|
138
|
+
@timeout = timeout
|
117
139
|
end
|
118
140
|
end
|
119
141
|
|
@@ -143,8 +165,16 @@ module Remi
|
|
143
165
|
# job.my_target.df = my_df
|
144
166
|
# job.my_target.load
|
145
167
|
class Loader::SftpFile < Loader
|
168
|
+
include DataSubject::SftpFile
|
146
169
|
|
170
|
+
# @param credentials [Hash] Options hash containing login credentials
|
171
|
+
# @param credentials [String] :host SFTP host (e.g., coolserver.com)
|
172
|
+
# @param credentials [String] :username SFTP username
|
173
|
+
# @param credentials [String] :password SFTP password (optional; nil when omitted)
|
174
|
+
# @param credentials [String] :port SFTP port (default: 22)
|
147
175
|
# @param remote_path [String, Pathname] Full path to the file to be created on the target filesystem
|
176
|
+
# @param retries [Integer] Maximum number of attempts for a connection or operation before the error is re-raised (default: 3)
|
177
|
+
# @param timeout [Integer] Number of seconds to wait for establishing/closing a connection (default: 30)
|
148
178
|
def initialize(*args, **kargs, &block)
|
149
179
|
super
|
150
180
|
init_sftp_loader(*args, **kargs, &block)
|
@@ -156,42 +186,27 @@ module Remi
|
|
156
186
|
# @param data [Object] The path to the file in the temporary work location
|
157
187
|
# @return [true] On success
|
158
188
|
def load(data)
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
189
|
+
begin_connection
|
190
|
+
|
191
|
+
logger.info "Uploading #{data} to #{@username}@#{@host}: #{@remote_path}"
|
192
|
+
sftp_retry { sftp_session.upload! data, @remote_path }
|
163
193
|
|
164
194
|
true
|
195
|
+
ensure
|
196
|
+
end_connection
|
165
197
|
end
|
166
198
|
|
167
199
|
|
168
200
|
private
|
169
201
|
|
170
|
-
def init_sftp_loader(*args, credentials:, remote_path:, **kargs, &block)
|
171
|
-
@
|
202
|
+
def init_sftp_loader(*args, credentials:, remote_path:, retries: 3, timeout: 30, **kargs, &block)
|
203
|
+
@host = credentials.fetch(:host)
|
204
|
+
@username = credentials.fetch(:username)
|
205
|
+
@password = credentials.fetch(:password, nil)
|
206
|
+
@port = credentials.fetch(:port, '22')
|
172
207
|
@remote_path = remote_path
|
173
|
-
|
174
|
-
|
175
|
-
def connection(&block)
|
176
|
-
result = nil
|
177
|
-
Net::SFTP.start(@credentials[:host], @credentials[:username], password: @credentials[:password], port: @credentials[:port] || '22') do |sftp|
|
178
|
-
result = yield sftp
|
179
|
-
end
|
180
|
-
result
|
181
|
-
end
|
182
|
-
|
183
|
-
def retry_upload(ntry=2, &block)
|
184
|
-
1.upto(ntry).each do |itry|
|
185
|
-
begin
|
186
|
-
block.call
|
187
|
-
break
|
188
|
-
rescue RuntimeError => err
|
189
|
-
raise err unless itry < ntry
|
190
|
-
logger.error "Upload failed with error: #{err.message}"
|
191
|
-
logger.error "Retry attempt #{itry}/#{ntry-1}"
|
192
|
-
sleep(1)
|
193
|
-
end
|
194
|
-
end
|
208
|
+
@retries = retries
|
209
|
+
@timeout = timeout
|
195
210
|
end
|
196
211
|
end
|
197
212
|
end
|
data/lib/remi/job/transform.rb
CHANGED
@@ -15,6 +15,7 @@ module Remi
|
|
15
15
|
# end
|
16
16
|
# tform.execute
|
17
17
|
class Transform
|
18
|
+
class IncompatibleTargetIndexError < StandardError; end
|
18
19
|
|
19
20
|
FieldMap = Struct.new(:from_subject, :to_subject, :field_from_to)
|
20
21
|
|
@@ -152,6 +153,19 @@ module Remi
|
|
152
153
|
sub_trans_ds = field_map.from_subject
|
153
154
|
fields_to_map = field_map.field_from_to.keys
|
154
155
|
|
156
|
+
job_idx = job_ds.df.index.to_a
|
157
|
+
sub_idx = sub_trans_ds.df.index.to_a
|
158
|
+
diff = ((job_idx | sub_idx) - (job_idx & sub_idx))
|
159
|
+
if job_idx.size > 0 && diff.size > 0 then
|
160
|
+
msg = <<-EOT
|
161
|
+
Incompatible target index!
|
162
|
+
Sub transform target #{sub_trans_ds.name} index is #{sub_trans_ds.df.index.inspect}
|
163
|
+
Job transform target #{job_ds.name} index is #{job_ds.df.index.inspect}
|
164
|
+
EOT
|
165
|
+
raise IncompatibleTargetIndexError.new msg
|
166
|
+
end
|
167
|
+
|
168
|
+
|
155
169
|
fields_to_map.each do |sub_trans_field|
|
156
170
|
job_field = field_map.field_from_to[sub_trans_field]
|
157
171
|
job_ds.fields[job_field].merge! sub_trans_ds.fields[sub_trans_field]
|
@@ -128,10 +128,13 @@ module Remi
|
|
128
128
|
|
129
129
|
# Private: Converts the transformed data into vectors in the target dataframe.
|
130
130
|
def map_to_target_df
|
131
|
+
index = @target_df.index.size > 0 ? @target_df.index : @source_df.index
|
132
|
+
|
131
133
|
result_hash_of_arrays.each do |vector, values|
|
132
|
-
@target_df[vector] = Daru::Vector.new(values, index:
|
134
|
+
@target_df[vector] = Daru::Vector.new(values, index: index)
|
133
135
|
end
|
134
136
|
|
137
|
+
@target_df.index = index
|
135
138
|
@target_df
|
136
139
|
end
|
137
140
|
|
data/lib/remi/transform.rb
CHANGED
data/lib/remi/version.rb
CHANGED
@@ -14,7 +14,11 @@ describe Parser::None do
|
|
14
14
|
let(:parser) { Parser::None.new }
|
15
15
|
|
16
16
|
context '#parse' do
|
17
|
-
it 'returns
|
17
|
+
it 'returns an empty dataframe when given no data' do
|
18
|
+
expect(parser.parse.to_a).to eq Remi::DataFrame::Daru.new([]).to_a
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'returns an what it was given' do
|
18
22
|
expect(parser.parse('some data')).to eq 'some data'
|
19
23
|
end
|
20
24
|
end
|
@@ -9,14 +9,23 @@ describe Extractor::S3File do
|
|
9
9
|
}
|
10
10
|
|
11
11
|
prefix = "the-best-prefix"
|
12
|
-
|
13
|
-
|
12
|
+
credentials = {
|
13
|
+
aws_access_key_id: 'BLAH',
|
14
|
+
aws_secret_access_key: 'DEBLAH'
|
15
|
+
}
|
16
|
+
|
17
|
+
@s3_file = Extractor::S3File.new(
|
18
|
+
bucket: 'the-best-bucket',
|
19
|
+
credentials: credentials,
|
20
|
+
remote_path: "#{prefix}"
|
21
|
+
)
|
22
|
+
|
23
|
+
@s3_file.s3.client.stub_responses(:list_objects, {
|
14
24
|
contents: [
|
15
25
|
{ key: "#{prefix}/file1.csv" },
|
16
26
|
{ key: "#{prefix}/file2.csv" }
|
17
27
|
]
|
18
28
|
})
|
19
|
-
|
20
29
|
end
|
21
30
|
|
22
31
|
it 'returns all entries' do
|
@@ -45,7 +45,7 @@ describe Loader::SalesforceSoap do
|
|
45
45
|
{ Id: '1234', Custom__c: 'something', Merge_Id: '5678' }
|
46
46
|
]
|
47
47
|
|
48
|
-
expect(soapforce_client).to receive(:merge) do
|
48
|
+
expect(soapforce_client).to receive(:merge!) do
|
49
49
|
[
|
50
50
|
:Contact,
|
51
51
|
{
|
@@ -65,7 +65,25 @@ describe Loader::SalesforceSoap do
|
|
65
65
|
{ Id: '2', Custom__c: 'something', Merge_Id: '20' }
|
66
66
|
]
|
67
67
|
|
68
|
-
expect(soapforce_client).to receive(:merge).twice
|
68
|
+
expect(soapforce_client).to receive(:merge!).twice
|
69
|
+
loader.load(data)
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'excludes blank data fields from the merge command' do
|
73
|
+
data = [
|
74
|
+
{ Id: '1234', Custom__c: '', Merge_Id: '5678' }
|
75
|
+
]
|
76
|
+
|
77
|
+
expect(soapforce_client).to receive(:merge!) do
|
78
|
+
[
|
79
|
+
:Contact,
|
80
|
+
{
|
81
|
+
Id: '1234'
|
82
|
+
},
|
83
|
+
['5678']
|
84
|
+
]
|
85
|
+
end
|
86
|
+
|
69
87
|
loader.load(data)
|
70
88
|
end
|
71
89
|
|
@@ -76,5 +94,4 @@ describe Loader::SalesforceSoap do
|
|
76
94
|
|
77
95
|
expect { loader.load(data) }.to raise_error KeyError
|
78
96
|
end
|
79
|
-
|
80
97
|
end
|
@@ -10,21 +10,25 @@ describe Extractor::SftpFile do
|
|
10
10
|
}
|
11
11
|
}
|
12
12
|
|
13
|
-
|
13
|
+
def generate_extractor
|
14
14
|
Extractor::SftpFile.new(
|
15
15
|
credentials: credentials,
|
16
16
|
remote_path: remote_path
|
17
17
|
)
|
18
|
-
|
18
|
+
end
|
19
|
+
|
20
|
+
let(:extractor) { generate_extractor }
|
19
21
|
|
20
22
|
let(:remote_filenames) { ['file1.csv', 'file2.csv'] }
|
21
|
-
let(:sftp_session) { instance_double('Net:SFTP::Session') }
|
22
23
|
|
23
24
|
before do
|
24
|
-
|
25
|
+
allow(extractor).to receive(:begin_connection)
|
25
26
|
|
26
|
-
|
27
|
-
allow(
|
27
|
+
sftp_session = double('sftp_session')
|
28
|
+
allow(extractor).to receive(:sftp_session).and_return(sftp_session)
|
29
|
+
|
30
|
+
sftp_dir = instance_double('Net::SFTP::Operations::Dir')
|
31
|
+
allow(sftp_session).to receive(:dir).and_return(sftp_dir)
|
28
32
|
|
29
33
|
allow(sftp_dir).to receive(:entries).and_return(remote_filenames.map { |fname|
|
30
34
|
Net::SFTP::Protocol::V04::Name.new(
|
@@ -36,65 +40,76 @@ describe Extractor::SftpFile do
|
|
36
40
|
|
37
41
|
context '.new' do
|
38
42
|
it 'creates an instance with valid parameters' do
|
39
|
-
|
43
|
+
extractor
|
40
44
|
end
|
41
45
|
|
42
46
|
it 'requires a hostname' do
|
43
47
|
credentials.delete(:host)
|
44
|
-
expect {
|
48
|
+
expect { generate_extractor }.to raise_error KeyError
|
45
49
|
end
|
46
50
|
|
47
51
|
it 'requires a username' do
|
48
52
|
credentials.delete(:username)
|
49
|
-
expect {
|
53
|
+
expect { generate_extractor }.to raise_error KeyError
|
50
54
|
end
|
51
55
|
|
52
|
-
it '
|
56
|
+
it 'does not require a password' do # If empty, it will use private keys
|
53
57
|
credentials.delete(:password)
|
54
|
-
expect {
|
58
|
+
expect { generate_extractor }.not_to raise_error
|
55
59
|
end
|
56
60
|
|
57
61
|
it 'defaults to using port 22' do
|
58
|
-
expect(
|
62
|
+
expect(extractor.port).to eq '22'
|
59
63
|
end
|
60
64
|
|
61
65
|
it 'allows the port to be defined in the credentials' do
|
62
66
|
credentials[:port] = '1234'
|
63
|
-
expect(
|
67
|
+
expect(generate_extractor.port).to eq '1234'
|
64
68
|
end
|
65
69
|
end
|
66
70
|
|
67
71
|
context '#all_entires' do
|
68
72
|
it 'returns all entries' do
|
69
|
-
expect(
|
73
|
+
expect(extractor.all_entries.map(&:name)).to eq remote_filenames
|
70
74
|
end
|
71
75
|
end
|
72
76
|
|
73
77
|
context '#extract' do
|
74
78
|
it 'downloads files from the ftp' do
|
75
|
-
expect(sftp_session).to receive(:download!).exactly(remote_filenames.size).times
|
76
|
-
|
79
|
+
expect(extractor.sftp_session).to receive(:download!).exactly(remote_filenames.size).times
|
80
|
+
extractor.extract
|
77
81
|
end
|
78
82
|
|
79
83
|
it 'creates local files with the right names' do
|
80
|
-
allow(sftp_session).to receive(:download!)
|
81
|
-
expect(
|
84
|
+
allow(extractor.sftp_session).to receive(:download!)
|
85
|
+
expect(extractor.extract.map { |f| Pathname.new(f).basename.to_s }).to eq remote_filenames
|
82
86
|
end
|
83
87
|
end
|
84
88
|
end
|
85
89
|
|
86
90
|
|
87
91
|
describe Loader::SftpFile do
|
88
|
-
|
92
|
+
|
93
|
+
let(:credentials) {
|
94
|
+
{
|
95
|
+
host: 'host',
|
96
|
+
username: 'username',
|
97
|
+
password: 'password'
|
98
|
+
}
|
99
|
+
}
|
100
|
+
|
101
|
+
let(:loader) { Loader::SftpFile.new(credentials: credentials, remote_path: 'some_path') }
|
89
102
|
let(:data) { double('some_data') }
|
90
|
-
let(:sftp_session) { instance_double('Net:SFTP::Session') }
|
91
103
|
|
92
104
|
before do
|
93
|
-
allow(
|
105
|
+
allow(loader).to receive(:begin_connection)
|
106
|
+
|
107
|
+
sftp_session = double('sftp_session')
|
108
|
+
allow(loader).to receive(:sftp_session).and_return(sftp_session)
|
94
109
|
end
|
95
110
|
|
96
111
|
it 'loads a csv to a target sftp filesystem' do
|
97
|
-
expect(sftp_session).to receive(:upload!).with(data, 'some_path')
|
112
|
+
expect(loader.sftp_session).to receive(:upload!).with(data, 'some_path')
|
98
113
|
loader.load data
|
99
114
|
end
|
100
115
|
end
|
data/spec/job/transform_spec.rb
CHANGED
@@ -253,5 +253,89 @@ describe Job do
|
|
253
253
|
my_transform.execute
|
254
254
|
end
|
255
255
|
end
|
256
|
+
|
257
|
+
describe '#import - edge cases' do
|
258
|
+
before do
|
259
|
+
class MyJob
|
260
|
+
source :job_source do
|
261
|
+
fields({ :id => {}, :name => {} })
|
262
|
+
end
|
263
|
+
target :job_target do
|
264
|
+
fields({ :id => {}, :name => {}, :funny_name => {} })
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
job.job_source.df = Remi::DataFrame::Daru.new({
|
269
|
+
id: [1, 2, 3],
|
270
|
+
name: ['one', 'two', 'three']
|
271
|
+
})
|
272
|
+
end
|
273
|
+
|
274
|
+
it 'correctly maps back to a source if the sub transform sorts the data' do
|
275
|
+
sub_transform = Job::Transform.new('arbitrary') do
|
276
|
+
source :st_source, [:id, :name]
|
277
|
+
target :st_target, [:funny_name]
|
278
|
+
|
279
|
+
st_source.df.sort!([:id], ascending: [false])
|
280
|
+
|
281
|
+
Remi::SourceToTargetMap.apply(st_source.df, st_target.df) do
|
282
|
+
map source(:name) .target(:funny_name)
|
283
|
+
.transform(->(v) { "funny-#{v}" })
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
my_transform = Job::Transform.new(job) do
|
288
|
+
import sub_transform do
|
289
|
+
map_source_fields :job_source, :st_source, {
|
290
|
+
:id => :id,
|
291
|
+
:name => :name
|
292
|
+
}
|
293
|
+
map_target_fields :st_target, :job_source, {
|
294
|
+
:funny_name => :funny_name
|
295
|
+
}
|
296
|
+
end
|
297
|
+
|
298
|
+
job.job_target.df = job.job_source.df.dup
|
299
|
+
end
|
300
|
+
|
301
|
+
my_transform.execute
|
302
|
+
expect(job.job_target.df[:funny_name].to_a).to eq(
|
303
|
+
job.job_target.df[:name].to_a.map { |v| "funny-#{v}" }
|
304
|
+
)
|
305
|
+
end
|
306
|
+
|
307
|
+
it 'raises an error if the subtransform fucks with index', wip: true do
|
308
|
+
sub_transform = Job::Transform.new('arbitrary') do
|
309
|
+
source :st_source, [:id, :name]
|
310
|
+
target :st_target, [:funny_name]
|
311
|
+
|
312
|
+
duplicated_df = Daru::DataFrame.new({ id: Array(st_source.df[:id][0]) * 3 })
|
313
|
+
|
314
|
+
st_source.df = st_source.df.join(duplicated_df, on: [:id], how: :left)
|
315
|
+
|
316
|
+
Remi::SourceToTargetMap.apply(st_source.df, st_target.df) do
|
317
|
+
map source(:name) .target(:funny_name)
|
318
|
+
.transform(->(v) { "funny-#{v}" })
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
my_transform = Job::Transform.new(job) do
|
323
|
+
import sub_transform do
|
324
|
+
map_source_fields :job_source, :st_source, {
|
325
|
+
:id => :id,
|
326
|
+
:name => :name
|
327
|
+
}
|
328
|
+
map_target_fields :st_target, :job_source, {
|
329
|
+
:funny_name => :funny_name
|
330
|
+
}
|
331
|
+
end
|
332
|
+
|
333
|
+
job.job_target.df = job.job_source.df.dup
|
334
|
+
end
|
335
|
+
|
336
|
+
expect { my_transform.execute }.to raise_error Job::Transform::IncompatibleTargetIndexError
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
256
340
|
end
|
257
341
|
end
|
@@ -298,4 +298,34 @@ describe SourceToTargetMap do
|
|
298
298
|
expect(sttm).to be_a(Remi::DataFrame::Daru)
|
299
299
|
end
|
300
300
|
end
|
301
|
+
|
302
|
+
describe 'source and target dataframes differ', wip: true do
|
303
|
+
it 'does not fail when the dataframe has been filtered' do
|
304
|
+
some_df = Daru::DataFrame.new(
|
305
|
+
{
|
306
|
+
:id => [1,2,3,4,5],
|
307
|
+
:something => ['x','','x','','x'],
|
308
|
+
:name => ['one', 'two', 'three', 'four', 'five']
|
309
|
+
}
|
310
|
+
)
|
311
|
+
|
312
|
+
filtered_df = some_df.where(some_df[:something].eq('x'))
|
313
|
+
target_df = Remi::DataFrame::Daru.new([])
|
314
|
+
|
315
|
+
Remi::SourceToTargetMap.apply(filtered_df, target_df) do
|
316
|
+
map source(:id) .target(:id)
|
317
|
+
map source(:name) .target(:name)
|
318
|
+
end
|
319
|
+
|
320
|
+
result = target_df[:id, :name].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
321
|
+
expect(result).to eq({
|
322
|
+
:id => [1, 3, 5],
|
323
|
+
:name => ['one', 'three', 'five']
|
324
|
+
})
|
325
|
+
end
|
326
|
+
|
327
|
+
|
328
|
+
end
|
329
|
+
|
330
|
+
|
301
331
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sterling Paramore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bond
|
@@ -231,6 +231,7 @@ files:
|
|
231
231
|
- features/json.feature
|
232
232
|
- features/metadata.feature
|
233
233
|
- features/parameters.feature
|
234
|
+
- features/s3_file_target_job.feature
|
234
235
|
- features/sample_job.feature
|
235
236
|
- features/sftp_file_target_job.feature
|
236
237
|
- features/step_definitions/remi_step.rb
|
@@ -254,6 +255,7 @@ files:
|
|
254
255
|
- jobs/json_job.rb
|
255
256
|
- jobs/metadata_job.rb
|
256
257
|
- jobs/parameters_job.rb
|
258
|
+
- jobs/s3_file_target_job.rb
|
257
259
|
- jobs/sample_job.rb
|
258
260
|
- jobs/sftp_file_target_job.rb
|
259
261
|
- jobs/sub_job_example_job.rb
|
@@ -372,6 +374,7 @@ test_files:
|
|
372
374
|
- features/json.feature
|
373
375
|
- features/metadata.feature
|
374
376
|
- features/parameters.feature
|
377
|
+
- features/s3_file_target_job.feature
|
375
378
|
- features/sample_job.feature
|
376
379
|
- features/sftp_file_target_job.feature
|
377
380
|
- features/step_definitions/remi_step.rb
|