remi 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +11 -11
- data/features/s3_file_target_job.feature +10 -0
- data/features/step_definitions/remi_step.rb +26 -0
- data/jobs/s3_file_target_job.rb +23 -0
- data/lib/remi/data_subject.rb +5 -1
- data/lib/remi/data_subjects/none.rb +3 -3
- data/lib/remi/data_subjects/s3_file.rb +199 -12
- data/lib/remi/data_subjects/salesforce.rb +2 -2
- data/lib/remi/data_subjects/salesforce_soap.rb +3 -1
- data/lib/remi/data_subjects/sftp_file.rb +77 -62
- data/lib/remi/job/transform.rb +14 -0
- data/lib/remi/source_to_target_map/map.rb +4 -1
- data/lib/remi/transform.rb +3 -0
- data/lib/remi/version.rb +1 -1
- data/spec/data_subjects/none_spec.rb +5 -1
- data/spec/data_subjects/s3_file_spec.rb +12 -3
- data/spec/data_subjects/salesforce_soap_spec.rb +20 -3
- data/spec/data_subjects/sftp_file_spec.rb +37 -22
- data/spec/job/transform_spec.rb +84 -0
- data/spec/source_to_target_map_spec.rb +30 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 051d6add4664343ee59a6c722a2abadc15ea4377
|
4
|
+
data.tar.gz: f7f438b794a08948617b767dfca83e58533300ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d0e46a405da1e48dc0b82afe9c350b26f0b436fe99aa1cdf9500d035ec5a01612257c679338063920d370ead1e09c68ec35f7de9d732338417874a396ed8634c
|
7
|
+
data.tar.gz: d39aaad0be382a3f70359c03eeaa1b13ef9a594967cd029014da40ca2882cffe53a81a534cdf4c8221399fb10636e5c1304ac591e6a0ef589ecd1d0fab1b07f2
|
data/Gemfile
CHANGED
@@ -7,4 +7,4 @@ gem 'daru', '0.1.4.1', git: 'git@github.com:inside-track/daru.git', branch: '0.1
|
|
7
7
|
gem 'restforce', '~> 2.1'
|
8
8
|
gem 'salesforce_bulk_api', git: 'git@github.com:inside-track/salesforce_bulk_api.git', branch: 'master'
|
9
9
|
gem 'soapforce', '~> 0.5'
|
10
|
-
gem 'aws-sdk', '~> 2.
|
10
|
+
gem 'aws-sdk', '~> 2.10'
|
data/Gemfile.lock
CHANGED
@@ -18,7 +18,7 @@ GIT
|
|
18
18
|
PATH
|
19
19
|
remote: .
|
20
20
|
specs:
|
21
|
-
remi (0.3.
|
21
|
+
remi (0.3.3)
|
22
22
|
activesupport (~> 4.2)
|
23
23
|
bond (~> 0.5)
|
24
24
|
cucumber (~> 2.1)
|
@@ -43,12 +43,14 @@ GEM
|
|
43
43
|
akami (1.3.1)
|
44
44
|
gyoku (>= 0.4.0)
|
45
45
|
nokogiri
|
46
|
-
aws-sdk (2.3
|
47
|
-
aws-sdk-resources (= 2.3
|
48
|
-
aws-sdk-core (2.3
|
46
|
+
aws-sdk (2.10.3)
|
47
|
+
aws-sdk-resources (= 2.10.3)
|
48
|
+
aws-sdk-core (2.10.3)
|
49
|
+
aws-sigv4 (~> 1.0)
|
49
50
|
jmespath (~> 1.0)
|
50
|
-
aws-sdk-resources (2.3
|
51
|
-
aws-sdk-core (= 2.3
|
51
|
+
aws-sdk-resources (2.10.3)
|
52
|
+
aws-sdk-core (= 2.10.3)
|
53
|
+
aws-sigv4 (1.0.0)
|
52
54
|
backports (3.6.8)
|
53
55
|
bond (0.5.1)
|
54
56
|
builder (3.2.2)
|
@@ -104,10 +106,8 @@ GEM
|
|
104
106
|
mimemagic (~> 0.3)
|
105
107
|
multi_json (~> 1.11)
|
106
108
|
rbczmq (~> 1.7)
|
107
|
-
jmespath (1.
|
108
|
-
json_pure (>= 1.8.1)
|
109
|
+
jmespath (1.3.1)
|
109
110
|
json (1.8.3)
|
110
|
-
json_pure (1.8.3)
|
111
111
|
jwt (1.5.6)
|
112
112
|
little-plugger (1.1.4)
|
113
113
|
logging (2.1.0)
|
@@ -187,7 +187,7 @@ PLATFORMS
|
|
187
187
|
ruby
|
188
188
|
|
189
189
|
DEPENDENCIES
|
190
|
-
aws-sdk (~> 2.
|
190
|
+
aws-sdk (~> 2.10)
|
191
191
|
daru (= 0.1.4.1)!
|
192
192
|
github-markup (~> 1.4)
|
193
193
|
google-api-client (~> 0.9)
|
@@ -200,4 +200,4 @@ DEPENDENCIES
|
|
200
200
|
yard (~> 0.9)
|
201
201
|
|
202
202
|
BUNDLED WITH
|
203
|
-
1.
|
203
|
+
1.15.1
|
@@ -0,0 +1,10 @@
|
|
1
|
+
Feature: Tests targets that are S3 Files.
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'S3 File Target'
|
5
|
+
And the job target 'Some File'
|
6
|
+
|
7
|
+
Scenario: Defining the remote path.
|
8
|
+
Given the target 'Some File'
|
9
|
+
Then the file is uploaded to the S3 bucket "the-big-one"
|
10
|
+
And the file is uploaded to the remote path "some_file_*Today: %Y%m%d*.csv"
|
@@ -69,6 +69,14 @@ Then /^the file is uploaded to the remote path "([^"]+)"$/ do |remote_path|
|
|
69
69
|
expect(@brt.target.data_subject.loaders.map(&:remote_path)).to include expected_path
|
70
70
|
end
|
71
71
|
|
72
|
+
Then /^the file is uploaded to the S3 bucket "([^"]+)"$/ do |bucket_name|
|
73
|
+
expected_bucket_name = Remi::Testing::BusinessRules::ParseFormula.parse(bucket_name)
|
74
|
+
bucket_names = @brt.target.data_subject.loaders.map do |loader|
|
75
|
+
loader.bucket_name if loader.respond_to? :bucket_name
|
76
|
+
end
|
77
|
+
expect(bucket_names).to include expected_bucket_name
|
78
|
+
end
|
79
|
+
|
72
80
|
## CSV Options
|
73
81
|
|
74
82
|
Given /^the (source|target) file is delimited with a (\w+)$/ do |st, delimiter|
|
@@ -124,6 +132,16 @@ Given /^the (source|target) file contains all of the following headers in this o
|
|
124
132
|
expect(@brt.send(st.to_sym).data_subject.df.vectors.to_a).to eq @brt.send(st.to_sym).fields.field_names
|
125
133
|
end
|
126
134
|
|
135
|
+
Given /^the (source|target) file contains all of the following headers in no particular order:$/ do |st, table|
|
136
|
+
table.rows.each do |row|
|
137
|
+
field = row.first
|
138
|
+
step "the #{st} field '#{field}'"
|
139
|
+
end
|
140
|
+
|
141
|
+
@brt.run_transforms if st == 'target'
|
142
|
+
expect(@brt.send(st.to_sym).data_subject.df.vectors.to_a).to match_array @brt.send(st.to_sym).fields.field_names
|
143
|
+
end
|
144
|
+
|
127
145
|
### Source
|
128
146
|
|
129
147
|
Given /^the source '([[:alnum:]\s\-_]+)'$/ do |arg|
|
@@ -260,6 +278,7 @@ Then /^the target field '([^']+)' has the label '([^']+)'$/ do |target_field, la
|
|
260
278
|
data_field = @brt.targets.fields.next
|
261
279
|
expect(data_field.metadata[:label]).to eq label
|
262
280
|
expect(data_field.name).to eq target_field
|
281
|
+
|
263
282
|
end
|
264
283
|
|
265
284
|
Then /^the target field '([^']+)' is copied from the source field$/ do |target_field|
|
@@ -780,3 +799,10 @@ Then /^the target '([[:alnum:]\s\-_]+)' has (\d+) record(?:s|) where '([[:alnum:
|
|
780
799
|
@brt.run_transforms
|
781
800
|
expect(@brt.targets[target_name].where_between(field_name, low_value, high_value).size).to eq nrecords.to_i
|
782
801
|
end
|
802
|
+
|
803
|
+
Then /^the target field '([^']+)' (?:has|is set to) the multiline value$/ do |target_field, value|
|
804
|
+
step "the target field '#{target_field}'"
|
805
|
+
@brt.run_transforms
|
806
|
+
target_name, target_field_name = @brt.targets.parse_full_field(target_field)
|
807
|
+
expect(@brt.targets[target_name].fields[target_field_name].value).to eq Remi::Testing::BusinessRules::ParseFormula.parse(value)
|
808
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'all_jobs_shared'
|
2
|
+
require 'aws-sdk'
|
3
|
+
|
4
|
+
class S3FileTargetJob < Remi::Job
|
5
|
+
target :some_file do
|
6
|
+
encoder Remi::Encoder::CsvFile.new
|
7
|
+
loader Remi::Loader::S3File.new(
|
8
|
+
credentials: {
|
9
|
+
aws_access_key_id: 'blort',
|
10
|
+
aws_secret_access_key: 'blerg',
|
11
|
+
region: 'us-west-2'
|
12
|
+
},
|
13
|
+
kms_opt: {
|
14
|
+
ciphertext: 'blergity'
|
15
|
+
},
|
16
|
+
bucket: 'the-big-one',
|
17
|
+
remote_path: "some_file_#{DateTime.current.strftime('%Y%m%d')}.csv"
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
transform :main do
|
22
|
+
end
|
23
|
+
end
|
data/lib/remi/data_subject.rb
CHANGED
@@ -93,7 +93,11 @@ module Remi
|
|
93
93
|
sttm = SourceToTargetMap.new(df, source_metadata: fields)
|
94
94
|
fields.keys.each do |field|
|
95
95
|
next unless (types.size == 0 || types.include?(fields[field][:type])) && df.vectors.include?(field)
|
96
|
-
|
96
|
+
begin
|
97
|
+
sttm.source(field).target(field).transform(Remi::Transform::EnforceType.new).execute
|
98
|
+
rescue StandardError => err
|
99
|
+
raise ArgumentError, "Field '#{field}': #{err.message}"
|
100
|
+
end
|
97
101
|
end
|
98
102
|
|
99
103
|
self
|
@@ -8,10 +8,10 @@ module Remi
|
|
8
8
|
end
|
9
9
|
|
10
10
|
|
11
|
-
# The None Parser just returns
|
11
|
+
# The None Parser just returns an empty dataframe if it's not given any data
|
12
12
|
class Parser::None < Parser
|
13
|
-
def parse(data)
|
14
|
-
data
|
13
|
+
def parse(data=nil)
|
14
|
+
data || Remi::DataFrame::Daru.new([], order: fields.keys)
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -1,13 +1,59 @@
|
|
1
1
|
module Remi
|
2
2
|
|
3
|
+
module DataSubject::S3File
|
4
|
+
attr_accessor :region
|
5
|
+
attr_accessor :aws_credentials
|
6
|
+
|
7
|
+
def init_aws_credentials(credentials)
|
8
|
+
@aws_credentials = Aws::Credentials.new(
|
9
|
+
credentials.fetch(:aws_access_key_id, ENV['AWS_ACCESS_KEY_ID']),
|
10
|
+
credentials.fetch(:aws_secret_access_key, ENV['AWS_SECRET_ACCESS_KEY'])
|
11
|
+
)
|
12
|
+
end
|
13
|
+
|
14
|
+
def s3
|
15
|
+
@s3 ||= Aws::S3::Resource.new(
|
16
|
+
credentials: aws_credentials,
|
17
|
+
region: region
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
def encrypt_args
|
22
|
+
@kms_args || {}
|
23
|
+
end
|
24
|
+
|
25
|
+
def init_kms(opt)
|
26
|
+
return nil unless opt
|
27
|
+
|
28
|
+
kms = Aws::KMS::Client.new(
|
29
|
+
region: @region,
|
30
|
+
credentials: @aws_credentials
|
31
|
+
)
|
32
|
+
|
33
|
+
ciphertext = opt.fetch(:ciphertext)
|
34
|
+
algorithm = opt.fetch(:algorithm, 'AES256')
|
35
|
+
key = kms.decrypt(ciphertext_blob: Base64.decode64(ciphertext)).plaintext
|
36
|
+
|
37
|
+
@kms_args = {
|
38
|
+
sse_customer_algorithm: algorithm,
|
39
|
+
sse_customer_key: key
|
40
|
+
}
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
3
44
|
# S3 File extractor
|
4
45
|
# Used to extract files from Amazon S3
|
5
46
|
#
|
6
|
-
# @example
|
47
|
+
# @example Standard use
|
7
48
|
#
|
8
49
|
# class MyJob < Remi::Job
|
9
50
|
# source :some_file do
|
10
51
|
# extractor Remi::Extractor::S3File.new(
|
52
|
+
# credentials: {
|
53
|
+
# aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
|
54
|
+
# aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
55
|
+
# region: 'us-west-2'
|
56
|
+
# },
|
11
57
|
# bucket: 'my-awesome-bucket',
|
12
58
|
# remote_path: 'some_file-',
|
13
59
|
# most_recent_only: true
|
@@ -28,9 +74,40 @@ module Remi
|
|
28
74
|
# # 0 1 Albert
|
29
75
|
# # 1 2 Betsy
|
30
76
|
# # 2 3 Camu
|
77
|
+
#
|
78
|
+
# @example Using AWS KMS
|
79
|
+
# To use AWS KMS, supply a :ciphertext and optional :algorithm (default is AES256).
|
80
|
+
# The encrypted key stored in the ciphertext must be the same as that used when the file was written.
|
81
|
+
#
|
82
|
+
# class MyJob < Remi::Job
|
83
|
+
# source :some_file do
|
84
|
+
# extractor Remi::Extractor::S3File.new(
|
85
|
+
# credentials: {
|
86
|
+
# aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
|
87
|
+
# aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
88
|
+
# region: 'us-west-2'
|
89
|
+
# },
|
90
|
+
# bucket: 'my-awesome-bucket',
|
91
|
+
# remote_path: 'some_file-',
|
92
|
+
# most_recent_only: true,
|
93
|
+
# kms_opt: {
|
94
|
+
# ciphertext: '<base64-encoded ciphertext>'
|
95
|
+
# }
|
96
|
+
# )
|
97
|
+
# parser Remi::Parser::CsvFile.new(
|
98
|
+
# csv_options: {
|
99
|
+
# headers: true,
|
100
|
+
# col_sep: '|'
|
101
|
+
# }
|
102
|
+
# )
|
103
|
+
# end
|
104
|
+
# end
|
31
105
|
class Extractor::S3File < Extractor::FileSystem
|
106
|
+
include Remi::DataSubject::S3File
|
32
107
|
|
33
|
-
# @param
|
108
|
+
# @param bucket [String] Name of S3 bucket containing the files
|
109
|
+
# @param kms_opt [Hash] Hash containing AWS KMS options
|
110
|
+
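# @param credentials [Hash] Hash containing AWS credentials (all keys optional: :aws_access_key_id and :aws_secret_access_key fall back to ENV['AWS_ACCESS_KEY_ID'] and ENV['AWS_SECRET_ACCESS_KEY']; :region defaults to 'us-west-2')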
|
34
111
|
def initialize(*args, **kargs, &block)
|
35
112
|
super
|
36
113
|
init_s3_file(*args, **kargs, &block)
|
@@ -39,10 +116,12 @@ module Remi
|
|
39
116
|
# Called to extract files from the source filesystem.
|
40
117
|
# @return [Array<String>] An array of paths to a local copy of the files extracted
|
41
118
|
def extract
|
119
|
+
init_kms(@kms_opt)
|
120
|
+
|
42
121
|
entries.map do |entry|
|
43
122
|
local_file = File.join(@local_path, entry.name)
|
44
123
|
logger.info "Downloading #{entry.pathname} from S3 to #{local_file}"
|
45
|
-
File.open(local_file, 'wb') { |file| entry.raw.get(response_target: file) }
|
124
|
+
File.open(local_file, 'wb') { |file| entry.raw.get({ response_target: file }.merge(encrypt_args)) }
|
46
125
|
local_file
|
47
126
|
end
|
48
127
|
end
|
@@ -55,7 +134,7 @@ module Remi
|
|
55
134
|
# @return [Array<Extractor::FileSystemEntry>] List of objects in the bucket/prefix
|
56
135
|
def all_entries!
|
57
136
|
# S3 does not track anything like a create time, so use last modified for both
|
58
|
-
bucket.objects(prefix: @remote_path.to_s).map do |entry|
|
137
|
+
s3.bucket(@bucket_name).objects(prefix: @remote_path.to_s).map do |entry|
|
59
138
|
Extractor::FileSystemEntry.new(
|
60
139
|
pathname: entry.key,
|
61
140
|
create_time: entry.last_modified,
|
@@ -65,20 +144,128 @@ module Remi
|
|
65
144
|
end
|
66
145
|
end
|
67
146
|
|
68
|
-
# @return [Aws::S3::Client] The S3 client used
|
69
|
-
def s3_client
|
70
|
-
@s3_client ||= Aws::S3::Client.new
|
71
|
-
end
|
72
|
-
|
73
147
|
private
|
74
148
|
|
75
|
-
def init_s3_file(*args, bucket:, **kargs)
|
149
|
+
def init_s3_file(*args, credentials: {}, bucket:, kms_opt: nil, **kargs)
|
150
|
+
@region = credentials.fetch(:region, 'us-west-2')
|
151
|
+
@kms_opt = kms_opt
|
152
|
+
init_aws_credentials(credentials)
|
153
|
+
|
76
154
|
@bucket_name = bucket
|
77
155
|
end
|
156
|
+
end
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
# S3 File loader
|
161
|
+
# Used to post files to Amazon S3
|
162
|
+
#
|
163
|
+
# @example Standard use
|
164
|
+
#
|
165
|
+
# class MyJob < Remi::Job
|
166
|
+
# target :some_file do
|
167
|
+
# encoder Remi::Encoder::CsvFile.new
|
168
|
+
# loader Remi::Loader::S3File.new(
|
169
|
+
# credentials: {
|
170
|
+
# aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
|
171
|
+
# aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
172
|
+
# region: 'us-west-2'
|
173
|
+
# },
|
174
|
+
# bucket: 'itk-de-archive',
|
175
|
+
# remote_path: 'awesome.csv'
|
176
|
+
# )
|
177
|
+
# end
|
178
|
+
# end
|
179
|
+
#
|
180
|
+
# job = MyJob.new
|
181
|
+
# job.some_file.df = Daru::DataFrame.new(
|
182
|
+
# {
|
183
|
+
# numbers: [1,2,3],
|
184
|
+
# words: ['one', 'two', 'three']
|
185
|
+
# }
|
186
|
+
# )
|
187
|
+
# job.some_file.load
|
188
|
+
#
|
189
|
+
# @example Using AWS KMS
|
190
|
+
# To use AWS KMS, supply a :ciphertext and optional :algorithm (default is AES256).
|
191
|
+
# The encrypted key stored in the ciphertext must be the same as that used for reading the file.
|
192
|
+
#
|
193
|
+
# class MyJob < Remi::Job
|
194
|
+
# target :some_file do
|
195
|
+
# encoder Remi::Encoder::CsvFile.new
|
196
|
+
# loader Remi::Loader::S3File.new(
|
197
|
+
# credentials: {
|
198
|
+
# aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
|
199
|
+
# aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
|
200
|
+
# region: 'us-west-2'
|
201
|
+
# },
|
202
|
+
# bucket: 'itk-de-archive',
|
203
|
+
# remote_path: 'awesome.csv',
|
204
|
+
# kms_opt: {
|
205
|
+
# ciphertext: '<base64-encoded ciphertext>'
|
206
|
+
# }
|
207
|
+
# )
|
208
|
+
# end
|
209
|
+
# end
|
210
|
+
#
|
211
|
+
# @example Generating a ciphertext
|
212
|
+
# A ciphertext can be generated using the AWS SDK
|
213
|
+
#
|
214
|
+
# require 'aws-sdk'
|
215
|
+
# require 'base64'
|
216
|
+
#
|
217
|
+
# aws_credentials = Aws::Credentials.new(
|
218
|
+
# ENV['AWS_ACCESS_KEY_ID'],
|
219
|
+
# ENV['AWS_SECRET_ACCESS_KEY']
|
220
|
+
# )
|
221
|
+
#
|
222
|
+
# kms = Aws::KMS::Client.new(
|
223
|
+
# region: 'us-west-2',
|
224
|
+
# credentials: aws_credentials
|
225
|
+
# )
|
226
|
+
#
|
227
|
+
# # See AWS docs for creating keys: http://docs.aws.amazon.com/kms/latest/developerguide/create-keys.html
|
228
|
+
# data_key = kms.generate_data_key(
|
229
|
+
# key_id: 'alias/alias-of-kms-key',
|
230
|
+
# key_spec: 'AES_256'
|
231
|
+
# )
|
232
|
+
#
|
233
|
+
# ciphertext = Base64.strict_encode64(data_key.ciphertext_blob)
|
234
|
+
# #=> "AQIDAHjmmRVcBAdMHsA9VUoJKgbW8niK2qL1qPcQ2OWEUlh5XAFw0vfl+QIgawB8cbAZ2OqXAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMIUIFFh++2w4d9al7AgEQgDvSRXQCOPLSMOjRS/lM5uxuyRV47qInlKKBIezIaYzXuFu1sRU+L46HqRyS0XqR4flFJ/fc8yEj3pU1UA=="
|
235
|
+
class Loader::S3File < Loader
|
236
|
+
include Remi::DataSubject::S3File
|
237
|
+
|
238
|
+
# @param bucket [String] Name of S3 bucket containing the files
|
239
|
+
# @param kms_opt [Hash] Hash containing AWS KMS options
|
240
|
+
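# @param credentials [Hash] Hash containing AWS credentials (all keys optional: :aws_access_key_id and :aws_secret_access_key fall back to ENV['AWS_ACCESS_KEY_ID'] and ENV['AWS_SECRET_ACCESS_KEY']; :region defaults to 'us-west-2')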
|
241
|
+
def initialize(*args, **kargs, &block)
|
242
|
+
super
|
243
|
+
init_s3_loader(*args, **kargs, &block)
|
244
|
+
end
|
245
|
+
|
246
|
+
attr_reader :remote_path
|
247
|
+
attr_reader :bucket_name
|
78
248
|
|
79
|
-
|
80
|
-
|
249
|
+
# Copies data to S3
|
250
|
+
# @param data [Object] The path to the file in the temporary work location
|
251
|
+
# @return [true] On success
|
252
|
+
def load(data)
|
253
|
+
init_kms(@kms_opt)
|
254
|
+
|
255
|
+
@logger.info "Writing file #{data} to S3 #{@bucket_name} as #{@remote_path}"
|
256
|
+
s3.bucket(@bucket_name).object(@remote_path).upload_file(data, encrypt_args)
|
257
|
+
true
|
81
258
|
end
|
82
259
|
|
260
|
+
private
|
261
|
+
|
262
|
+
def init_s3_loader(*args, credentials:{}, bucket:, remote_path:, kms_opt: nil, **kargs, &block)
|
263
|
+
@region = credentials.fetch(:region, 'us-west-2')
|
264
|
+
@kms_opt = kms_opt
|
265
|
+
init_aws_credentials(credentials)
|
266
|
+
|
267
|
+
@bucket_name = bucket
|
268
|
+
@remote_path = remote_path
|
269
|
+
end
|
83
270
|
end
|
84
271
|
end
|
@@ -185,11 +185,11 @@ module Remi
|
|
185
185
|
if @operation == :update
|
186
186
|
Remi::SfBulkHelper::SfBulkUpdate.update(restforce_client, @sfo, data, batch_size: @batch_size, logger: logger)
|
187
187
|
elsif @operation == :create
|
188
|
-
Remi::SfBulkHelper::SfBulkCreate.create(restforce_client, @sfo, data, batch_size: @batch_size, logger: logger)
|
188
|
+
Remi::SfBulkHelper::SfBulkCreate.create(restforce_client, @sfo, data, batch_size: @batch_size, max_attempts: 1, logger: logger)
|
189
189
|
elsif @operation == :upsert
|
190
190
|
Remi::SfBulkHelper::SfBulkUpsert.upsert(restforce_client, @sfo, data, batch_size: @batch_size, external_id: @external_id, logger: logger)
|
191
191
|
elsif @operation == :delete
|
192
|
-
Remi::SfBulkHelper::SfBulkDelete.
|
192
|
+
Remi::SfBulkHelper::SfBulkDelete.delete(restforce_client, @sfo, data, batch_size: @batch_size, logger: logger)
|
193
193
|
else
|
194
194
|
raise ArgumentError, "Unknown operation: #{@operation}"
|
195
195
|
end
|
@@ -79,7 +79,9 @@ module Remi
|
|
79
79
|
end
|
80
80
|
|
81
81
|
merge_id = Array(row.delete(@merge_id_field))
|
82
|
-
|
82
|
+
merge_row = row.select { |_, v| !v.blank? }
|
83
|
+
logger.info "Merging Id #{merge_id} into #{merge_row}"
|
84
|
+
soapforce_client.merge!(@sfo, merge_row, merge_id)
|
83
85
|
end
|
84
86
|
else
|
85
87
|
raise ArgumentError, "Unknown soap operation: #{@operation}"
|
@@ -1,4 +1,44 @@
|
|
1
1
|
module Remi
|
2
|
+
module DataSubject::SftpFile
|
3
|
+
|
4
|
+
attr_reader :sftp_session
|
5
|
+
|
6
|
+
def sftp_retry(&block)
|
7
|
+
tries ||= @retries
|
8
|
+
|
9
|
+
block.call
|
10
|
+
rescue StandardError => err
|
11
|
+
if (tries -= 1) > 0
|
12
|
+
logger.error "Error: #{err.message}"
|
13
|
+
logger.error "Will retry #{tries} more times"
|
14
|
+
sleep(1)
|
15
|
+
retry
|
16
|
+
else
|
17
|
+
raise err
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def begin_connection
|
22
|
+
sftp_retry do
|
23
|
+
Timeout.timeout(@timeout) do
|
24
|
+
@ssh_session = Net::SSH.start(@host, @username, password: @password, port: @port, number_of_password_prompts: 0)
|
25
|
+
@sftp_session = Net::SFTP::Session.new(@ssh_session)
|
26
|
+
@sftp_session.connect!
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def end_connection
|
32
|
+
@sftp_session.close_channel unless @sftp_session.nil?
|
33
|
+
@ssh_session.close unless @ssh_session.nil?
|
34
|
+
|
35
|
+
Timeout.timeout(@timeout) do
|
36
|
+
sleep 1 until (@sftp_session.nil? || @sftp_session.closed?) && (@ssh_session.nil? || @ssh_session.closed?)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
|
2
42
|
|
3
43
|
# Sftp File extractor
|
4
44
|
# Used to extract files from an SFTP server
|
@@ -35,13 +75,15 @@ module Remi
|
|
35
75
|
# # 1 2 Betsy
|
36
76
|
# # 2 3 Camu
|
37
77
|
class Extractor::SftpFile < Extractor::FileSystem
|
38
|
-
|
78
|
+
include DataSubject::SftpFile
|
39
79
|
|
40
80
|
# @param credentials [Hash] Options hash containing login credentials
|
41
81
|
# @param credentials [String] :host SFTP host (e.g., coolserver.com)
|
42
82
|
# @param credentials [String] :username SFTP username
|
43
83
|
# @param credentials [String] :password SFTP password
|
44
84
|
# @param credentials [String] :port SFTP port (default: 22)
|
85
|
+
# @param retries [Integer] Maximum number of attempts for a connection or operation, counting the initial attempt (default: 3)
|
86
|
+
# @param timeout [Integer] Number of seconds to wait for establishing/closing a connection (default: 30)
|
45
87
|
def initialize(*args, **kargs, &block)
|
46
88
|
super
|
47
89
|
init_sftp_extractor(*args, **kargs)
|
@@ -55,15 +97,16 @@ module Remi
|
|
55
97
|
# Called to extract files from the source filesystem.
|
56
98
|
# @return [Array<String>] An array of paths to a local copy of the files extracted
|
57
99
|
def extract
|
58
|
-
|
59
|
-
entries.map do |entry|
|
60
|
-
local_file = File.join(@local_path, entry.name)
|
61
|
-
logger.info "Downloading #{entry.name} to #{local_file}"
|
62
|
-
retry_download { sftp.download!(File.join(@remote_path, entry.name), local_file) }
|
63
|
-
local_file
|
100
|
+
begin_connection
|
64
101
|
|
65
|
-
|
102
|
+
entries.map do |entry|
|
103
|
+
local_file = File.join(@local_path, entry.name)
|
104
|
+
logger.info "Downloading #{entry.name} to #{local_file}"
|
105
|
+
sftp_retry { sftp_session.download!(File.join(@remote_path, entry.name), local_file) }
|
106
|
+
local_file
|
66
107
|
end
|
108
|
+
ensure
|
109
|
+
end_connection
|
67
110
|
end
|
68
111
|
|
69
112
|
# @return [Array<Extractor::FileSystemEntry>] (Memoized) list of objects in the bucket/prefix
|
@@ -73,8 +116,7 @@ module Remi
|
|
73
116
|
|
74
117
|
# @return [Array<Extractor::FileSystemEntry>] (Memoized) list of entries in the remote path on the SFTP server
|
75
118
|
def all_entries!
|
76
|
-
|
77
|
-
sftp_entries.map do |entry|
|
119
|
+
sftp_session.dir.entries(@remote_path).map do |entry|
|
78
120
|
# Early versions of the protocol don't support create time, fake it with modified time?
|
79
121
|
FileSystemEntry.new(
|
80
122
|
pathname: File.join(@remote_path, entry.name),
|
@@ -87,33 +129,13 @@ module Remi
|
|
87
129
|
|
88
130
|
private
|
89
131
|
|
90
|
-
def init_sftp_extractor(*args, credentials:, **kargs)
|
132
|
+
def init_sftp_extractor(*args, credentials:, retries: 3, timeout: 30, **kargs)
|
91
133
|
@host = credentials.fetch(:host)
|
92
134
|
@username = credentials.fetch(:username)
|
93
|
-
@password = credentials.fetch(:password)
|
135
|
+
@password = credentials.fetch(:password, nil)
|
94
136
|
@port = credentials.fetch(:port, '22')
|
95
|
-
|
96
|
-
|
97
|
-
def connection(&block)
|
98
|
-
result = nil
|
99
|
-
Net::SFTP.start(@host, @username, password: @password, port: @port) do |sftp|
|
100
|
-
result = yield sftp
|
101
|
-
end
|
102
|
-
result
|
103
|
-
end
|
104
|
-
|
105
|
-
def retry_download(&block)
|
106
|
-
1.upto(N_RETRY).each do |itry|
|
107
|
-
begin
|
108
|
-
block.call
|
109
|
-
break
|
110
|
-
rescue RuntimeError => err
|
111
|
-
raise err unless itry < N_RETRY
|
112
|
-
logger.error "Download failed with error: #{err.message}"
|
113
|
-
logger.error "Retry attempt #{itry}/#{N_RETRY-1}"
|
114
|
-
sleep(1)
|
115
|
-
end
|
116
|
-
end
|
137
|
+
@retries = retries
|
138
|
+
@timeout = timeout
|
117
139
|
end
|
118
140
|
end
|
119
141
|
|
@@ -143,8 +165,16 @@ module Remi
|
|
143
165
|
# job.my_target.df = my_df
|
144
166
|
# job.my_target.load
|
145
167
|
class Loader::SftpFile < Loader
|
168
|
+
include DataSubject::SftpFile
|
146
169
|
|
170
|
+
# @param credentials [Hash] Options hash containing login credentials
|
171
|
+
# @param credentials [String] :host SFTP host (e.g., coolserver.com)
|
172
|
+
# @param credentials [String] :username SFTP username
|
173
|
+
# @param credentials [String] :password SFTP password
|
174
|
+
# @param credentials [String] :port SFTP port (default: 22)
|
147
175
|
# @param remote_path [String, Pathname] Full path to the file to be created on the target filesystem
|
176
|
+
# @param retries [Integer] Maximum number of attempts for a connection or operation, counting the initial attempt (default: 3)
|
177
|
+
# @param timeout [Integer] Number of seconds to wait for establishing/closing a connection (default: 30)
|
148
178
|
def initialize(*args, **kargs, &block)
|
149
179
|
super
|
150
180
|
init_sftp_loader(*args, **kargs, &block)
|
@@ -156,42 +186,27 @@ module Remi
|
|
156
186
|
# @param data [Object] The path to the file in the temporary work location
|
157
187
|
# @return [true] On success
|
158
188
|
def load(data)
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
189
|
+
begin_connection
|
190
|
+
|
191
|
+
logger.info "Uploading #{data} to #{@username}@#{@host}: #{@remote_path}"
|
192
|
+
sftp_retry { sftp_session.upload! data, @remote_path }
|
163
193
|
|
164
194
|
true
|
195
|
+
ensure
|
196
|
+
end_connection
|
165
197
|
end
|
166
198
|
|
167
199
|
|
168
200
|
private
|
169
201
|
|
170
|
-
def init_sftp_loader(*args, credentials:, remote_path:, **kargs, &block)
|
171
|
-
@
|
202
|
+
def init_sftp_loader(*args, credentials:, remote_path:, retries: 3, timeout: 30, **kargs, &block)
|
203
|
+
@host = credentials.fetch(:host)
|
204
|
+
@username = credentials.fetch(:username)
|
205
|
+
@password = credentials.fetch(:password, nil)
|
206
|
+
@port = credentials.fetch(:port, '22')
|
172
207
|
@remote_path = remote_path
|
173
|
-
|
174
|
-
|
175
|
-
def connection(&block)
|
176
|
-
result = nil
|
177
|
-
Net::SFTP.start(@credentials[:host], @credentials[:username], password: @credentials[:password], port: @credentials[:port] || '22') do |sftp|
|
178
|
-
result = yield sftp
|
179
|
-
end
|
180
|
-
result
|
181
|
-
end
|
182
|
-
|
183
|
-
def retry_upload(ntry=2, &block)
|
184
|
-
1.upto(ntry).each do |itry|
|
185
|
-
begin
|
186
|
-
block.call
|
187
|
-
break
|
188
|
-
rescue RuntimeError => err
|
189
|
-
raise err unless itry < ntry
|
190
|
-
logger.error "Upload failed with error: #{err.message}"
|
191
|
-
logger.error "Retry attempt #{itry}/#{ntry-1}"
|
192
|
-
sleep(1)
|
193
|
-
end
|
194
|
-
end
|
208
|
+
@retries = retries
|
209
|
+
@timeout = timeout
|
195
210
|
end
|
196
211
|
end
|
197
212
|
end
|
data/lib/remi/job/transform.rb
CHANGED
@@ -15,6 +15,7 @@ module Remi
|
|
15
15
|
# end
|
16
16
|
# tform.execute
|
17
17
|
class Transform
|
18
|
+
class IncompatibleTargetIndexError < StandardError; end
|
18
19
|
|
19
20
|
FieldMap = Struct.new(:from_subject, :to_subject, :field_from_to)
|
20
21
|
|
@@ -152,6 +153,19 @@ module Remi
|
|
152
153
|
sub_trans_ds = field_map.from_subject
|
153
154
|
fields_to_map = field_map.field_from_to.keys
|
154
155
|
|
156
|
+
job_idx = job_ds.df.index.to_a
|
157
|
+
sub_idx = sub_trans_ds.df.index.to_a
|
158
|
+
diff = ((job_idx | sub_idx) - (job_idx & sub_idx))
|
159
|
+
if job_idx.size > 0 && diff.size > 0 then
|
160
|
+
msg = <<-EOT
|
161
|
+
Incompatible target index!
|
162
|
+
Sub transform target #{sub_trans_ds.name} index is #{sub_trans_ds.df.index.inspect}
|
163
|
+
Job transform target #{job_ds.name} index is #{job_ds.df.index.inspect}
|
164
|
+
EOT
|
165
|
+
raise IncompatibleTargetIndexError.new msg
|
166
|
+
end
|
167
|
+
|
168
|
+
|
155
169
|
fields_to_map.each do |sub_trans_field|
|
156
170
|
job_field = field_map.field_from_to[sub_trans_field]
|
157
171
|
job_ds.fields[job_field].merge! sub_trans_ds.fields[sub_trans_field]
|
@@ -128,10 +128,13 @@ module Remi
|
|
128
128
|
|
129
129
|
# Private: Converts the transformed data into vectors in the target dataframe.
|
130
130
|
def map_to_target_df
|
131
|
+
index = @target_df.index.size > 0 ? @target_df.index : @source_df.index
|
132
|
+
|
131
133
|
result_hash_of_arrays.each do |vector, values|
|
132
|
-
@target_df[vector] = Daru::Vector.new(values, index:
|
134
|
+
@target_df[vector] = Daru::Vector.new(values, index: index)
|
133
135
|
end
|
134
136
|
|
137
|
+
@target_df.index = index
|
135
138
|
@target_df
|
136
139
|
end
|
137
140
|
|
data/lib/remi/transform.rb
CHANGED
data/lib/remi/version.rb
CHANGED
@@ -14,7 +14,11 @@ describe Parser::None do
|
|
14
14
|
let(:parser) { Parser::None.new }
|
15
15
|
|
16
16
|
context '#parse' do
|
17
|
-
it 'returns
|
17
|
+
it 'returns an empty dataframe when given no data' do
|
18
|
+
expect(parser.parse.to_a).to eq Remi::DataFrame::Daru.new([]).to_a
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'returns an what it was given' do
|
18
22
|
expect(parser.parse('some data')).to eq 'some data'
|
19
23
|
end
|
20
24
|
end
|
@@ -9,14 +9,23 @@ describe Extractor::S3File do
|
|
9
9
|
}
|
10
10
|
|
11
11
|
prefix = "the-best-prefix"
|
12
|
-
|
13
|
-
|
12
|
+
credentials = {
|
13
|
+
aws_access_key_id: 'BLAH',
|
14
|
+
aws_secret_access_key: 'DEBLAH'
|
15
|
+
}
|
16
|
+
|
17
|
+
@s3_file = Extractor::S3File.new(
|
18
|
+
bucket: 'the-best-bucket',
|
19
|
+
credentials: credentials,
|
20
|
+
remote_path: "#{prefix}"
|
21
|
+
)
|
22
|
+
|
23
|
+
@s3_file.s3.client.stub_responses(:list_objects, {
|
14
24
|
contents: [
|
15
25
|
{ key: "#{prefix}/file1.csv" },
|
16
26
|
{ key: "#{prefix}/file2.csv" }
|
17
27
|
]
|
18
28
|
})
|
19
|
-
|
20
29
|
end
|
21
30
|
|
22
31
|
it 'returns all entries' do
|
@@ -45,7 +45,7 @@ describe Loader::SalesforceSoap do
|
|
45
45
|
{ Id: '1234', Custom__c: 'something', Merge_Id: '5678' }
|
46
46
|
]
|
47
47
|
|
48
|
-
expect(soapforce_client).to receive(:merge) do
|
48
|
+
expect(soapforce_client).to receive(:merge!) do
|
49
49
|
[
|
50
50
|
:Contact,
|
51
51
|
{
|
@@ -65,7 +65,25 @@ describe Loader::SalesforceSoap do
|
|
65
65
|
{ Id: '2', Custom__c: 'something', Merge_Id: '20' }
|
66
66
|
]
|
67
67
|
|
68
|
-
expect(soapforce_client).to receive(:merge).twice
|
68
|
+
expect(soapforce_client).to receive(:merge!).twice
|
69
|
+
loader.load(data)
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'excludes blank data fields from the merge command' do
|
73
|
+
data = [
|
74
|
+
{ Id: '1234', Custom__c: '', Merge_Id: '5678' }
|
75
|
+
]
|
76
|
+
|
77
|
+
expect(soapforce_client).to receive(:merge!) do
|
78
|
+
[
|
79
|
+
:Contact,
|
80
|
+
{
|
81
|
+
Id: '1234'
|
82
|
+
},
|
83
|
+
['5678']
|
84
|
+
]
|
85
|
+
end
|
86
|
+
|
69
87
|
loader.load(data)
|
70
88
|
end
|
71
89
|
|
@@ -76,5 +94,4 @@ describe Loader::SalesforceSoap do
|
|
76
94
|
|
77
95
|
expect { loader.load(data) }.to raise_error KeyError
|
78
96
|
end
|
79
|
-
|
80
97
|
end
|
@@ -10,21 +10,25 @@ describe Extractor::SftpFile do
|
|
10
10
|
}
|
11
11
|
}
|
12
12
|
|
13
|
-
|
13
|
+
def generate_extractor
|
14
14
|
Extractor::SftpFile.new(
|
15
15
|
credentials: credentials,
|
16
16
|
remote_path: remote_path
|
17
17
|
)
|
18
|
-
|
18
|
+
end
|
19
|
+
|
20
|
+
let(:extractor) { generate_extractor }
|
19
21
|
|
20
22
|
let(:remote_filenames) { ['file1.csv', 'file2.csv'] }
|
21
|
-
let(:sftp_session) { instance_double('Net:SFTP::Session') }
|
22
23
|
|
23
24
|
before do
|
24
|
-
|
25
|
+
allow(extractor).to receive(:begin_connection)
|
25
26
|
|
26
|
-
|
27
|
-
allow(
|
27
|
+
sftp_session = double('sftp_session')
|
28
|
+
allow(extractor).to receive(:sftp_session).and_return(sftp_session)
|
29
|
+
|
30
|
+
sftp_dir = instance_double('Net::SFTP::Operations::Dir')
|
31
|
+
allow(sftp_session).to receive(:dir).and_return(sftp_dir)
|
28
32
|
|
29
33
|
allow(sftp_dir).to receive(:entries).and_return(remote_filenames.map { |fname|
|
30
34
|
Net::SFTP::Protocol::V04::Name.new(
|
@@ -36,65 +40,76 @@ describe Extractor::SftpFile do
|
|
36
40
|
|
37
41
|
context '.new' do
|
38
42
|
it 'creates an instance with valid parameters' do
|
39
|
-
|
43
|
+
extractor
|
40
44
|
end
|
41
45
|
|
42
46
|
it 'requires a hostname' do
|
43
47
|
credentials.delete(:host)
|
44
|
-
expect {
|
48
|
+
expect { generate_extractor }.to raise_error KeyError
|
45
49
|
end
|
46
50
|
|
47
51
|
it 'requires a username' do
|
48
52
|
credentials.delete(:username)
|
49
|
-
expect {
|
53
|
+
expect { generate_extractor }.to raise_error KeyError
|
50
54
|
end
|
51
55
|
|
52
|
-
it '
|
56
|
+
it 'does not require a password' do # If empty, it will use private keys
|
53
57
|
credentials.delete(:password)
|
54
|
-
expect {
|
58
|
+
expect { generate_extractor }.not_to raise_error
|
55
59
|
end
|
56
60
|
|
57
61
|
it 'defaults to using port 22' do
|
58
|
-
expect(
|
62
|
+
expect(extractor.port).to eq '22'
|
59
63
|
end
|
60
64
|
|
61
65
|
it 'allows the port to be defined in the credentials' do
|
62
66
|
credentials[:port] = '1234'
|
63
|
-
expect(
|
67
|
+
expect(generate_extractor.port).to eq '1234'
|
64
68
|
end
|
65
69
|
end
|
66
70
|
|
67
71
|
context '#all_entires' do
|
68
72
|
it 'returns all entries' do
|
69
|
-
expect(
|
73
|
+
expect(extractor.all_entries.map(&:name)).to eq remote_filenames
|
70
74
|
end
|
71
75
|
end
|
72
76
|
|
73
77
|
context '#extract' do
|
74
78
|
it 'downloads files from the ftp' do
|
75
|
-
expect(sftp_session).to receive(:download!).exactly(remote_filenames.size).times
|
76
|
-
|
79
|
+
expect(extractor.sftp_session).to receive(:download!).exactly(remote_filenames.size).times
|
80
|
+
extractor.extract
|
77
81
|
end
|
78
82
|
|
79
83
|
it 'creates local files with the right names' do
|
80
|
-
allow(sftp_session).to receive(:download!)
|
81
|
-
expect(
|
84
|
+
allow(extractor.sftp_session).to receive(:download!)
|
85
|
+
expect(extractor.extract.map { |f| Pathname.new(f).basename.to_s }).to eq remote_filenames
|
82
86
|
end
|
83
87
|
end
|
84
88
|
end
|
85
89
|
|
86
90
|
|
87
91
|
describe Loader::SftpFile do
|
88
|
-
|
92
|
+
|
93
|
+
let(:credentials) {
|
94
|
+
{
|
95
|
+
host: 'host',
|
96
|
+
username: 'username',
|
97
|
+
password: 'password'
|
98
|
+
}
|
99
|
+
}
|
100
|
+
|
101
|
+
let(:loader) { Loader::SftpFile.new(credentials: credentials, remote_path: 'some_path') }
|
89
102
|
let(:data) { double('some_data') }
|
90
|
-
let(:sftp_session) { instance_double('Net:SFTP::Session') }
|
91
103
|
|
92
104
|
before do
|
93
|
-
allow(
|
105
|
+
allow(loader).to receive(:begin_connection)
|
106
|
+
|
107
|
+
sftp_session = double('sftp_session')
|
108
|
+
allow(loader).to receive(:sftp_session).and_return(sftp_session)
|
94
109
|
end
|
95
110
|
|
96
111
|
it 'loads a csv to a target sftp filesystem' do
|
97
|
-
expect(sftp_session).to receive(:upload!).with(data, 'some_path')
|
112
|
+
expect(loader.sftp_session).to receive(:upload!).with(data, 'some_path')
|
98
113
|
loader.load data
|
99
114
|
end
|
100
115
|
end
|
data/spec/job/transform_spec.rb
CHANGED
@@ -253,5 +253,89 @@ describe Job do
|
|
253
253
|
my_transform.execute
|
254
254
|
end
|
255
255
|
end
|
256
|
+
|
257
|
+
describe '#import - edge cases' do
|
258
|
+
before do
|
259
|
+
class MyJob
|
260
|
+
source :job_source do
|
261
|
+
fields({ :id => {}, :name => {} })
|
262
|
+
end
|
263
|
+
target :job_target do
|
264
|
+
fields({ :id => {}, :name => {}, :funny_name => {} })
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
job.job_source.df = Remi::DataFrame::Daru.new({
|
269
|
+
id: [1, 2, 3],
|
270
|
+
name: ['one', 'two', 'three']
|
271
|
+
})
|
272
|
+
end
|
273
|
+
|
274
|
+
it 'correctly maps back to a source if the sub transform sorts the data' do
|
275
|
+
sub_transform = Job::Transform.new('arbitrary') do
|
276
|
+
source :st_source, [:id, :name]
|
277
|
+
target :st_target, [:funny_name]
|
278
|
+
|
279
|
+
st_source.df.sort!([:id], ascending: [false])
|
280
|
+
|
281
|
+
Remi::SourceToTargetMap.apply(st_source.df, st_target.df) do
|
282
|
+
map source(:name) .target(:funny_name)
|
283
|
+
.transform(->(v) { "funny-#{v}" })
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
my_transform = Job::Transform.new(job) do
|
288
|
+
import sub_transform do
|
289
|
+
map_source_fields :job_source, :st_source, {
|
290
|
+
:id => :id,
|
291
|
+
:name => :name
|
292
|
+
}
|
293
|
+
map_target_fields :st_target, :job_source, {
|
294
|
+
:funny_name => :funny_name
|
295
|
+
}
|
296
|
+
end
|
297
|
+
|
298
|
+
job.job_target.df = job.job_source.df.dup
|
299
|
+
end
|
300
|
+
|
301
|
+
my_transform.execute
|
302
|
+
expect(job.job_target.df[:funny_name].to_a).to eq(
|
303
|
+
job.job_target.df[:name].to_a.map { |v| "funny-#{v}" }
|
304
|
+
)
|
305
|
+
end
|
306
|
+
|
307
|
+
it 'raises an error if the subtransform fucks with index', wip: true do
|
308
|
+
sub_transform = Job::Transform.new('arbitrary') do
|
309
|
+
source :st_source, [:id, :name]
|
310
|
+
target :st_target, [:funny_name]
|
311
|
+
|
312
|
+
duplicated_df = Daru::DataFrame.new({ id: Array(st_source.df[:id][0]) * 3 })
|
313
|
+
|
314
|
+
st_source.df = st_source.df.join(duplicated_df, on: [:id], how: :left)
|
315
|
+
|
316
|
+
Remi::SourceToTargetMap.apply(st_source.df, st_target.df) do
|
317
|
+
map source(:name) .target(:funny_name)
|
318
|
+
.transform(->(v) { "funny-#{v}" })
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
my_transform = Job::Transform.new(job) do
|
323
|
+
import sub_transform do
|
324
|
+
map_source_fields :job_source, :st_source, {
|
325
|
+
:id => :id,
|
326
|
+
:name => :name
|
327
|
+
}
|
328
|
+
map_target_fields :st_target, :job_source, {
|
329
|
+
:funny_name => :funny_name
|
330
|
+
}
|
331
|
+
end
|
332
|
+
|
333
|
+
job.job_target.df = job.job_source.df.dup
|
334
|
+
end
|
335
|
+
|
336
|
+
expect { my_transform.execute }.to raise_error Job::Transform::IncompatibleTargetIndexError
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
256
340
|
end
|
257
341
|
end
|
@@ -298,4 +298,34 @@ describe SourceToTargetMap do
|
|
298
298
|
expect(sttm).to be_a(Remi::DataFrame::Daru)
|
299
299
|
end
|
300
300
|
end
|
301
|
+
|
302
|
+
describe 'source and target dataframes differ', wip: true do
|
303
|
+
it 'does not fail when the dataframe has been filtered' do
|
304
|
+
some_df = Daru::DataFrame.new(
|
305
|
+
{
|
306
|
+
:id => [1,2,3,4,5],
|
307
|
+
:something => ['x','','x','','x'],
|
308
|
+
:name => ['one', 'two', 'three', 'four', 'five']
|
309
|
+
}
|
310
|
+
)
|
311
|
+
|
312
|
+
filtered_df = some_df.where(some_df[:something].eq('x'))
|
313
|
+
target_df = Remi::DataFrame::Daru.new([])
|
314
|
+
|
315
|
+
Remi::SourceToTargetMap.apply(filtered_df, target_df) do
|
316
|
+
map source(:id) .target(:id)
|
317
|
+
map source(:name) .target(:name)
|
318
|
+
end
|
319
|
+
|
320
|
+
result = target_df[:id, :name].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
321
|
+
expect(result).to eq({
|
322
|
+
:id => [1, 3, 5],
|
323
|
+
:name => ['one', 'three', 'five']
|
324
|
+
})
|
325
|
+
end
|
326
|
+
|
327
|
+
|
328
|
+
end
|
329
|
+
|
330
|
+
|
301
331
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sterling Paramore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bond
|
@@ -231,6 +231,7 @@ files:
|
|
231
231
|
- features/json.feature
|
232
232
|
- features/metadata.feature
|
233
233
|
- features/parameters.feature
|
234
|
+
- features/s3_file_target_job.feature
|
234
235
|
- features/sample_job.feature
|
235
236
|
- features/sftp_file_target_job.feature
|
236
237
|
- features/step_definitions/remi_step.rb
|
@@ -254,6 +255,7 @@ files:
|
|
254
255
|
- jobs/json_job.rb
|
255
256
|
- jobs/metadata_job.rb
|
256
257
|
- jobs/parameters_job.rb
|
258
|
+
- jobs/s3_file_target_job.rb
|
257
259
|
- jobs/sample_job.rb
|
258
260
|
- jobs/sftp_file_target_job.rb
|
259
261
|
- jobs/sub_job_example_job.rb
|
@@ -372,6 +374,7 @@ test_files:
|
|
372
374
|
- features/json.feature
|
373
375
|
- features/metadata.feature
|
374
376
|
- features/parameters.feature
|
377
|
+
- features/s3_file_target_job.feature
|
375
378
|
- features/sample_job.feature
|
376
379
|
- features/sftp_file_target_job.feature
|
377
380
|
- features/step_definitions/remi_step.rb
|