remi 0.2.27 → 0.2.28

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/Gemfile.lock +34 -5
  4. data/features/metadata.feature +17 -0
  5. data/features/step_definitions/remi_step.rb +6 -6
  6. data/features/transforms/date_diff.feature +1 -0
  7. data/jobs/aggregate_job.rb +0 -1
  8. data/jobs/all_jobs_shared.rb +0 -2
  9. data/jobs/copy_source_job.rb +0 -1
  10. data/jobs/csv_file_target_job.rb +0 -1
  11. data/jobs/metadata_job.rb +60 -0
  12. data/jobs/parameters_job.rb +1 -1
  13. data/jobs/sample_job.rb +19 -20
  14. data/jobs/sftp_file_target_job.rb +0 -1
  15. data/jobs/transforms/date_diff_job.rb +1 -1
  16. data/jobs/transforms/nvl_job.rb +1 -1
  17. data/jobs/transforms/parse_date_job.rb +7 -4
  18. data/jobs/transforms/prefix_job.rb +1 -1
  19. data/jobs/transforms/truncate_job.rb +1 -1
  20. data/lib/remi.rb +10 -15
  21. data/lib/remi/cucumber/business_rules.rb +23 -23
  22. data/lib/remi/cucumber/data_source.rb +2 -1
  23. data/lib/remi/data_frame.rb +36 -0
  24. data/lib/remi/data_frame/daru.rb +67 -0
  25. data/lib/remi/data_subject.rb +71 -10
  26. data/lib/remi/data_subject/csv_file.rb +151 -0
  27. data/lib/remi/data_subject/data_frame.rb +53 -0
  28. data/lib/remi/data_subject/postgres.rb +136 -0
  29. data/lib/remi/data_subject/salesforce.rb +136 -0
  30. data/lib/remi/data_subject/sftp_file.rb +66 -0
  31. data/lib/remi/fields.rb +8 -0
  32. data/lib/remi/source_to_target_map.rb +56 -32
  33. data/lib/remi/transform.rb +426 -83
  34. data/lib/remi/version.rb +1 -1
  35. data/remi.gemspec +2 -1
  36. data/spec/metadata_spec.rb +62 -0
  37. metadata +15 -28
  38. data/lib/remi/data_source.rb +0 -13
  39. data/lib/remi/data_source/csv_file.rb +0 -101
  40. data/lib/remi/data_source/data_frame.rb +0 -16
  41. data/lib/remi/data_source/postgres.rb +0 -58
  42. data/lib/remi/data_source/salesforce.rb +0 -87
  43. data/lib/remi/data_target.rb +0 -15
  44. data/lib/remi/data_target/csv_file.rb +0 -42
  45. data/lib/remi/data_target/data_frame.rb +0 -14
  46. data/lib/remi/data_target/postgres.rb +0 -74
  47. data/lib/remi/data_target/salesforce.rb +0 -54
  48. data/lib/remi/data_target/sftp_file.rb +0 -54
  49. data/lib/remi/refinements/daru.rb +0 -85
@@ -1,152 +1,495 @@
1
1
  module Remi
2
- module Transform
3
- extend self
2
+ class Transform
4
3
 
5
- def [](meth)
6
- method(meth)
4
+ # Public: Initializes the static arguments of a transform.
5
+ #
6
+ # source_metadata - Metadata for the transform source.
7
+ # target_metadata - Metadata for the transform target.
8
+ def initialize(*args, source_metadata: {}, target_metadata: {}, **kargs, &block)
9
+ @source_metadata = source_metadata
10
+ @target_metadata = target_metadata
11
+ @multi_args = false
7
12
  end
8
13
 
9
- # We need to memoize each lambda with its static arguments so it's not recreated each row.
10
- # Inspired by parameter memoization in http://www.justinweiss.com/articles/4-simple-memoization-patterns-in-ruby-and-one-gem/
11
- def memoize_as_lambda(func, *args, &block)
12
- iv = instance_variable_get("@#{func}")
13
- return iv[args] if iv
14
+ # Public: Accessor for source metadata
15
+ attr_accessor :source_metadata
14
16
 
15
- hash_memo = Hash.new do |h, margs|
16
- h[margs] = lambda { |*largs| block.call(margs, *largs) }
17
+ # Public: Accessor for target metadata
18
+ attr_accessor :target_metadata
19
+
20
+ # Public: Set to true if the transform expects multiple arguments (default: false)
21
+ attr_reader :multi_arg
22
+
23
+ # Public: Defines the operation of this transform class.
24
+ #
25
+ # value - The value to be transformed
26
+ #
27
+ # Returns the transformed value.
28
+ def transform(value)
29
+ raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
30
+ end
31
+
32
+ # Public: Allows one to call the proc defined by the transform so that
33
+ # Remi::Transform instances can be used interchangeably with normal lambdas.
34
+ #
35
+ # values - The values to be transformed.
36
+ #
37
+ # Returns the transformed value.
38
+ def call(*values)
39
+ if @multi_arg
40
+ to_proc.call(*values)
41
+ else
42
+ to_proc.call(Array(values).first)
17
43
  end
18
- instance_variable_set("@#{func}", hash_memo)[args]
19
44
  end
20
45
 
21
- def prefix(prefix, if_blank: '')
22
- memoize_as_lambda(__method__, prefix, if_blank) do |(mprefix, mif_blank), larg|
23
- if larg.blank?
24
- mif_blank
46
+ # Public: Returns the transform as a lambda.
47
+ def to_proc
48
+ @to_proc ||= method(:transform).to_proc
49
+ end
50
+
51
+
52
+
53
+
54
+
55
+
56
+ # Public: Transform used to prefix string values in a vector.
57
+ #
58
+ # prefix - The string prefix.
59
+ # if_blank - String value to substitute if the value is blank (default: '').
60
+ #
61
+ # Examples:
62
+ #
63
+ # Prefix.new('CU').to_proc.call('123') # => "CU123"
64
+ class Prefix < Transform
65
+ def initialize(prefix, *args, if_blank: '', **kargs, &block)
66
+ super
67
+ @prefix = prefix
68
+ @if_blank = if_blank
69
+ end
70
+
71
+ def transform(value)
72
+ if value.blank?
73
+ @if_blank
25
74
  else
26
- "#{mprefix}#{larg}"
75
+ "#{@prefix}#{value}"
27
76
  end
28
77
  end
29
78
  end
30
79
 
31
- def postfix(postfix, if_blank: '')
32
- memoize_as_lambda(__method__, postfix, if_blank) do |(mpostfix, mif_blank), larg|
33
- if larg.blank?
34
- mif_blank
80
+
81
+ # Public: Transform used to postfix values in a vector.
82
+ #
83
+ # postfix - The string postfix.
84
+ # if_blank - String value to substitute if the value is blank (default: '').
85
+ #
86
+ # Examples:
87
+ #
88
+ # Postfix.new('A').to_proc.call('123') # => "123A"
89
+ class Postfix < Transform
90
+ def initialize(postfix, *args, if_blank: '', **kargs, &block)
91
+ super
92
+ @postfix = postfix
93
+ @if_blank = if_blank
94
+ end
95
+
96
+ def transform(value)
97
+ if value.blank?
98
+ @if_blank
35
99
  else
36
- "#{larg}#{mpostfix}"
100
+ "#{value}#{@postfix}"
37
101
  end
38
102
  end
39
103
  end
40
104
 
41
- def truncate(len)
42
- memoize_as_lambda(__method__, len) do |(mlen), larg|
43
- larg.slice(0,mlen)
105
+
106
+ # Public: Transform used to truncate values in a vector.
107
+ #
108
+ # len - The maximum length of the string.
109
+ #
110
+ # Examples:
111
+ #
112
+ # Truncate.new(3).to_proc.call('1234') # => "123"
113
+ class Truncate < Transform
114
+ def initialize(len, *args, **kargs, &block)
115
+ super
116
+ @len = len
117
+ end
118
+
119
+ def transform(value)
120
+ value.slice(0,@len)
44
121
  end
45
122
  end
46
123
 
47
- def concatenate(delimiter="")
48
- memoize_as_lambda(__method__, delimiter) do |(mdelimiter), *largs|
49
- Array(largs).join(mdelimiter)
124
+ # Public: Transform used to concatenate a list of values, joined by a delimiter.
125
+ #
126
+ # delimiter - The delimiter used between values in the list (default: '').
127
+ #
128
+ # Examples:
129
+ #
130
+ # Concatenate.new('-').to_proc.call('a', 'b', 'c') # => "a-b-c"
131
+ class Concatenate < Transform
132
+ def initialize(delimiter='', *args, **kargs, &block)
133
+ super
134
+ @multi_args = true
135
+ @delimiter = delimiter
136
+ end
137
+
138
+ def transform(*values)
139
+ Array(values).join(@delimiter)
50
140
  end
51
141
  end
52
142
 
53
- def lookup(h_lookup, missing: nil)
54
- memoize_as_lambda(__method__, h_lookup, missing) do |(mh_lookup, mmissing), larg|
55
- result = mh_lookup[larg]
143
+
144
+ # Public: Transform used to do key-value lookup on hash-like objects
145
+ #
146
+ # lookup - The lookup object that takes keys and returns values.
147
+ # missing - What to use if a key is not found in the lookup (default: nil). If this
148
+ # is a proc, it is sent the key as an argument.
149
+ #
150
+ # Examples:
151
+ #
152
+ # my_lookup = { 1 => 'one', 2 => 'two' }
153
+ # Lookup.new(my_lookup).to_proc.call(1) # => "one"
154
+ # Lookup.new(my_lookup).to_proc.call(3) # => nil
155
+ # Lookup.new(my_lookup, missing: 'UNK').to_proc.call(3) # => "UNK"
156
+ # Lookup.new(my_lookup, missing: ->(v) { "I don't know #{v}" }).to_proc.call(3) # => "I don't know 3"
157
+ class Lookup < Transform
158
+ def initialize(lookup, *args, missing: nil, **kargs, &block)
159
+ super
160
+ @lookup = lookup
161
+ @missing = missing
162
+ end
163
+
164
+ def transform(value)
165
+ result = @lookup[value]
56
166
 
57
167
  if !result.nil?
58
168
  result
59
- elsif mmissing.class == Proc
60
- mmissing.call(larg)
169
+ elsif @missing.respond_to? :call
170
+ @missing.call(value)
61
171
  else
62
- mmissing
172
+ @missing
63
173
  end
64
174
  end
65
175
  end
66
176
 
67
- def nvl(default='')
68
- memoize_as_lambda(__method__, default) do |(mdefault), *largs|
69
- Array(largs).find(->() { mdefault }) { |arg| !arg.blank? }
177
+ # Public: (Next-Value-Lookup) transform used to find the first non-blank value in a list.
178
+ #
179
+ # default - What to use if all values are blank (default: '').
180
+ #
181
+ # Examples:
182
+ #
183
+ # Nvl.new.to_proc.call(nil,'','a','b') # => "a"
184
+ class Nvl < Transform
185
+ def initialize(default='', *args, **kargs, &block)
186
+ super
187
+ @multi_args = true
188
+ @default = default
189
+ end
190
+
191
+ def transform(*values)
192
+ Array(values).find(->() { @default }) { |arg| !arg.blank? }
70
193
  end
71
194
  end
72
195
 
73
- def ifblank(replace_with)
74
- memoize_as_lambda(__method__, replace_with) do |(mreplace_with), larg|
75
- larg.blank? ? mreplace_with : larg
196
+ # Public: Used to replace blank values.
197
+ #
198
+ # replace_with - Use this if the source value is blank (default: '').
199
+ #
200
+ # Examples:
201
+ #
202
+ # IfBlank.new('MISSING VALUE').to_proc.call('alpha') # => "alpha"
203
+ # IfBlank.new('MISSING VALUE').to_proc.call('') # => "MISSING VALUE"
204
+ class IfBlank < Transform
205
+ def initialize(replace_with='', *args, **kargs, &block)
206
+ super
207
+ @replace_with = replace_with
208
+ end
209
+
210
+ def transform(value)
211
+ value.blank? ? @replace_with : value
76
212
  end
77
213
  end
78
214
 
79
- def format_date(from_fmt: '%m/%d/%Y', to_fmt: '%Y-%m-%d')
80
- memoize_as_lambda(__method__, from_fmt, to_fmt) do |(mfrom_fmt, mto_fmt), larg|
215
+ # Public: Parses a string and converts it to a date.
216
+ # This transform is metadata aware and will use :in_format metadata
217
+ # from the source
218
+ #
219
+ # in_format - The date format to use to convert the string (default: uses :in_format
220
+ # from the source metadata. If that is not defined, use '%Y-%m-%d').
221
+ # if_blank - Value to use if the incoming value is blank (default: uses :if_blank
222
+ # from the source metadata. If that is not defined, use nil). If set to
223
+ # :high, then use the largest date, if set to :low, use the lowest date.
224
+ #
225
+ # Examples:
226
+ #
227
+ # ParseDate.new(in_format: '%m/%d/%Y').to_proc.call('02/22/2013') # => Date.new(2013,2,22)
228
+ #
229
+ # tform = ParseDate.new
230
+ # tform.source_metadata = { in_format: '%m/%d/%Y' }
231
+ # tform.to_proc.call('02/22/2013') # => Date.new(2013,2,22)
232
+ class ParseDate < Transform
233
+ def initialize(*args, in_format: nil, if_blank: nil, **kargs, &block)
234
+ super
235
+ @in_format = in_format
236
+ @if_blank = if_blank
237
+ end
238
+
239
+ def in_format
240
+ @in_format ||= @source_metadata.fetch(:in_format, '%Y-%m-%d')
241
+ end
242
+
243
+ def if_blank
244
+ @if_blank ||= @source_metadata.fetch(:if_blank, nil)
245
+ end
246
+
247
+ def transform(value)
81
248
  begin
82
- if larg.blank? then
83
- ''
84
- elsif larg.respond_to? :strftime
85
- larg.strftime(mto_fmt)
249
+ if value.respond_to?(:strftime)
250
+ value
251
+ elsif value.blank? then
252
+ blank_handler(value)
86
253
  else
87
- Date.strptime(larg, mfrom_fmt).strftime(mto_fmt)
254
+ string_to_date(value)
88
255
  end
89
256
  rescue ArgumentError => err
90
- puts "Error parsing date (#{larg.class}): '#{larg}'"
91
- raise err
257
+ raise err, "Error parsing date (#{value.class}): '#{value}' with format #{in_format})"
258
+ end
259
+ end
260
+
261
+ def string_to_date(value)
262
+ Date.strptime(value, in_format)
263
+ end
264
+
265
+ def blank_handler(value)
266
+ if if_blank == :low
267
+ Date.new(1900,01,01)
268
+ elsif if_blank == :high
269
+ Date.new(2999,12,31)
270
+ elsif if_blank.respond_to? :call
271
+ if_blank.call(value)
272
+ else
273
+ if_blank
92
274
  end
93
275
  end
94
276
  end
95
277
 
96
278
 
97
- def parse_date(format: '%Y-%m-%d', if_blank: nil)
98
- memoize_as_lambda(__method__, format, if_blank.try(:to_sym)) do |(mformat, mif_blank), larg|
279
+ # Public: (Re)formats a date.
280
+ # This transform is metadata aware and will use :in_format/:out_format metadata
281
+ # from the source.
282
+ #
283
+ # in_format - The date format used to parse the input value. If the input value
284
+ # is a date, then this parameter is ignored. (default: uses :in_format
285
+ # from the source metadata. If that is not defined, use '%Y-%m-%d')
286
+ # out_format - The date format applied to provide the resulting string. (default:
287
+ # uses :out_format from the source metadata. If that is not defined,
288
+ # use '%Y-%m-%d')
289
+ #
290
+ # Examples:
291
+ #
292
+ # FormatDate.new(in_format: '%m/%d/%Y', out_format: '%Y-%m-%d').to_proc.call('02/22/2013') # => "2013-02-22"
293
+ #
294
+ # tform = FormatDate.new
295
+ # tform.source_metadata = { in_format: '%m/%d/%Y', out_format: '%Y-%m-%d' }
296
+ # tform.to_proc.call('02/22/2013') # => "2013-02-22"
297
+ class FormatDate < Transform
298
+ def initialize(*args, in_format: nil, out_format: nil, **kargs, &block)
299
+ super
300
+ @in_format = in_format
301
+ @out_format = out_format
302
+ end
303
+
304
+ def in_format
305
+ @in_format ||= @source_metadata.fetch(:in_format, '%Y-%m-%d')
306
+ end
307
+
308
+ def out_format
309
+ @out_format ||= @source_metadata.fetch(:out_format, '%Y-%m-%d')
310
+ end
311
+
312
+ def transform(value)
99
313
  begin
100
- if larg.respond_to?(:strftime)
101
- larg
102
- elsif larg.blank? then
103
- if mif_blank == :low
104
- Date.new(1900,01,01)
105
- elsif mif_blank == :high
106
- Date.new(2999,12,31)
107
- else
108
- mif_blank
109
- end
314
+ if value.blank? then
315
+ ''
316
+ elsif value.respond_to? :strftime
317
+ value.strftime(out_format)
110
318
  else
111
- Date.strptime(larg, mformat)
319
+ Date.strptime(value, in_format).strftime(out_format)
112
320
  end
113
321
  rescue ArgumentError => err
114
- puts "Error parsing date (#{larg.class}): '#{larg}')"
115
- raise err
322
+ raise err, "Error parsing date (#{value.class}): '#{value}' using the format #{in_format} => #{out_format}"
116
323
  end
117
324
  end
118
325
  end
119
326
 
120
- def date_diff(measure = :days)
121
- memoize_as_lambda(__method__, measure.to_sym) do |(mmeasure), *larg|
122
- if mmeasure == :days
123
- (larg.last - larg.first).to_i
124
- elsif mmeasure == :months
125
- (larg.last.year * 12 + larg.last.month) - (larg.first.year * 12 + larg.first.month)
126
- elsif mmeasure == :years
127
- larg.last.year - larg.first.year
327
+ # Public: Used to calculate differences between dates by a given measure.
328
+ #
329
+ # measure - One of :days, :months, or :years. (default: :days).
330
+ #
331
+ # Examples:
332
+ #
333
+ # DateDiff.new(:months).to_proc.call(Date.new(2016,1,30), Date.new(2016,3,1)) # => 2
334
+ class DateDiff < Transform
335
+ def initialize(measure = :days, *args, **kargs, &block)
336
+ super
337
+ @multi_args = true
338
+ @measure = measure
339
+ end
340
+
341
+ def transform(from_date, to_date)
342
+
343
+ case @measure.to_sym
344
+ when :days
345
+ (to_date - from_date).to_i
346
+ when :months
347
+ (to_date.year * 12 + to_date.month) - (from_date.year * 12 + from_date.month)
348
+ when :years
349
+ to_date.year - from_date.year
128
350
  else
129
- raise "I don't know how to handle #{mmeasure} yet"
351
+ raise ArgumentError, "Unknown date difference measure: #{@measure}"
130
352
  end
131
353
  end
132
354
  end
133
355
 
134
- def constant(const)
135
- memoize_as_lambda(__method__, const) do |(mconst), larg|
136
- mconst
356
+ # Public: Simply returns a constant.
357
+ #
358
+ # constant - The constant value to return.
359
+ #
360
+ # Examples:
361
+ #
362
+ # Constant.new('ewoks').to_proc.call('whatever') # => 'ewoks'
363
+ class Constant < Transform
364
+ def initialize(constant, *args, **kargs, &block)
365
+ super
366
+ @constant = constant
367
+ end
368
+
369
+ def transform(values)
370
+ @constant
137
371
  end
138
372
  end
139
373
 
140
- def replace(regex, replace_with)
141
- memoize_as_lambda(__method__, regex, replace_with) do |(mregex, mreplace_with), larg|
142
- larg.gsub(regex, replace_with)
374
+ # Public: Replaces one substring with another.
375
+ #
376
+ # to_replace - The string or regex to be replaced.
377
+ # replace_with - The value to substitute.
378
+ #
379
+ # Examples:
380
+ #
381
+ # Replace.new(/\s/, '-').to_proc.call('hey jude') #=> 'hey-jude'
382
+ class Replace < Transform
383
+ def initialize(to_replace, replace_with, *args, **kargs, &block)
384
+ super
385
+ @to_replace = to_replace
386
+ @replace_with = replace_with
387
+ end
388
+
389
+ def transform(value)
390
+ value.gsub(@to_replace, @replace_with)
391
+ end
392
+ end
393
+
394
+ # Public: Checks to see if an email validates against a regex (imperfect)
395
+ # and will substitute it with some value if not.
396
+ #
397
+ # substitute - The value used to substitute for an invalid email. Can use a proc
398
+ # that accepts the value of the invalid email
399
+ #
400
+ # Examples:
401
+ #
402
+ # ValidateEmail.new('invalid@example.com').to_proc.call('uhave.email') #=> 'invalid@example.com'
403
+ # ValidateEmail.new(->(v) { "#{SecureRandom.uuid}@example.com" }).to_proc.call('uhave.email') #=> '3f158f29-bc75-44f0-91ed-22fbe5157297@example.com'
404
+ class ValidateEmail < Transform
405
+ def initialize(substitute='', *args, **kargs, &block)
406
+ super
407
+ @substitute = substitute
408
+ end
409
+
410
+ def transform(value)
411
+ value = value || ''
412
+ if value.match(/^[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,}$/i)
413
+ value
414
+ elsif @substitute.respond_to? :call
415
+ @substitute.call value
416
+ else
417
+ @substitute
418
+ end
143
419
  end
144
420
  end
145
421
 
146
- def validate_email(substitute='')
147
- memoize_as_lambda(__method__, substitute) do |(msubstitute), larg|
148
- larg = larg || ''
149
- larg.match(/^[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,}$/i) ? larg : msubstitute
422
+
423
+
424
+ # Public: Enforces the type declared in the :type metadata field (if it exists)
425
+ #
426
+ # Examples:
427
+ #
428
+ # tform = EnforceType.new
429
+ # tform.source_metadata = { type: :date, in_format: '%m/%d/%Y' }
430
+ # tform.to_proc.call('02/22/2013') # => Date.new(2013,2,22)
431
+ #
432
+ # tform = EnforceType.new
433
+ # tform.source_metadata = { type: :integer }
434
+ # tform.to_proc.call('12') # => 12
435
+ #
436
+ # tform = EnforceType.new
437
+ # tform.source_metadata = { type: :integer }
438
+ # tform.to_proc.call('12A') # => ArgumentError: invalid value for Integer(): "12A"
439
+ class EnforceType < Transform
440
+ def initialize(*args, **kargs, &block)
441
+ super
442
+ end
443
+
444
+ def type
445
+ @type ||= @source_metadata.fetch(:type, :string)
446
+ end
447
+
448
+ def in_format
449
+ @in_format ||= @source_metadata.fetch(:in_format, '')
450
+ end
451
+
452
+ def scale
453
+ @scale ||= @source_metadata.fetch(:scale, 0)
454
+ end
455
+
456
+ def if_blank
457
+ return @if_blank if @if_blank_set
458
+ @if_blank_set = true
459
+ @if_blank = @source_metadata.fetch(:if_blank, nil)
460
+ end
461
+
462
+ def blank_handler(value)
463
+ return value unless value.blank?
464
+
465
+ if if_blank.respond_to? :to_proc
466
+ if_blank.to_proc.call(value)
467
+ else
468
+ if_blank
469
+ end
470
+ end
471
+
472
+ def transform(value)
473
+ if value.blank?
474
+ blank_handler(value)
475
+ else
476
+ case type
477
+ when :string
478
+ value
479
+ when :integer
480
+ Integer(value)
481
+ when :float
482
+ Float(value)
483
+ when :decimal
484
+ Float("%.#{scale}f" % Float(value))
485
+ when :date
486
+ Date.strptime(value, in_format)
487
+ when :datetime
488
+ Time.strptime(value, in_format)
489
+ else
490
+ raise ArgumentError, "Unknown type enforcement: #{type}"
491
+ end
492
+ end
150
493
  end
151
494
  end
152
495