remi 0.2.27 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/Gemfile.lock +34 -5
  4. data/features/metadata.feature +17 -0
  5. data/features/step_definitions/remi_step.rb +6 -6
  6. data/features/transforms/date_diff.feature +1 -0
  7. data/jobs/aggregate_job.rb +0 -1
  8. data/jobs/all_jobs_shared.rb +0 -2
  9. data/jobs/copy_source_job.rb +0 -1
  10. data/jobs/csv_file_target_job.rb +0 -1
  11. data/jobs/metadata_job.rb +60 -0
  12. data/jobs/parameters_job.rb +1 -1
  13. data/jobs/sample_job.rb +19 -20
  14. data/jobs/sftp_file_target_job.rb +0 -1
  15. data/jobs/transforms/date_diff_job.rb +1 -1
  16. data/jobs/transforms/nvl_job.rb +1 -1
  17. data/jobs/transforms/parse_date_job.rb +7 -4
  18. data/jobs/transforms/prefix_job.rb +1 -1
  19. data/jobs/transforms/truncate_job.rb +1 -1
  20. data/lib/remi.rb +10 -15
  21. data/lib/remi/cucumber/business_rules.rb +23 -23
  22. data/lib/remi/cucumber/data_source.rb +2 -1
  23. data/lib/remi/data_frame.rb +36 -0
  24. data/lib/remi/data_frame/daru.rb +67 -0
  25. data/lib/remi/data_subject.rb +71 -10
  26. data/lib/remi/data_subject/csv_file.rb +151 -0
  27. data/lib/remi/data_subject/data_frame.rb +53 -0
  28. data/lib/remi/data_subject/postgres.rb +136 -0
  29. data/lib/remi/data_subject/salesforce.rb +136 -0
  30. data/lib/remi/data_subject/sftp_file.rb +66 -0
  31. data/lib/remi/fields.rb +8 -0
  32. data/lib/remi/source_to_target_map.rb +56 -32
  33. data/lib/remi/transform.rb +426 -83
  34. data/lib/remi/version.rb +1 -1
  35. data/remi.gemspec +2 -1
  36. data/spec/metadata_spec.rb +62 -0
  37. metadata +15 -28
  38. data/lib/remi/data_source.rb +0 -13
  39. data/lib/remi/data_source/csv_file.rb +0 -101
  40. data/lib/remi/data_source/data_frame.rb +0 -16
  41. data/lib/remi/data_source/postgres.rb +0 -58
  42. data/lib/remi/data_source/salesforce.rb +0 -87
  43. data/lib/remi/data_target.rb +0 -15
  44. data/lib/remi/data_target/csv_file.rb +0 -42
  45. data/lib/remi/data_target/data_frame.rb +0 -14
  46. data/lib/remi/data_target/postgres.rb +0 -74
  47. data/lib/remi/data_target/salesforce.rb +0 -54
  48. data/lib/remi/data_target/sftp_file.rb +0 -54
  49. data/lib/remi/refinements/daru.rb +0 -85
@@ -1,152 +1,495 @@
1
1
  module Remi
2
- module Transform
3
- extend self
2
+ class Transform
4
3
 
5
- def [](meth)
6
- method(meth)
4
+ # Public: Initializes the static arguments of a transform.
5
+ #
6
+ # source_metadata - Metadata for the transform source.
7
+ # target_metadata - Metadata for the transform target.
8
+ def initialize(*args, source_metadata: {}, target_metadata: {}, **kargs, &block)
9
+ @source_metadata = source_metadata
10
+ @target_metadata = target_metadata
11
+ @multi_args = false
7
12
  end
8
13
 
9
- # We need to memoize each lambda with its static arguments so it's not recreated each row.
10
- # Inspired by parameter memoization in http://www.justinweiss.com/articles/4-simple-memoization-patterns-in-ruby-and-one-gem/
11
- def memoize_as_lambda(func, *args, &block)
12
- iv = instance_variable_get("@#{func}")
13
- return iv[args] if iv
14
+ # Public: Accessor for source metadata
15
+ attr_accessor :source_metadata
14
16
 
15
- hash_memo = Hash.new do |h, margs|
16
- h[margs] = lambda { |*largs| block.call(margs, *largs) }
17
+ # Public: Accessor for target metadata
18
+ attr_accessor :target_metadata
19
+
20
+ # Public: Set to true if the transform expects multiple arguments (default: false)
21
+ attr_reader :multi_arg
22
+
23
+ # Public: Defines the operation of this transform class.
24
+ #
25
+ # value - The value to be transformed
26
+ #
27
+ # Returns the transformed value.
28
+ def transform(value)
29
+ raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
30
+ end
31
+
32
+ # Public: Allows one to call the proc defined by the transform so that
33
+ # Remi::Transform instances can be used interchangeably with normal lambdas.
34
+ #
35
+ # values - The values to be transformed.
36
+ #
37
+ # Returns the transformed value.
38
+ def call(*values)
39
+ if @multi_arg
40
+ to_proc.call(*values)
41
+ else
42
+ to_proc.call(Array(values).first)
17
43
  end
18
- instance_variable_set("@#{func}", hash_memo)[args]
19
44
  end
20
45
 
21
- def prefix(prefix, if_blank: '')
22
- memoize_as_lambda(__method__, prefix, if_blank) do |(mprefix, mif_blank), larg|
23
- if larg.blank?
24
- mif_blank
46
+ # Public: Returns the transform as a lambda.
47
+ def to_proc
48
+ @to_proc ||= method(:transform).to_proc
49
+ end
50
+
51
+
52
+
53
+
54
+
55
+
56
+ # Public: Transform used to prefix string values in a vector.
57
+ #
58
+ # prefix - The string prefix.
59
+ # if_blank - String value to substitute if the value is blank (default: '').
60
+ #
61
+ # Examples:
62
+ #
63
+ # Prefix.new('CU').to_proc.call('123') # => "CU123"
64
+ class Prefix < Transform
65
+ def initialize(prefix, *args, if_blank: '', **kargs, &block)
66
+ super
67
+ @prefix = prefix
68
+ @if_blank = if_blank
69
+ end
70
+
71
+ def transform(value)
72
+ if value.blank?
73
+ @if_blank
25
74
  else
26
- "#{mprefix}#{larg}"
75
+ "#{@prefix}#{value}"
27
76
  end
28
77
  end
29
78
  end
30
79
 
31
- def postfix(postfix, if_blank: '')
32
- memoize_as_lambda(__method__, postfix, if_blank) do |(mpostfix, mif_blank), larg|
33
- if larg.blank?
34
- mif_blank
80
+
81
+ # Public: Transform used to postfix values in a vector.
82
+ #
83
+ # postfix - The string postfix.
84
+ # if_blank - String value to substitute if the value is blank (default: '').
85
+ #
86
+ # Examples:
87
+ #
88
+ # Postfix.new('A').to_proc.call('123') # => "123A"
89
+ class Postfix < Transform
90
+ def initialize(postfix, *args, if_blank: '', **kargs, &block)
91
+ super
92
+ @postfix = postfix
93
+ @if_blank = if_blank
94
+ end
95
+
96
+ def transform(value)
97
+ if value.blank?
98
+ @if_blank
35
99
  else
36
- "#{larg}#{mpostfix}"
100
+ "#{value}#{@postfix}"
37
101
  end
38
102
  end
39
103
  end
40
104
 
41
- def truncate(len)
42
- memoize_as_lambda(__method__, len) do |(mlen), larg|
43
- larg.slice(0,mlen)
105
+
106
+ # Public: Transform used to truncate values in a vector.
107
+ #
108
+ # len - The maximum length of the string.
109
+ #
110
+ # Examples:
111
+ #
112
+ # Truncate.new(3).to_proc.call('1234') # => "123"
113
+ class Truncate < Transform
114
+ def initialize(len, *args, **kargs, &block)
115
+ super
116
+ @len = len
117
+ end
118
+
119
+ def transform(value)
120
+ value.slice(0,@len)
44
121
  end
45
122
  end
46
123
 
47
- def concatenate(delimiter="")
48
- memoize_as_lambda(__method__, delimiter) do |(mdelimiter), *largs|
49
- Array(largs).join(mdelimiter)
124
+ # Public: Transform used to concatenate a list of values, joined by a delimiter.
125
+ #
126
+ # delimiter - The delimiter used between values in the list (default: '').
127
+ #
128
+ # Examples:
129
+ #
130
+ # Concatenate.new('-').to_proc.call('a', 'b', 'c') # => "a-b-c"
131
+ class Concatenate < Transform
132
+ def initialize(delimiter='', *args, **kargs, &block)
133
+ super
134
+ @multi_args = true
135
+ @delimiter = delimiter
136
+ end
137
+
138
+ def transform(*values)
139
+ Array(values).join(@delimiter)
50
140
  end
51
141
  end
52
142
 
53
- def lookup(h_lookup, missing: nil)
54
- memoize_as_lambda(__method__, h_lookup, missing) do |(mh_lookup, mmissing), larg|
55
- result = mh_lookup[larg]
143
+
144
+ # Public: Transform used to do key-value lookup on hash-like objects
145
+ #
146
+ # lookup - The lookup object that takes keys and returns values.
147
+ # missing - What to use if a key is not found in the lookup (default: nil). If this
148
+ # is a proc, it is sent the key as an argument.
149
+ #
150
+ # Examples:
151
+ #
152
+ # my_lookup = { 1 => 'one', 2 => 'two' }
153
+ # Lookup.new(my_lookup).to_proc.call(1) # => "one"
154
+ # Lookup.new(my_lookup).to_proc.call(3) # => nil
155
+ # Lookup.new(my_lookup, missing: 'UNK').to_proc.call(3) # => "UNK"
156
+ # Lookup.new(my_lookup, missing: ->(v) { "I don't know #{v}" }).to_proc.call(3) # => "I don't know 3"
157
+ class Lookup < Transform
158
+ def initialize(lookup, *args, missing: nil, **kargs, &block)
159
+ super
160
+ @lookup = lookup
161
+ @missing = missing
162
+ end
163
+
164
+ def transform(value)
165
+ result = @lookup[value]
56
166
 
57
167
  if !result.nil?
58
168
  result
59
- elsif mmissing.class == Proc
60
- mmissing.call(larg)
169
+ elsif @missing.respond_to? :call
170
+ @missing.call(value)
61
171
  else
62
- mmissing
172
+ @missing
63
173
  end
64
174
  end
65
175
  end
66
176
 
67
- def nvl(default='')
68
- memoize_as_lambda(__method__, default) do |(mdefault), *largs|
69
- Array(largs).find(->() { mdefault }) { |arg| !arg.blank? }
177
+ # Public: (Next-Value-Lookup) transform used to find the first non-blank value in a list.
178
+ #
179
+ # default - What to use if all values are blank (default: '').
180
+ #
181
+ # Examples:
182
+ #
183
+ # Nvl.new.to_proc.call(nil,'','a','b') # => "a"
184
+ class Nvl < Transform
185
+ def initialize(default='', *args, **kargs, &block)
186
+ super
187
+ @multi_args = true
188
+ @default = default
189
+ end
190
+
191
+ def transform(*values)
192
+ Array(values).find(->() { @default }) { |arg| !arg.blank? }
70
193
  end
71
194
  end
72
195
 
73
- def ifblank(replace_with)
74
- memoize_as_lambda(__method__, replace_with) do |(mreplace_with), larg|
75
- larg.blank? ? mreplace_with : larg
196
+ # Public: Used to replace blank values.
197
+ #
198
+ # replace_with - Use this if the source value is blank (default: '').
199
+ #
200
+ # Examples:
201
+ #
202
+ # IfBlank.new('MISSING VALUE').to_proc.call('alpha') # => "alpha"
203
+ # IfBlank.new('MISSING VALUE').to_proc.call('') # => "MISSING VALUE"
204
+ class IfBlank < Transform
205
+ def initialize(replace_with='', *args, **kargs, &block)
206
+ super
207
+ @replace_with = replace_with
208
+ end
209
+
210
+ def transform(value)
211
+ value.blank? ? @replace_with : value
76
212
  end
77
213
  end
78
214
 
79
- def format_date(from_fmt: '%m/%d/%Y', to_fmt: '%Y-%m-%d')
80
- memoize_as_lambda(__method__, from_fmt, to_fmt) do |(mfrom_fmt, mto_fmt), larg|
215
+ # Public: Parses a string and converts it to a date.
216
+ # This transform is metadata aware and will use :in_format metadata
217
+ # from the source
218
+ #
219
+ # in_format - The date format to use to convert the string (default: uses :in_format
220
+ # from the source metadata. If that is not defined, use '%Y-%m-%d').
221
+ # if_blank - Value to use if the incoming value is blank (default: uses :if_blank
222
+ # from the source metadata. If that is not defined, use nil). If set to
223
+ # :high, then use the largest date, if set to :low, use the lowest date.
224
+ #
225
+ # Examples:
226
+ #
227
+ # ParseDate.new(in_format: '%m/%d/%Y').to_proc.call('02/22/2013') # => Date.new(2013,2,22)
228
+ #
229
+ # tform = ParseDate.new
230
+ # tform.source_metadata = { in_format: '%m/%d/%Y' }
231
+ # tform.to_proc.call('02/22/2013') # => Date.new(2013,2,22)
232
+ class ParseDate < Transform
233
+ def initialize(*args, in_format: nil, if_blank: nil, **kargs, &block)
234
+ super
235
+ @in_format = in_format
236
+ @if_blank = if_blank
237
+ end
238
+
239
+ def in_format
240
+ @in_format ||= @source_metadata.fetch(:in_format, '%Y-%m-%d')
241
+ end
242
+
243
+ def if_blank
244
+ @if_blank ||= @source_metadata.fetch(:if_blank, nil)
245
+ end
246
+
247
+ def transform(value)
81
248
  begin
82
- if larg.blank? then
83
- ''
84
- elsif larg.respond_to? :strftime
85
- larg.strftime(mto_fmt)
249
+ if value.respond_to?(:strftime)
250
+ value
251
+ elsif value.blank? then
252
+ blank_handler(value)
86
253
  else
87
- Date.strptime(larg, mfrom_fmt).strftime(mto_fmt)
254
+ string_to_date(value)
88
255
  end
89
256
  rescue ArgumentError => err
90
- puts "Error parsing date (#{larg.class}): '#{larg}'"
91
- raise err
257
+ raise err, "Error parsing date (#{value.class}): '#{value}' with format #{in_format})"
258
+ end
259
+ end
260
+
261
+ def string_to_date(value)
262
+ Date.strptime(value, in_format)
263
+ end
264
+
265
+ def blank_handler(value)
266
+ if if_blank == :low
267
+ Date.new(1900,01,01)
268
+ elsif if_blank == :high
269
+ Date.new(2999,12,31)
270
+ elsif if_blank.respond_to? :call
271
+ if_blank.call(value)
272
+ else
273
+ if_blank
92
274
  end
93
275
  end
94
276
  end
95
277
 
96
278
 
97
- def parse_date(format: '%Y-%m-%d', if_blank: nil)
98
- memoize_as_lambda(__method__, format, if_blank.try(:to_sym)) do |(mformat, mif_blank), larg|
279
+ # Public: (Re)formats a date.
280
+ # This transform is metadata aware and will use :in_format/:out_format metadata
281
+ # from the source.
282
+ #
283
+ # in_format - The date format used to parse the input value. If the input value
284
+ # is a date, then this parameter is ignored. (default: uses :in_format
285
+ # from the source metadata. If that is not defined, use '%Y-%m-%d')
286
+ # out_format - The date format applied to provide the resulting string. (default:
287
+ # uses :out_format from the source metadata. If that is not defined,
288
+ # use '%Y-%m-%d')
289
+ #
290
+ # Examples:
291
+ #
292
+ # FormatDate.new(in_format: '%m/%d/%Y', out_format: '%Y-%m-%d').to_proc.call('02/22/2013') # => "2013-02-22"
293
+ #
294
+ # tform = FormatDate.new
295
+ # tform.source_metadata = { in_format: '%m/%d/%Y', out_format: '%Y-%m-%d' }
296
+ # tform.to_proc.call('02/22/2013') # => "2013-02-22"
297
+ class FormatDate < Transform
298
+ def initialize(*args, in_format: nil, out_format: nil, **kargs, &block)
299
+ super
300
+ @in_format = in_format
301
+ @out_format = out_format
302
+ end
303
+
304
+ def in_format
305
+ @in_format ||= @source_metadata.fetch(:in_format, '%Y-%m-%d')
306
+ end
307
+
308
+ def out_format
309
+ @out_format ||= @source_metadata.fetch(:out_format, '%Y-%m-%d')
310
+ end
311
+
312
+ def transform(value)
99
313
  begin
100
- if larg.respond_to?(:strftime)
101
- larg
102
- elsif larg.blank? then
103
- if mif_blank == :low
104
- Date.new(1900,01,01)
105
- elsif mif_blank == :high
106
- Date.new(2999,12,31)
107
- else
108
- mif_blank
109
- end
314
+ if value.blank? then
315
+ ''
316
+ elsif value.respond_to? :strftime
317
+ value.strftime(out_format)
110
318
  else
111
- Date.strptime(larg, mformat)
319
+ Date.strptime(value, in_format).strftime(out_format)
112
320
  end
113
321
  rescue ArgumentError => err
114
- puts "Error parsing date (#{larg.class}): '#{larg}')"
115
- raise err
322
+ raise err, "Error parsing date (#{value.class}): '#{value}' using the format #{in_format} => #{out_format}"
116
323
  end
117
324
  end
118
325
  end
119
326
 
120
- def date_diff(measure = :days)
121
- memoize_as_lambda(__method__, measure.to_sym) do |(mmeasure), *larg|
122
- if mmeasure == :days
123
- (larg.last - larg.first).to_i
124
- elsif mmeasure == :months
125
- (larg.last.year * 12 + larg.last.month) - (larg.first.year * 12 + larg.first.month)
126
- elsif mmeasure == :years
127
- larg.last.year - larg.first.year
327
+ # Public: Used to calculate differences between dates by a given measure.
328
+ #
329
+ # measure - One of :days, :months, or :years. (default: :days).
330
+ #
331
+ # Examples:
332
+ #
333
+ # DateDiff.new(:months).to_proc.call(Date.new(2016,1,30), Date.new(2016,3,1)) # => 2
334
+ class DateDiff < Transform
335
+ def initialize(measure = :days, *args, **kargs, &block)
336
+ super
337
+ @multi_args = true
338
+ @measure = measure
339
+ end
340
+
341
+ def transform(from_date, to_date)
342
+
343
+ case @measure.to_sym
344
+ when :days
345
+ (to_date - from_date).to_i
346
+ when :months
347
+ (to_date.year * 12 + to_date.month) - (from_date.year * 12 + from_date.month)
348
+ when :years
349
+ to_date.year - from_date.year
128
350
  else
129
- raise "I don't know how to handle #{mmeasure} yet"
351
+ raise ArgumentError, "Unknown date difference measure: #{@measure}"
130
352
  end
131
353
  end
132
354
  end
133
355
 
134
- def constant(const)
135
- memoize_as_lambda(__method__, const) do |(mconst), larg|
136
- mconst
356
+ # Public: Simply returns a constant.
357
+ #
358
+ # constant - The constant value to return.
359
+ #
360
+ # Examples:
361
+ #
362
+ # Constant.new('ewoks').to_proc.call('whatever') # => 'ewoks'
363
+ class Constant < Transform
364
+ def initialize(constant, *args, **kargs, &block)
365
+ super
366
+ @constant = constant
367
+ end
368
+
369
+ def transform(values)
370
+ @constant
137
371
  end
138
372
  end
139
373
 
140
- def replace(regex, replace_with)
141
- memoize_as_lambda(__method__, regex, replace_with) do |(mregex, mreplace_with), larg|
142
- larg.gsub(regex, replace_with)
374
+ # Public: Replaces one substring with another.
375
+ #
376
+ # to_replace - The string or regex to be replaced.
377
+ # replace_with - The value to substitute.
378
+ #
379
+ # Examples:
380
+ #
381
+ # Replace.new(/\s/, '-').to_proc.call('hey jude') #=> 'hey-jude'
382
+ class Replace < Transform
383
+ def initialize(to_replace, replace_with, *args, **kargs, &block)
384
+ super
385
+ @to_replace = to_replace
386
+ @replace_with = replace_with
387
+ end
388
+
389
+ def transform(value)
390
+ value.gsub(@to_replace, @replace_with)
391
+ end
392
+ end
393
+
394
+ # Public: Checks to see if an email validates against a regex (imperfect)
395
+ # and will substitute it with some value if not.
396
+ #
397
+ # substitute - The value used to substitute for an invalid email. Can use a proc
398
+ # that accepts the value of the invalid email
399
+ #
400
+ # Examples:
401
+ #
402
+ # ValidateEmail.new('invalid@example.com').to_proc.call('uhave.email') #=> 'invalid@example.com'
403
+ # ValidateEmail.new(->(v) { "#{SecureRandom.uuid}@example.com" }).to_proc.call('uhave.email') #=> '3f158f29-bc75-44f0-91ed-22fbe5157297@example.com'
404
+ class ValidateEmail < Transform
405
+ def initialize(substitute='', *args, **kargs, &block)
406
+ super
407
+ @substitute = substitute
408
+ end
409
+
410
+ def transform(value)
411
+ value = value || ''
412
+ if value.match(/^[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,}$/i)
413
+ value
414
+ elsif @substitute.respond_to? :call
415
+ @substitute.call value
416
+ else
417
+ @substitute
418
+ end
143
419
  end
144
420
  end
145
421
 
146
- def validate_email(substitute='')
147
- memoize_as_lambda(__method__, substitute) do |(msubstitute), larg|
148
- larg = larg || ''
149
- larg.match(/^[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,}$/i) ? larg : msubstitute
422
+
423
+
424
+ # Public: Enforces the type declared in the :type metadata field (if it exists)
425
+ #
426
+ # Examples:
427
+ #
428
+ # tform = EnforceType.new
429
+ # tform.source_metadata = { type: :date, in_format: '%m/%d/%Y' }
430
+ # tform.to_proc.call('02/22/2013') # => Date.new(2013,2,22)
431
+ #
432
+ # tform = EnforceType.new
433
+ # tform.source_metadata = { type: :integer }
434
+ # tform.to_proc.call('12') # => 12
435
+ #
436
+ # tform = EnforceType.new
437
+ # tform.source_metadata = { type: :integer }
438
+ # tform.to_proc.call('12A') # => ArgumentError: invalid value for Integer(): "12A"
439
+ class EnforceType < Transform
440
+ def initialize(*args, **kargs, &block)
441
+ super
442
+ end
443
+
444
+ def type
445
+ @type ||= @source_metadata.fetch(:type, :string)
446
+ end
447
+
448
+ def in_format
449
+ @in_format ||= @source_metadata.fetch(:in_format, '')
450
+ end
451
+
452
+ def scale
453
+ @scale ||= @source_metadata.fetch(:scale, 0)
454
+ end
455
+
456
+ def if_blank
457
+ return @if_blank if @if_blank_set
458
+ @if_blank_set = true
459
+ @if_blank = @source_metadata.fetch(:if_blank, nil)
460
+ end
461
+
462
+ def blank_handler(value)
463
+ return value unless value.blank?
464
+
465
+ if if_blank.respond_to? :to_proc
466
+ if_blank.to_proc.call(value)
467
+ else
468
+ if_blank
469
+ end
470
+ end
471
+
472
+ def transform(value)
473
+ if value.blank?
474
+ blank_handler(value)
475
+ else
476
+ case type
477
+ when :string
478
+ value
479
+ when :integer
480
+ Integer(value)
481
+ when :float
482
+ Float(value)
483
+ when :decimal
484
+ Float("%.#{scale}f" % Float(value))
485
+ when :date
486
+ Date.strptime(value, in_format)
487
+ when :datetime
488
+ Time.strptime(value, in_format)
489
+ else
490
+ raise ArgumentError, "Unknown type enforcement: #{type}"
491
+ end
492
+ end
150
493
  end
151
494
  end
152
495