remi 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/remi/loader.rb CHANGED
@@ -4,11 +4,12 @@ module Remi
4
4
  # define specific ways to load data.
5
5
  class Loader
6
6
 
7
- def initialize(*args, logger: Remi::Settings.logger, **kargs, &block)
7
+ def initialize(*args, context: nil, logger: Remi::Settings.logger, **kargs, &block)
8
+ @context = context
8
9
  @logger = logger
9
10
  end
10
11
 
11
- attr_accessor :logger
12
+ attr_accessor :logger, :context
12
13
 
13
14
  # Any child classes need to define a load method that loads data from
14
15
  # the given dataframe into the target system.
@@ -18,5 +19,16 @@ module Remi
18
19
  raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
19
20
  end
20
21
 
22
+ # If autoload is set to true, then any loaders are called at the moment
23
+ # a dataframe is assigned to a target (e.g., `my_target.df = some_df` will
24
+ # call `#load` on any loaders associated with `my_target`).
25
+ def autoload
26
+ false
27
+ end
28
+
29
+ # @return [Remi::Fields] The fields defined in the context, or an empty set of fields when the context is nil or does not respond to #fields
30
+ def fields
31
+ context && context.respond_to?(:fields) ? context.fields : Remi::Fields.new({})
32
+ end
21
33
  end
22
34
  end
@@ -521,19 +521,22 @@ module Remi::Testing::BusinessRules
521
521
  @table = table
522
522
  end
523
523
 
524
+ def parse_formula(value)
525
+ parsed_value = ParseFormula.parse(value)
526
+ case parsed_value
527
+ when '\nil'
528
+ nil
529
+ else
530
+ parsed_value
531
+ end
532
+ end
533
+
524
534
  def to_df(seed_hash, field_symbolizer:)
525
535
  table_headers = @table.headers.map { |h| h.symbolize(field_symbolizer) }
526
536
  df = Daru::DataFrame.new([], order: seed_hash.keys | table_headers)
527
537
  @table.hashes.each do |example_row|
528
538
  example_row_sym = example_row.reduce({}) do |h, (k,v)|
529
- formula_value = ParseFormula.parse(v)
530
- value = case formula_value
531
- when '\nil'
532
- nil
533
- else
534
- formula_value
535
- end
536
- h[k.symbolize(field_symbolizer)] = value
539
+ h[k.symbolize(field_symbolizer)] = parse_formula(v)
537
540
  h
538
541
  end
539
542
  df.add_row(seed_hash.merge(example_row_sym))
@@ -546,7 +549,7 @@ module Remi::Testing::BusinessRules
546
549
  def column_hash
547
550
  @table.hashes.reduce({}) do |h, row|
548
551
  row.each do |k,v|
549
- (h[k.symbolize] ||= []) << v
552
+ (h[k.symbolize] ||= []) << parse_formula(v)
550
553
  end
551
554
  h
552
555
  end
@@ -514,6 +514,11 @@ module Remi
514
514
  type == :string ? '' : nil
515
515
  end
516
516
 
517
+ def truthy(value)
518
+ @truthy ||= Truthy.new(allow_nils: false).to_proc
519
+ @truthy.call(value)
520
+ end
521
+
517
522
  def transform(value)
518
523
  if value.blank? && type != :json
519
524
  blank_handler(value)
@@ -537,6 +542,10 @@ module Remi
537
542
  else
538
543
  value.is_a?(Hash) || value.is_a?(Array) ? value : JSON.parse(value)
539
544
  end
545
+ when :boolean
546
+ # Workaround: a bug in Daru 0.1.4 converts false values to nil when joining.
547
+ # For now, we just standardize boolean values and return them as strings (via #to_s).
548
+ truthy(value).to_s
540
549
  else
541
550
  raise ArgumentError, "Unknown type enforcement: #{type}"
542
551
  end
data/lib/remi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.3.0'
2
+ VERSION = '0.3.1'
3
3
  end
@@ -339,7 +339,9 @@ describe DataTarget do
339
339
 
340
340
  before do
341
341
  allow(my_loader).to receive(:load)
342
+ allow(my_loader).to receive(:context=)
342
343
  allow(my_loader2).to receive(:load)
344
+ allow(my_loader2).to receive(:context=)
343
345
  allow(my_encoder).to receive(:encode) .and_return 'encoded data'
344
346
  end
345
347
 
@@ -378,7 +380,6 @@ describe DataTarget do
378
380
  end
379
381
  end
380
382
 
381
-
382
383
  context '#field_symbolizer' do
383
384
  context 'field_symbolizer called before encoder' do
384
385
  let(:before_encoder) do
@@ -448,15 +449,15 @@ describe DataTarget do
448
449
  end
449
450
 
450
451
  context '#loader' do
451
- before { data_target.loader 'my_loader' }
452
+ before { data_target.loader my_loader }
452
453
 
453
454
  it 'adds a loader to the list of loaders' do
454
- expect(data_target.loaders).to eq ['my_loader']
455
+ expect(data_target.loaders).to eq [my_loader]
455
456
  end
456
457
 
457
458
  it 'allows for multiple loaders to be defined' do
458
- data_target.loader 'my_loader2'
459
- expect(data_target.loaders).to eq ['my_loader', 'my_loader2']
459
+ data_target.loader my_loader2
460
+ expect(data_target.loaders).to eq [my_loader, my_loader2]
460
461
  end
461
462
  end
462
463
 
@@ -505,4 +506,21 @@ describe DataTarget do
505
506
  end
506
507
  end
507
508
  end
509
+
510
+ context '#df=' do
511
+ before do
512
+ data_target.encoder my_encoder
513
+ data_target.loader my_loader
514
+ data_target.loader my_loader2
515
+
516
+ allow(my_loader).to receive(:autoload) { false }
517
+ allow(my_loader2).to receive(:autoload) { true }
518
+ end
519
+
520
+ it 'loads any loaders set to autoload' do
521
+ expect(my_loader).not_to receive :load
522
+ expect(my_loader2).to receive :load
523
+ data_target.df = Remi::DataFrame::Daru.new([])
524
+ end
525
+ end
508
526
  end
@@ -5,15 +5,15 @@ describe Extractor::FileSystem do
5
5
  now = Time.new
6
6
 
7
7
  example_files = [
8
- { pathname: "pdir/ApplicantsA-9.csv", create_time: now - 10.minutes },
9
- { pathname: "pdir/ApplicantsA-3.csv", create_time: now - 5.minutes },
10
- { pathname: "pdir/ApplicantsA-5.csv", create_time: now - 1.minutes },
11
- { pathname: "pdir/ApplicantsB-7.csv", create_time: now - 10.minutes },
12
- { pathname: "pdir/ApplicantsB-6.csv", create_time: now - 5.minutes },
13
- { pathname: "pdir/ApplicantsB-2.csv", create_time: now - 1.minutes },
14
- { pathname: "pdir/ApplicantsB-2.txt", create_time: now - 0.minutes },
15
- { pathname: "pdir/Apples.csv", createtime: now - 1.minutes },
16
- { pathname: "otherdir/ApplicantsA-11.csv", createtime: now - 1.minutes },
8
+ { pathname: "pdir/ApplicantsA-9.csv", create_time: now - 10.minutes },
9
+ { pathname: "pdir/ApplicantsA-3.csv", create_time: now - 5.minutes },
10
+ { pathname: "pdir/ApplicantsA-5.csv", create_time: now - 1.minutes },
11
+ { pathname: "pdir/ApplicantsB-7.csv", create_time: now - 10.minutes },
12
+ { pathname: "pdir/ApplicantsB-6.csv", create_time: now - 5.minutes },
13
+ { pathname: "pdir/ApplicantsB-2.csv", create_time: now - 1.minutes },
14
+ { pathname: "pdir/ApplicantsB-2.txt", create_time: now - 0.minutes },
15
+ { pathname: "pdir/Apples.csv", create_time: now - 1.minutes },
16
+ { pathname: "otherdir/ApplicantsA-11.csv", create_time: now - 1.minutes },
17
17
  ]
18
18
 
19
19
  remote_path = 'pdir'
@@ -89,6 +89,40 @@ describe Extractor::FileSystem do
89
89
  end
90
90
 
91
91
 
92
+ context 'extracting the most recent file by create time' do
93
+ before do
94
+ @params.merge!({
95
+ most_recent_within_n: 1.hour,
96
+ most_recent_only: true
97
+ })
98
+ end
99
+
100
+ it 'extracts the files within n hours of creation' do
101
+ expect(file_system.entries.map(&:name)).to match_array([
102
+ "ApplicantsB-2.txt"
103
+ ])
104
+ end
105
+ end
106
+
107
+ context 'extracting all recent files by create time' do
108
+ before do
109
+ @params.merge!({
110
+ created_within: 0.02.hours,
111
+ most_recent_only: false
112
+ })
113
+ end
114
+
115
+ it 'extracts the files within n hours of creation' do
116
+ puts @params
117
+ expect(file_system.entries.map(&:name)).to match_array([
118
+ "Apples.csv",
119
+ "ApplicantsA-5.csv",
120
+ "ApplicantsB-2.csv",
121
+ "ApplicantsB-2.txt"
122
+ ])
123
+ end
124
+ end
125
+
92
126
  context 'extracting files matching a pattern with a by group' do
93
127
  before do
94
128
  @params.merge!({
@@ -0,0 +1,133 @@
1
+ require 'remi_spec'
2
+ require 'remi/data_subjects/gsheet'
3
+
4
+ describe Extractor::Gsheet do
5
+
6
+ let(:remote_path) { '' }
7
+ let(:credentials) {
8
+ {
9
+ :client_id => 'some_client_id',
10
+ :access_token => 'some_access_token',
11
+ :refresh_token => 'some_refresh_token',
12
+ :client_secret => 'some_client_secret',
13
+ :application_name => 'some_application_name',
14
+ :project_id => 'some_project_id',
15
+ :expiration_time => '123456789'
16
+ }
17
+ }
18
+
19
+
20
+ let(:params) {
21
+ {
22
+ credentials: credentials,
23
+ folder_id: 'some_google_folder_id',
24
+ remote_path: remote_path
25
+ }
26
+ }
27
+
28
+ let(:gsheet_file) {
29
+ Extractor::Gsheet.new(params)
30
+ }
31
+
32
+ let(:response) { double('response') }
33
+ let(:remote_filenames) {["test_file_1","test_file_2"]}
34
+ let(:remote_files) do
35
+ [{name: "test_file_1", create_time:Date.current, id: "1234"},
36
+ {name: "test_file_2", create_time:Date.current, id: "5678"}]
37
+
38
+ end
39
+
40
+ context '.new' do
41
+ it 'creates an instance with valid parameters' do
42
+ gsheet_file
43
+ end
44
+
45
+ it 'requires a client_id' do
46
+ credentials.delete(:client_id)
47
+ expect { gsheet_file }.to raise_error KeyError
48
+ end
49
+
50
+ it 'requires an access_token' do
51
+ credentials.delete(:access_token)
52
+ expect { gsheet_file }.to raise_error KeyError
53
+ end
54
+
55
+ it 'requires a client_secret' do
56
+ credentials.delete(:client_secret)
57
+ expect { gsheet_file }.to raise_error KeyError
58
+ end
59
+
60
+ it 'requires a refresh_token' do
61
+ credentials.delete(:refresh_token)
62
+ expect { gsheet_file }.to raise_error KeyError
63
+ end
64
+
65
+ it 'requires a folder id' do
66
+ params.delete(:credentials)
67
+ expect { gsheet_file }.to raise_error ArgumentError
68
+ end
69
+
70
+ it 'requires an application name' do
71
+ credentials.delete(:application_name)
72
+ expect { gsheet_file }.to raise_error KeyError
73
+ end
74
+
75
+ it 'requires a project id' do
76
+ credentials.delete(:project_id)
77
+ expect { gsheet_file }.to raise_error KeyError
78
+ end
79
+
80
+ end
81
+
82
+ context '#all_entires' do
83
+ it 'returns all entries' do
84
+
85
+ allow(response).to receive(:files) { remote_files }
86
+ allow(gsheet_file).to receive(:service_list_files) { response }
87
+
88
+ expect(gsheet_file.all_entries.map(&:name)).to eq remote_filenames
89
+
90
+ end
91
+ end
92
+
93
+ context '#extract' do
94
+ it 'downloads files from google' do
95
+
96
+ allow(response).to receive(:files) { remote_files }
97
+ allow(gsheet_file).to receive(:service_list_files) { response }
98
+ expect(gsheet_file).to receive(:get_spreadsheet_vals).exactly(remote_filenames.size).times
99
+ gsheet_file.extract
100
+
101
+ end
102
+
103
+ end
104
+ end
105
+
106
+ describe Parser::Gsheet do
107
+
108
+ let(:parser) { Parser::Gsheet.new }
109
+ let(:gs_extract) { double('gs_extract') }
110
+ let(:example_data) do
111
+ [{"headers" => ["header_1", "header_2", "header_3"],
112
+ "row 1" => ["value 1", "value 2", "value 3"]
113
+ }]
114
+ end
115
+
116
+ before do
117
+ allow(gs_extract).to receive(:data) { example_data }
118
+ end
119
+
120
+ it 'converts Google Sheets response data into a dataframe' do
121
+ expect(parser.parse gs_extract).to be_a Remi::DataFrame::Daru
122
+ end
123
+
124
+ it 'converted data into the correct dataframe' do
125
+ expected_df = Daru::DataFrame.new(
126
+ :header_1 => ['value 1'],
127
+ :header_2 => ['value 2'],
128
+ :header_3 => ['value 3'],
129
+ )
130
+ expect(parser.parse(gs_extract).to_a).to eq expected_df.to_a
131
+ end
132
+
133
+ end
@@ -4,7 +4,10 @@ describe 'sub jobs' do
4
4
  before :each do
5
5
  Object.send(:remove_const, :MySubJob) if Object.constants.include?(:MySubJob)
6
6
  class MySubJob < Job
7
- source(:sub_source) {}
7
+ source :sub_source do
8
+ extractor Extractor::None.new
9
+ fields({ a: { from_sub_job: true, to_overwrite: 'from_sub_job' } })
10
+ end
8
11
  target(:sub_target) {}
9
12
  end
10
13
  end
@@ -13,21 +16,50 @@ describe 'sub jobs' do
13
16
 
14
17
 
15
18
  describe Extractor::SubJob do
16
- let(:extractor) { Extractor::SubJob.new(sub_job: sub_job, data_subject: :sub_target) }
19
+ let(:target_extractor) { Extractor::SubJob.new(sub_job: sub_job, data_subject: :sub_target) }
20
+ let(:source_extractor) { Extractor::SubJob.new(sub_job: sub_job, data_subject: :sub_source) }
17
21
 
18
22
  it 'returns the data from the sub-job' do
19
- allow(sub_job.job.sub_target).to receive(:df) { 'sub target df' }
20
- expect(extractor.extract).to eq 'sub target df'
23
+ allow(sub_job.sub_job.sub_target).to receive(:df) { 'sub target df' }
24
+ expect(target_extractor.extract).to eq 'sub target df'
25
+ end
26
+
27
+ it 'executes the sub job when target data is requested' do
28
+ expect(sub_job).to receive(:execute).once
29
+ target_extractor.extract
30
+ end
31
+
32
+ it 'does not execute the sub job when source data is requested' do
33
+ expect(sub_job).not_to receive(:execute)
34
+ source_extractor.extract
21
35
  end
22
36
  end
23
37
 
24
38
  describe Loader::SubJob do
25
- let(:loader) { Loader::SubJob.new(sub_job: sub_job, data_subject: :sub_source) }
39
+ let(:data_target) { DataTarget.new }
40
+ let(:loader) { Loader::SubJob.new(context: data_target, sub_job: sub_job, data_subject: :sub_source) }
41
+ let(:my_data_frame) { Daru::DataFrame.new({ a: [1,2,3] }) }
26
42
 
27
43
  it 'populates the sub-job data frame' do
28
- some_data_frame = Daru::DataFrame.new({ a: [1,2,3] })
29
- loader.load(some_data_frame)
30
- expect(sub_job.job.sub_source.df).to eq some_data_frame
44
+ loader.load(my_data_frame)
45
+ expect(sub_job.sub_job.sub_source.df).to eq my_data_frame
46
+ end
47
+
48
+ it 'merges fields from the parent source when requested' do
49
+ data_target.fields({ a: { from_parent: :true, to_overwrite: 'from_parent' } })
50
+ loader.load(my_data_frame)
51
+ expect(sub_job.sub_job.sub_source.fields).to eq MySubJob.new.sub_source.fields.merge data_target.fields
52
+ end
53
+
54
+ it 'does not merge fields from the parent source when requested' do
55
+ loader.merge_fields = false
56
+ data_target.fields({ a: { from_parent: :true, to_overwrite: 'from_parent' } })
57
+ loader.load(my_data_frame)
58
+ expect(sub_job.sub_job.sub_source.fields).to eq MySubJob.new.sub_source.fields
59
+ end
60
+
61
+ it 'autoloads the target' do
62
+ expect(loader.autoload).to be true
31
63
  end
32
64
  end
33
65
  end
data/spec/job_spec.rb CHANGED
@@ -70,11 +70,11 @@ describe Job do
70
70
  class MyJob
71
71
  sub_job(:my_sub_job) { 'something' }
72
72
  end
73
- expect { job.my_sub_job.job }.to raise_error ArgumentError
73
+ expect { job.my_sub_job.sub_job }.to raise_error ArgumentError
74
74
  end
75
75
 
76
76
  it 'returns a Remi job' do
77
- expect(job.my_sub_job.job).to be_a Remi::Job
77
+ expect(job.my_sub_job.sub_job).to be_a Remi::Job
78
78
  end
79
79
  end
80
80
 
@@ -276,6 +276,11 @@ describe Job do
276
276
  expect(job.params[:my_param]).to eq 'instantiated'
277
277
  end
278
278
 
279
+ it 'works with booleans too' do
280
+ other_job = MyJob.new(my_param: false)
281
+ expect(other_job.params[:my_param]).to eq false
282
+ end
283
+
279
284
  it 'does not affect the values of other instances' do
280
285
  job
281
286
  other_job = MyJob.new
@@ -295,17 +300,12 @@ describe Job do
295
300
  context '#execute' do
296
301
  before do
297
302
  class MyJob
298
- transform :transform_one do
299
- end
300
-
301
- transform :transform_two do
302
- end
303
-
304
- target :target_one do
305
- end
306
-
307
- target :target_two do
308
- end
303
+ transform(:transform_one) {}
304
+ transform(:transform_two) {}
305
+ sub_job(:sub_job_one) { Remi::Job.new }
306
+ sub_job(:sub_job_two) { Remi::Job.new }
307
+ target(:target_one) {}
308
+ target(:target_two) {}
309
309
  end
310
310
  end
311
311
 
@@ -314,6 +314,11 @@ describe Job do
314
314
  job.execute
315
315
  end
316
316
 
317
+ it 'executes all subjobs' do
318
+ expect(job).to receive(:execute_sub_jobs)
319
+ job.execute
320
+ end
321
+
317
322
  it 'executes load all targets' do
318
323
  expect(job).to receive(:execute_load_targets)
319
324
  job.execute
@@ -330,12 +335,39 @@ describe Job do
330
335
  job.execute(:transforms)
331
336
  end
332
337
 
338
+ it 'does not execute all sub jobs' do
339
+ expect(job).not_to receive(:execute_sub_jobs)
340
+ job.execute(:transforms)
341
+ end
342
+
333
343
  it 'does not load all targets' do
334
344
  expect(job).not_to receive(:execute_load_targets)
335
345
  job.execute(:transforms)
336
346
  end
337
347
  end
338
348
 
349
+ context '#execute(:sub_jobs)' do
350
+ it 'executes all sub_jobs' do
351
+ [:sub_job_one, :sub_job_two].each do |sub_job_name|
352
+ sub_job = instance_double(Job::SubJob)
353
+ expect(sub_job).to receive(:execute)
354
+ expect(job).to receive(sub_job_name) .and_return(sub_job)
355
+ end
356
+
357
+ job.execute(:sub_jobs)
358
+ end
359
+
360
+ it 'does not execute all transforms' do
361
+ expect(job).not_to receive(:execute_transforms)
362
+ job.execute(:sub_jobs)
363
+ end
364
+
365
+ it 'does not load all targets' do
366
+ expect(job).not_to receive(:execute_load_targets)
367
+ job.execute(:sub_jobs)
368
+ end
369
+ end
370
+
339
371
  context '#execute(:load_targets)' do
340
372
  it 'loads all targets' do
341
373
  [:target_one, :target_two].each do |target_name|
@@ -351,6 +383,11 @@ describe Job do
351
383
  expect(job).not_to receive(:execute_transforms)
352
384
  job.execute(:load_targets)
353
385
  end
386
+
387
+ it 'does not execute all sub jobs' do
388
+ expect(job).not_to receive(:execute_sub_jobs)
389
+ job.execute(:load_targets)
390
+ end
354
391
  end
355
392
  end
356
393
 
@@ -469,9 +506,9 @@ describe Job do
469
506
  Job::SubJob.new { scoped_sub_job }
470
507
  end
471
508
 
472
- context '#job' do
509
+ context '#sub_job' do
473
510
  it 'returns the job instance for the sub job' do
474
- expect(job_sub_job.job).to eq sub_job
511
+ expect(job_sub_job.sub_job).to eq sub_job
475
512
  end
476
513
  end
477
514
 
@@ -494,6 +531,12 @@ describe Job do
494
531
  expect(sub_job).to receive(:execute)
495
532
  job_sub_job.execute
496
533
  end
534
+
535
+ it 'only executes the sub job once' do
536
+ expect(sub_job).to receive(:execute).once
537
+ job_sub_job.execute
538
+ job_sub_job.execute
539
+ end
497
540
  end
498
541
 
499
542
  context '#execute_transforms' do