remi 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/remi/loader.rb CHANGED
@@ -4,11 +4,12 @@ module Remi
4
4
  # define specific ways to load data.
5
5
  class Loader
6
6
 
7
- def initialize(*args, logger: Remi::Settings.logger, **kargs, &block)
7
+ def initialize(*args, context: nil, logger: Remi::Settings.logger, **kargs, &block)
8
+ @context = context
8
9
  @logger = logger
9
10
  end
10
11
 
11
- attr_accessor :logger
12
+ attr_accessor :logger, :context
12
13
 
13
14
  # Any child classes need to define a load method that loads data from
14
15
  # the given dataframe into the target system.
@@ -18,5 +19,16 @@ module Remi
18
19
  raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
19
20
  end
20
21
 
22
+ # If autoload returns true, the loader's `#load` is called at the moment
23
+ # a dataframe is assigned to a target (e.g., `my_target.df = some_df` will
24
+ # call `#load` on every autoloading loader associated with `my_target`).
25
+ def autoload
26
+ false
27
+ end
28
+
29
+ # @return [Remi::Fields] The fields defined in the context, or an empty Remi::Fields if there is no context or it does not respond to #fields
30
+ def fields
31
+ context && context.respond_to?(:fields) ? context.fields : Remi::Fields.new({})
32
+ end
21
33
  end
22
34
  end
@@ -521,19 +521,22 @@ module Remi::Testing::BusinessRules
521
521
  @table = table
522
522
  end
523
523
 
524
+ def parse_formula(value)
525
+ parsed_value = ParseFormula.parse(value)
526
+ case parsed_value
527
+ when '\nil'
528
+ nil
529
+ else
530
+ parsed_value
531
+ end
532
+ end
533
+
524
534
  def to_df(seed_hash, field_symbolizer:)
525
535
  table_headers = @table.headers.map { |h| h.symbolize(field_symbolizer) }
526
536
  df = Daru::DataFrame.new([], order: seed_hash.keys | table_headers)
527
537
  @table.hashes.each do |example_row|
528
538
  example_row_sym = example_row.reduce({}) do |h, (k,v)|
529
- formula_value = ParseFormula.parse(v)
530
- value = case formula_value
531
- when '\nil'
532
- nil
533
- else
534
- formula_value
535
- end
536
- h[k.symbolize(field_symbolizer)] = value
539
+ h[k.symbolize(field_symbolizer)] = parse_formula(v)
537
540
  h
538
541
  end
539
542
  df.add_row(seed_hash.merge(example_row_sym))
@@ -546,7 +549,7 @@ module Remi::Testing::BusinessRules
546
549
  def column_hash
547
550
  @table.hashes.reduce({}) do |h, row|
548
551
  row.each do |k,v|
549
- (h[k.symbolize] ||= []) << v
552
+ (h[k.symbolize] ||= []) << parse_formula(v)
550
553
  end
551
554
  h
552
555
  end
@@ -514,6 +514,11 @@ module Remi
514
514
  type == :string ? '' : nil
515
515
  end
516
516
 
517
+ def truthy(value)
518
+ @truthy ||= Truthy.new(allow_nils: false).to_proc
519
+ @truthy.call(value)
520
+ end
521
+
517
522
  def transform(value)
518
523
  if value.blank? && type != :json
519
524
  blank_handler(value)
@@ -537,6 +542,10 @@ module Remi
537
542
  else
538
543
  value.is_a?(Hash) || value.is_a?(Array) ? value : JSON.parse(value)
539
544
  end
545
+ when :boolean
546
+ # Workaround: Daru 0.1.4 has a bug that converts false values to nil when joining.
547
+ # Until that is fixed, boolean values are standardized as strings (via #to_s).
548
+ truthy(value).to_s
540
549
  else
541
550
  raise ArgumentError, "Unknown type enforcement: #{type}"
542
551
  end
data/lib/remi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.3.0'
2
+ VERSION = '0.3.1'
3
3
  end
@@ -339,7 +339,9 @@ describe DataTarget do
339
339
 
340
340
  before do
341
341
  allow(my_loader).to receive(:load)
342
+ allow(my_loader).to receive(:context=)
342
343
  allow(my_loader2).to receive(:load)
344
+ allow(my_loader2).to receive(:context=)
343
345
  allow(my_encoder).to receive(:encode) .and_return 'encoded data'
344
346
  end
345
347
 
@@ -378,7 +380,6 @@ describe DataTarget do
378
380
  end
379
381
  end
380
382
 
381
-
382
383
  context '#field_symbolizer' do
383
384
  context 'field_symbolizer called before encoder' do
384
385
  let(:before_encoder) do
@@ -448,15 +449,15 @@ describe DataTarget do
448
449
  end
449
450
 
450
451
  context '#loader' do
451
- before { data_target.loader 'my_loader' }
452
+ before { data_target.loader my_loader }
452
453
 
453
454
  it 'adds a loader to the list of loaders' do
454
- expect(data_target.loaders).to eq ['my_loader']
455
+ expect(data_target.loaders).to eq [my_loader]
455
456
  end
456
457
 
457
458
  it 'allows for multiple loaders to be defined' do
458
- data_target.loader 'my_loader2'
459
- expect(data_target.loaders).to eq ['my_loader', 'my_loader2']
459
+ data_target.loader my_loader2
460
+ expect(data_target.loaders).to eq [my_loader, my_loader2]
460
461
  end
461
462
  end
462
463
 
@@ -505,4 +506,21 @@ describe DataTarget do
505
506
  end
506
507
  end
507
508
  end
509
+
510
+ context '#df=' do
511
+ before do
512
+ data_target.encoder my_encoder
513
+ data_target.loader my_loader
514
+ data_target.loader my_loader2
515
+
516
+ allow(my_loader).to receive(:autoload) { false }
517
+ allow(my_loader2).to receive(:autoload) { true }
518
+ end
519
+
520
+ it 'loads any loaders set to autoload' do
521
+ expect(my_loader).not_to receive :load
522
+ expect(my_loader2).to receive :load
523
+ data_target.df = Remi::DataFrame::Daru.new([])
524
+ end
525
+ end
508
526
  end
@@ -5,15 +5,15 @@ describe Extractor::FileSystem do
5
5
  now = Time.new
6
6
 
7
7
  example_files = [
8
- { pathname: "pdir/ApplicantsA-9.csv", create_time: now - 10.minutes },
9
- { pathname: "pdir/ApplicantsA-3.csv", create_time: now - 5.minutes },
10
- { pathname: "pdir/ApplicantsA-5.csv", create_time: now - 1.minutes },
11
- { pathname: "pdir/ApplicantsB-7.csv", create_time: now - 10.minutes },
12
- { pathname: "pdir/ApplicantsB-6.csv", create_time: now - 5.minutes },
13
- { pathname: "pdir/ApplicantsB-2.csv", create_time: now - 1.minutes },
14
- { pathname: "pdir/ApplicantsB-2.txt", create_time: now - 0.minutes },
15
- { pathname: "pdir/Apples.csv", createtime: now - 1.minutes },
16
- { pathname: "otherdir/ApplicantsA-11.csv", createtime: now - 1.minutes },
8
+ { pathname: "pdir/ApplicantsA-9.csv", create_time: now - 10.minutes },
9
+ { pathname: "pdir/ApplicantsA-3.csv", create_time: now - 5.minutes },
10
+ { pathname: "pdir/ApplicantsA-5.csv", create_time: now - 1.minutes },
11
+ { pathname: "pdir/ApplicantsB-7.csv", create_time: now - 10.minutes },
12
+ { pathname: "pdir/ApplicantsB-6.csv", create_time: now - 5.minutes },
13
+ { pathname: "pdir/ApplicantsB-2.csv", create_time: now - 1.minutes },
14
+ { pathname: "pdir/ApplicantsB-2.txt", create_time: now - 0.minutes },
15
+ { pathname: "pdir/Apples.csv", create_time: now - 1.minutes },
16
+ { pathname: "otherdir/ApplicantsA-11.csv", create_time: now - 1.minutes },
17
17
  ]
18
18
 
19
19
  remote_path = 'pdir'
@@ -89,6 +89,40 @@ describe Extractor::FileSystem do
89
89
  end
90
90
 
91
91
 
92
+ context 'extracting the most recent file by create time' do
93
+ before do
94
+ @params.merge!({
95
+ most_recent_within_n: 1.hour,
96
+ most_recent_only: true
97
+ })
98
+ end
99
+
100
+ it 'extracts the files within n hours of creation' do
101
+ expect(file_system.entries.map(&:name)).to match_array([
102
+ "ApplicantsB-2.txt"
103
+ ])
104
+ end
105
+ end
106
+
107
+ context 'extracting all recent files by create time' do
108
+ before do
109
+ @params.merge!({
110
+ created_within: 0.02.hours,
111
+ most_recent_only: false
112
+ })
113
+ end
114
+
115
+ it 'extracts the files within n hours of creation' do
116
+ puts @params
117
+ expect(file_system.entries.map(&:name)).to match_array([
118
+ "Apples.csv",
119
+ "ApplicantsA-5.csv",
120
+ "ApplicantsB-2.csv",
121
+ "ApplicantsB-2.txt"
122
+ ])
123
+ end
124
+ end
125
+
92
126
  context 'extracting files matching a pattern with a by group' do
93
127
  before do
94
128
  @params.merge!({
@@ -0,0 +1,133 @@
1
+ require 'remi_spec'
2
+ require 'remi/data_subjects/gsheet'
3
+
4
+ describe Extractor::Gsheet do
5
+
6
+ let(:remote_path) { '' }
7
+ let(:credentials) {
8
+ {
9
+ :client_id => 'some_client_id',
10
+ :access_token => 'some_access_token',
11
+ :refresh_token => 'some_refresh_token',
12
+ :client_secret => 'some_client_secret',
13
+ :application_name => 'some_application_name',
14
+ :project_id => 'some_project_id',
15
+ :expiration_time => '123456789'
16
+ }
17
+ }
18
+
19
+
20
+ let(:params) {
21
+ {
22
+ credentials: credentials,
23
+ folder_id: 'some_google_folder_id',
24
+ remote_path: remote_path
25
+ }
26
+ }
27
+
28
+ let(:gsheet_file) {
29
+ Extractor::Gsheet.new(params)
30
+ }
31
+
32
+ let(:response) { double('response') }
33
+ let(:remote_filenames) {["test_file_1","test_file_2"]}
34
+ let(:remote_files) do
35
+ [{name: "test_file_1", create_time:Date.current, id: "1234"},
36
+ {name: "test_file_2", create_time:Date.current, id: "5678"}]
37
+
38
+ end
39
+
40
+ context '.new' do
41
+ it 'creates an instance with valid parameters' do
42
+ gsheet_file
43
+ end
44
+
45
+ it 'requires a client_id' do
46
+ credentials.delete(:client_id)
47
+ expect { gsheet_file }.to raise_error KeyError
48
+ end
49
+
50
+ it 'requires an access_token' do
51
+ credentials.delete(:access_token)
52
+ expect { gsheet_file }.to raise_error KeyError
53
+ end
54
+
55
+ it 'requires a client_secret' do
56
+ credentials.delete(:client_secret)
57
+ expect { gsheet_file }.to raise_error KeyError
58
+ end
59
+
60
+ it 'requires a refresh_token' do
61
+ credentials.delete(:refresh_token)
62
+ expect { gsheet_file }.to raise_error KeyError
63
+ end
64
+
65
+ it 'requires a folder id' do
66
+ params.delete(:credentials)
67
+ expect { gsheet_file }.to raise_error ArgumentError
68
+ end
69
+
70
+ it 'requires an application name' do
71
+ credentials.delete(:application_name)
72
+ expect { gsheet_file }.to raise_error KeyError
73
+ end
74
+
75
+ it 'requires a project id' do
76
+ credentials.delete(:project_id)
77
+ expect { gsheet_file }.to raise_error KeyError
78
+ end
79
+
80
+ end
81
+
82
+ context '#all_entires' do
83
+ it 'returns all entries' do
84
+
85
+ allow(response).to receive(:files) { remote_files }
86
+ allow(gsheet_file).to receive(:service_list_files) { response }
87
+
88
+ expect(gsheet_file.all_entries.map(&:name)).to eq remote_filenames
89
+
90
+ end
91
+ end
92
+
93
+ context '#extract' do
94
+ it 'downloads files from google' do
95
+
96
+ allow(response).to receive(:files) { remote_files }
97
+ allow(gsheet_file).to receive(:service_list_files) { response }
98
+ expect(gsheet_file).to receive(:get_spreadsheet_vals).exactly(remote_filenames.size).times
99
+ gsheet_file.extract
100
+
101
+ end
102
+
103
+ end
104
+ end
105
+
106
+ describe Parser::Gsheet do
107
+
108
+ let(:parser) { Parser::Gsheet.new }
109
+ let(:gs_extract) { double('gs_extract') }
110
+ let(:example_data) do
111
+ [{"headers" => ["header_1", "header_2", "header_3"],
112
+ "row 1" => ["value 1", "value 2", "value 3"]
113
+ }]
114
+ end
115
+
116
+ before do
117
+ allow(gs_extract).to receive(:data) { example_data }
118
+ end
119
+
120
+ it 'converts Google Sheets response data into a dataframe' do
121
+ expect(parser.parse gs_extract).to be_a Remi::DataFrame::Daru
122
+ end
123
+
124
+ it 'converted data into the correct dataframe' do
125
+ expected_df = Daru::DataFrame.new(
126
+ :header_1 => ['value 1'],
127
+ :header_2 => ['value 2'],
128
+ :header_3 => ['value 3'],
129
+ )
130
+ expect(parser.parse(gs_extract).to_a).to eq expected_df.to_a
131
+ end
132
+
133
+ end
@@ -4,7 +4,10 @@ describe 'sub jobs' do
4
4
  before :each do
5
5
  Object.send(:remove_const, :MySubJob) if Object.constants.include?(:MySubJob)
6
6
  class MySubJob < Job
7
- source(:sub_source) {}
7
+ source :sub_source do
8
+ extractor Extractor::None.new
9
+ fields({ a: { from_sub_job: true, to_overwrite: 'from_sub_job' } })
10
+ end
8
11
  target(:sub_target) {}
9
12
  end
10
13
  end
@@ -13,21 +16,50 @@ describe 'sub jobs' do
13
16
 
14
17
 
15
18
  describe Extractor::SubJob do
16
- let(:extractor) { Extractor::SubJob.new(sub_job: sub_job, data_subject: :sub_target) }
19
+ let(:target_extractor) { Extractor::SubJob.new(sub_job: sub_job, data_subject: :sub_target) }
20
+ let(:source_extractor) { Extractor::SubJob.new(sub_job: sub_job, data_subject: :sub_source) }
17
21
 
18
22
  it 'returns the data from the sub-job' do
19
- allow(sub_job.job.sub_target).to receive(:df) { 'sub target df' }
20
- expect(extractor.extract).to eq 'sub target df'
23
+ allow(sub_job.sub_job.sub_target).to receive(:df) { 'sub target df' }
24
+ expect(target_extractor.extract).to eq 'sub target df'
25
+ end
26
+
27
+ it 'executes the sub job when target data is requested' do
28
+ expect(sub_job).to receive(:execute).once
29
+ target_extractor.extract
30
+ end
31
+
32
+ it 'does not execute the sub job when source data is requested' do
33
+ expect(sub_job).not_to receive(:execute)
34
+ source_extractor.extract
21
35
  end
22
36
  end
23
37
 
24
38
  describe Loader::SubJob do
25
- let(:loader) { Loader::SubJob.new(sub_job: sub_job, data_subject: :sub_source) }
39
+ let(:data_target) { DataTarget.new }
40
+ let(:loader) { Loader::SubJob.new(context: data_target, sub_job: sub_job, data_subject: :sub_source) }
41
+ let(:my_data_frame) { Daru::DataFrame.new({ a: [1,2,3] }) }
26
42
 
27
43
  it 'populates the sub-job data frame' do
28
- some_data_frame = Daru::DataFrame.new({ a: [1,2,3] })
29
- loader.load(some_data_frame)
30
- expect(sub_job.job.sub_source.df).to eq some_data_frame
44
+ loader.load(my_data_frame)
45
+ expect(sub_job.sub_job.sub_source.df).to eq my_data_frame
46
+ end
47
+
48
+ it 'merges fields from the parent source when requested' do
49
+ data_target.fields({ a: { from_parent: :true, to_overwrite: 'from_parent' } })
50
+ loader.load(my_data_frame)
51
+ expect(sub_job.sub_job.sub_source.fields).to eq MySubJob.new.sub_source.fields.merge data_target.fields
52
+ end
53
+
54
+ it 'does not merge fields from the parent source when requested' do
55
+ loader.merge_fields = false
56
+ data_target.fields({ a: { from_parent: :true, to_overwrite: 'from_parent' } })
57
+ loader.load(my_data_frame)
58
+ expect(sub_job.sub_job.sub_source.fields).to eq MySubJob.new.sub_source.fields
59
+ end
60
+
61
+ it 'autoloads the target' do
62
+ expect(loader.autoload).to be true
31
63
  end
32
64
  end
33
65
  end
data/spec/job_spec.rb CHANGED
@@ -70,11 +70,11 @@ describe Job do
70
70
  class MyJob
71
71
  sub_job(:my_sub_job) { 'something' }
72
72
  end
73
- expect { job.my_sub_job.job }.to raise_error ArgumentError
73
+ expect { job.my_sub_job.sub_job }.to raise_error ArgumentError
74
74
  end
75
75
 
76
76
  it 'returns a Remi job' do
77
- expect(job.my_sub_job.job).to be_a Remi::Job
77
+ expect(job.my_sub_job.sub_job).to be_a Remi::Job
78
78
  end
79
79
  end
80
80
 
@@ -276,6 +276,11 @@ describe Job do
276
276
  expect(job.params[:my_param]).to eq 'instantiated'
277
277
  end
278
278
 
279
+ it 'works with booleans too' do
280
+ other_job = MyJob.new(my_param: false)
281
+ expect(other_job.params[:my_param]).to eq false
282
+ end
283
+
279
284
  it 'does not affect the values of other instances' do
280
285
  job
281
286
  other_job = MyJob.new
@@ -295,17 +300,12 @@ describe Job do
295
300
  context '#execute' do
296
301
  before do
297
302
  class MyJob
298
- transform :transform_one do
299
- end
300
-
301
- transform :transform_two do
302
- end
303
-
304
- target :target_one do
305
- end
306
-
307
- target :target_two do
308
- end
303
+ transform(:transform_one) {}
304
+ transform(:transform_two) {}
305
+ sub_job(:sub_job_one) { Remi::Job.new }
306
+ sub_job(:sub_job_two) { Remi::Job.new }
307
+ target(:target_one) {}
308
+ target(:target_two) {}
309
309
  end
310
310
  end
311
311
 
@@ -314,6 +314,11 @@ describe Job do
314
314
  job.execute
315
315
  end
316
316
 
317
+ it 'executes all subjobs' do
318
+ expect(job).to receive(:execute_sub_jobs)
319
+ job.execute
320
+ end
321
+
317
322
  it 'executes load all targets' do
318
323
  expect(job).to receive(:execute_load_targets)
319
324
  job.execute
@@ -330,12 +335,39 @@ describe Job do
330
335
  job.execute(:transforms)
331
336
  end
332
337
 
338
+ it 'does not execute all sub jobs' do
339
+ expect(job).not_to receive(:execute_sub_jobs)
340
+ job.execute(:transforms)
341
+ end
342
+
333
343
  it 'does not load all targets' do
334
344
  expect(job).not_to receive(:execute_load_targets)
335
345
  job.execute(:transforms)
336
346
  end
337
347
  end
338
348
 
349
+ context '#execute(:sub_jobs)' do
350
+ it 'executes all sub_jobs' do
351
+ [:sub_job_one, :sub_job_two].each do |sub_job_name|
352
+ sub_job = instance_double(Job::SubJob)
353
+ expect(sub_job).to receive(:execute)
354
+ expect(job).to receive(sub_job_name) .and_return(sub_job)
355
+ end
356
+
357
+ job.execute(:sub_jobs)
358
+ end
359
+
360
+ it 'does not execute all transforms' do
361
+ expect(job).not_to receive(:execute_transforms)
362
+ job.execute(:sub_jobs)
363
+ end
364
+
365
+ it 'does not load all targets' do
366
+ expect(job).not_to receive(:execute_load_targets)
367
+ job.execute(:sub_jobs)
368
+ end
369
+ end
370
+
339
371
  context '#execute(:load_targets)' do
340
372
  it 'loads all targets' do
341
373
  [:target_one, :target_two].each do |target_name|
@@ -351,6 +383,11 @@ describe Job do
351
383
  expect(job).not_to receive(:execute_transforms)
352
384
  job.execute(:load_targets)
353
385
  end
386
+
387
+ it 'does not execute all sub jobs' do
388
+ expect(job).not_to receive(:execute_sub_jobs)
389
+ job.execute(:load_targets)
390
+ end
354
391
  end
355
392
  end
356
393
 
@@ -469,9 +506,9 @@ describe Job do
469
506
  Job::SubJob.new { scoped_sub_job }
470
507
  end
471
508
 
472
- context '#job' do
509
+ context '#sub_job' do
473
510
  it 'returns the job instance for the sub job' do
474
- expect(job_sub_job.job).to eq sub_job
511
+ expect(job_sub_job.sub_job).to eq sub_job
475
512
  end
476
513
  end
477
514
 
@@ -494,6 +531,12 @@ describe Job do
494
531
  expect(sub_job).to receive(:execute)
495
532
  job_sub_job.execute
496
533
  end
534
+
535
+ it 'only executes the sub job once' do
536
+ expect(sub_job).to receive(:execute).once
537
+ job_sub_job.execute
538
+ job_sub_job.execute
539
+ end
497
540
  end
498
541
 
499
542
  context '#execute_transforms' do