remi 0.2.42 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +7 -0
  3. data/Gemfile +1 -1
  4. data/Gemfile.lock +13 -26
  5. data/README.md +1 -1
  6. data/features/step_definitions/remi_step.rb +33 -13
  7. data/features/sub_job_example.feature +24 -0
  8. data/features/sub_transform_example.feature +35 -0
  9. data/features/sub_transform_many_to_many.feature +49 -0
  10. data/features/support/env_app.rb +1 -1
  11. data/jobs/all_jobs_shared.rb +19 -16
  12. data/jobs/copy_source_job.rb +11 -9
  13. data/jobs/csv_file_target_job.rb +10 -9
  14. data/jobs/json_job.rb +18 -14
  15. data/jobs/metadata_job.rb +33 -28
  16. data/jobs/parameters_job.rb +14 -11
  17. data/jobs/sample_job.rb +106 -77
  18. data/jobs/sftp_file_target_job.rb +14 -13
  19. data/jobs/sub_job_example_job.rb +86 -0
  20. data/jobs/sub_transform_example_job.rb +43 -0
  21. data/jobs/sub_transform_many_to_many_job.rb +46 -0
  22. data/jobs/transforms/concatenate_job.rb +16 -12
  23. data/jobs/transforms/data_frame_sieve_job.rb +24 -19
  24. data/jobs/transforms/date_diff_job.rb +15 -11
  25. data/jobs/transforms/nvl_job.rb +16 -12
  26. data/jobs/transforms/parse_date_job.rb +17 -14
  27. data/jobs/transforms/partitioner_job.rb +27 -19
  28. data/jobs/transforms/prefix_job.rb +13 -10
  29. data/jobs/transforms/truncate_job.rb +14 -10
  30. data/jobs/transforms/truthy_job.rb +11 -8
  31. data/lib/remi.rb +25 -11
  32. data/lib/remi/data_frame.rb +4 -4
  33. data/lib/remi/data_frame/daru.rb +1 -37
  34. data/lib/remi/data_subject.rb +234 -48
  35. data/lib/remi/data_subjects/csv_file.rb +171 -0
  36. data/lib/remi/data_subjects/data_frame.rb +106 -0
  37. data/lib/remi/data_subjects/file_system.rb +115 -0
  38. data/lib/remi/data_subjects/local_file.rb +109 -0
  39. data/lib/remi/data_subjects/none.rb +31 -0
  40. data/lib/remi/data_subjects/postgres.rb +186 -0
  41. data/lib/remi/data_subjects/s3_file.rb +84 -0
  42. data/lib/remi/data_subjects/salesforce.rb +211 -0
  43. data/lib/remi/data_subjects/sftp_file.rb +196 -0
  44. data/lib/remi/data_subjects/sub_job.rb +50 -0
  45. data/lib/remi/dsl.rb +74 -0
  46. data/lib/remi/encoder.rb +45 -0
  47. data/lib/remi/extractor.rb +21 -0
  48. data/lib/remi/field_symbolizers.rb +1 -0
  49. data/lib/remi/job.rb +279 -113
  50. data/lib/remi/job/parameters.rb +90 -0
  51. data/lib/remi/job/sub_job.rb +35 -0
  52. data/lib/remi/job/transform.rb +165 -0
  53. data/lib/remi/loader.rb +22 -0
  54. data/lib/remi/monkeys/daru.rb +4 -0
  55. data/lib/remi/parser.rb +44 -0
  56. data/lib/remi/testing/business_rules.rb +17 -23
  57. data/lib/remi/testing/data_stub.rb +2 -2
  58. data/lib/remi/version.rb +1 -1
  59. data/remi.gemspec +3 -0
  60. data/spec/data_subject_spec.rb +475 -11
  61. data/spec/data_subjects/csv_file_spec.rb +69 -0
  62. data/spec/data_subjects/data_frame_spec.rb +52 -0
  63. data/spec/{extractor → data_subjects}/file_system_spec.rb +0 -0
  64. data/spec/{extractor → data_subjects}/local_file_spec.rb +0 -0
  65. data/spec/data_subjects/none_spec.rb +41 -0
  66. data/spec/data_subjects/postgres_spec.rb +80 -0
  67. data/spec/{extractor → data_subjects}/s3_file_spec.rb +0 -0
  68. data/spec/data_subjects/salesforce_spec.rb +117 -0
  69. data/spec/{extractor → data_subjects}/sftp_file_spec.rb +16 -0
  70. data/spec/data_subjects/sub_job_spec.rb +33 -0
  71. data/spec/encoder_spec.rb +38 -0
  72. data/spec/extractor_spec.rb +11 -0
  73. data/spec/fixtures/sf_bulk_helper_stubs.rb +443 -0
  74. data/spec/job/transform_spec.rb +257 -0
  75. data/spec/job_spec.rb +507 -0
  76. data/spec/loader_spec.rb +11 -0
  77. data/spec/parser_spec.rb +38 -0
  78. data/spec/sf_bulk_helper_spec.rb +117 -0
  79. data/spec/testing/data_stub_spec.rb +5 -3
  80. metadata +109 -27
  81. data/features/aggregate.feature +0 -42
  82. data/jobs/aggregate_job.rb +0 -31
  83. data/jobs/transforms/transform_jobs.rb +0 -4
  84. data/lib/remi/data_subject/csv_file.rb +0 -162
  85. data/lib/remi/data_subject/data_frame.rb +0 -52
  86. data/lib/remi/data_subject/postgres.rb +0 -134
  87. data/lib/remi/data_subject/salesforce.rb +0 -136
  88. data/lib/remi/data_subject/sftp_file.rb +0 -65
  89. data/lib/remi/extractor/file_system.rb +0 -92
  90. data/lib/remi/extractor/local_file.rb +0 -43
  91. data/lib/remi/extractor/s3_file.rb +0 -57
  92. data/lib/remi/extractor/sftp_file.rb +0 -83
  93. data/spec/data_subject/csv_file_spec.rb +0 -79
  94. data/spec/data_subject/data_frame.rb +0 -27
@@ -0,0 +1,69 @@
1
+ require_relative '../remi_spec'
2
+
3
+ describe Parser::CsvFile do
4
+
5
+ let(:basic_file) { 'spec/fixtures/basic.csv' }
6
+ let(:basic_dataframe) do
7
+ Remi::DataFrame::Daru.new(
8
+ {
9
+ column_a: ['value 1A', 'value 2A'],
10
+ column_b: ['value 1B', 'value 2B']
11
+ }
12
+ )
13
+ end
14
+
15
+ it 'converts a CSV into a dataframe' do
16
+ csv = Parser::CsvFile.new
17
+ expect(csv.parse(basic_file).to_a).to eq basic_dataframe.to_a
18
+ end
19
+
20
+ it 'adds filename when requested' do
21
+ csv = Parser::CsvFile.new(
22
+ filename_field: :from_file
23
+ )
24
+
25
+ expected_files = [Pathname.new(basic_file).to_s] * 2
26
+ expect(csv.parse(basic_file)[:from_file].to_a).to eq expected_files
27
+ end
28
+
29
+ it 'preprocesses records when required' do
30
+ csv = Parser::CsvFile.new(
31
+ preprocessor: ->(line) { line.gsub(/\\"/,'""') }
32
+ )
33
+
34
+ bad_escape_file = 'spec/fixtures/unsupported_escape.csv'
35
+
36
+ expected_df = Remi::DataFrame::Daru.new(
37
+ {
38
+ column_a: ['value 1A', 'value 2A'],
39
+ column_b: ['value "1B"', 'value "2B"']
40
+ }
41
+ )
42
+ expect(csv.parse(bad_escape_file).to_a).to eq expected_df.to_a
43
+ end
44
+
45
+ it 'accepts standard Ruby CSV options' do
46
+ csv = Parser::CsvFile.new(
47
+ preprocessor: ->(line) { line.gsub(/,/,'|') },
48
+ csv_options: { col_sep: '|' }
49
+ )
50
+
51
+ expect(csv.parse(basic_file).to_a).to eq basic_dataframe.to_a
52
+ end
53
+
54
+ it 'combines multiple csv files into a single dataframe' do
55
+ csv = Parser::CsvFile.new
56
+ two_files = ['spec/fixtures/basic.csv', 'spec/fixtures/basic2.csv']
57
+
58
+ expected_df = Remi::DataFrame::Daru.new(
59
+ {
60
+ column_a: ['value 1A', 'value 2A', 'value 1A', 'value 2A'],
61
+ column_b: ['value 1B', 'value 2B', nil, nil],
62
+ column_c: [nil, nil, 'value 1C', 'value 2C']
63
+ }
64
+ )
65
+
66
+ expect(csv.parse(two_files).to_a).to eq expected_df.to_a
67
+ end
68
+
69
+ end
@@ -0,0 +1,52 @@
1
+ require_relative '../remi_spec'
2
+
3
+ describe Extractor::DataFrame do
4
+ let(:extractor) { Extractor::DataFrame.new(data: data) }
5
+ let(:data) { 'some_data' }
6
+
7
+ context '#data' do
8
+ it 'returns the raw data' do
9
+ expect(extractor.extract.data).to eq data
10
+ end
11
+ end
12
+ end
13
+
14
+ describe Parser::DataFrame do
15
+ let(:fields) do
16
+ {
17
+ brewer: { type: 'text' },
18
+ style: { type: 'text' }
19
+ }
20
+ end
21
+ let(:parser) { Parser::DataFrame.new(fields: fields) }
22
+ let(:df_extract) { double('df_extract') }
23
+ let(:data) {
24
+ [
25
+ [ 'Baerlic', 'IPA' ],
26
+ [ 'Ex Novo', 'Red' ]
27
+ ]
28
+ }
29
+
30
+ before do
31
+ allow(df_extract).to receive(:data) { data }
32
+ end
33
+
34
+ it 'converts the data array into a dataframe' do
35
+ expect(parser.parse df_extract).to be_a Remi::DataFrame::Daru
36
+ end
37
+
38
+ it 'converts the data array into the dataframe' do
39
+ expected_df = Daru::DataFrame.new(
40
+ :brewer => ['Baerlic', 'Ex Novo'],
41
+ :style => ['IPA', 'Red']
42
+ )
43
+ expect(parser.parse(df_extract).to_a).to eq expected_df.to_a
44
+ end
45
+
46
+ end
47
+
48
+ describe Encoder::DataFrame, skip: 'todo' do
49
+ end
50
+
51
+ describe Loader::DataFrame, skip: 'todo' do
52
+ end
@@ -0,0 +1,41 @@
1
+ require_relative '../remi_spec'
2
+
3
+ describe Extractor::None do
4
+ let(:extractor) { Extractor::None.new }
5
+
6
+ context '#extract' do
7
+ it 'does nothing' do
8
+ expect(extractor.extract).to be nil
9
+ end
10
+ end
11
+ end
12
+
13
+ describe Parser::None do
14
+ let(:parser) { Parser::None.new }
15
+
16
+ context '#parse' do
17
+ it 'returns what it is given' do
18
+ expect(parser.parse('some data')).to eq 'some data'
19
+ end
20
+ end
21
+ end
22
+
23
+ describe Encoder::None do
24
+ let(:encoder) { Encoder::None.new }
25
+
26
+ context '#encode' do
27
+ it 'returns what it is given' do
28
+ expect(encoder.encode('some data')).to eq 'some data'
29
+ end
30
+ end
31
+ end
32
+
33
+ describe Loader::None do
34
+ let(:loader) { Loader::None.new }
35
+
36
+ context '#loader' do
37
+ it 'does nothing' do
38
+ expect(loader.load('some data')).to be true
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,80 @@
1
+ require_relative '../remi_spec'
2
+
3
+ describe Extractor::Postgres do
4
+ let(:extractor) { Extractor::Postgres.new(credentials: {}, query: 'some_query') }
5
+ let(:pg_conn) { double('pg_conn') }
6
+ let(:data) { 'some postgres data' }
7
+
8
+ before do
9
+ allow(pg_conn).to receive(:exec) { data }
10
+ allow(extractor).to receive(:connection) { pg_conn }
11
+ end
12
+
13
+ context '#data' do
14
+ it 'returns extracted data' do
15
+ expect(extractor.extract.data).to eq data
16
+ end
17
+ end
18
+ end
19
+
20
+
21
+ describe Parser::Postgres do
22
+ let(:parser) { Parser::Postgres.new }
23
+ let(:pg_extract) { double('pg_extract') }
24
+ let(:data) do
25
+ [
26
+ { 'brewer' => 'Baerlic', 'style' => 'IPA', 'quantity' => 5 },
27
+ { 'brewer' => 'Ex Novo', 'style' => 'Red', 'quantity' => 3 }
28
+ ]
29
+ end
30
+
31
+ before do
32
+ allow(pg_extract).to receive(:data) { data }
33
+ end
34
+
35
+ it 'converts postgres response data into a dataframe' do
36
+ expect(parser.parse pg_extract).to be_a Remi::DataFrame::Daru
37
+ end
38
+
39
+ it 'converted data into the correct dataframe' do
40
+ expected_df = Daru::DataFrame.new(
41
+ :brewer => ['Baerlic', 'Ex Novo'],
42
+ :style => ['IPA', 'Red'],
43
+ :quantity => [5, 3]
44
+ )
45
+ expect(parser.parse(pg_extract).to_a).to eq expected_df.to_a
46
+ end
47
+ end
48
+
49
+
50
+ describe Encoder::Postgres do
51
+ let(:fields) do
52
+ {
53
+ brewer: { type: 'text' },
54
+ style: { type: 'text' },
55
+ quantity: { type: 'integer' }
56
+ }
57
+ end
58
+ let(:encoder) { Encoder::Postgres.new(fields: fields) }
59
+ let(:dataframe) do
60
+ expected_df = Daru::DataFrame.new(
61
+ :brewer => ['Baerlic', 'Ex Novo'],
62
+ :style => ['IPA', 'Red'],
63
+ :quantity => [5, 3]
64
+ )
65
+ end
66
+
67
+ it 'converts the dataframe into an array of strings to be used by the loader' do
68
+ expect(encoder.encode(dataframe).values).to eq [
69
+ "Baerlic\tIPA\t5",
70
+ "Ex Novo\tRed\t3"
71
+ ]
72
+ end
73
+
74
+ it 'builds the field ddl' do
75
+ expect(encoder.encode(dataframe).ddl_fields).to eq 'brewer text, style text, quantity integer'
76
+ end
77
+ end
78
+
79
+ describe Loader::Postgres, skip: 'todo' do
80
+ end
@@ -0,0 +1,117 @@
1
+ require_relative '../remi_spec'
2
+ require 'remi/data_subjects/salesforce.rb'
3
+
4
+ describe Extractor::Salesforce do
5
+ let(:extractor) { Extractor::Salesforce.new(object: :Contact, credentials: {}, query: '') }
6
+ let(:sf_bulk) { double('sf_bulk') }
7
+ let(:data) do
8
+ {
9
+ 'batches' => [
10
+ {
11
+ 'id' => ['751160000065e2BAAQ'],
12
+ 'state' => [ 'Completed' ]
13
+ }
14
+ ]
15
+ }
16
+ end
17
+
18
+ before do
19
+ allow(extractor).to receive(:sf_bulk) { sf_bulk }
20
+ allow(sf_bulk).to receive(:query) { data }
21
+ end
22
+
23
+ context '#data' do
24
+ it 'returns extracted data' do
25
+ expect(extractor.extract.data).to eq data
26
+ end
27
+ end
28
+
29
+
30
+ it 'raises an error if the batch fails' do
31
+ data['batches'].first['state'] = ['Error']
32
+ expect { extractor.extract }.to raise_error Extractor::Salesforce::ExtractError
33
+ end
34
+ end
35
+
36
+
37
+ describe Parser::Salesforce do
38
+ let(:parser) { Parser::Salesforce.new }
39
+ let(:sf_extract) { double('sf_extract') }
40
+ let(:data) do
41
+ {
42
+ 'batches' => [
43
+ {
44
+ 'id' => ['751160000065e2BAAQ'],
45
+ 'state' => [ 'Completed' ],
46
+ 'response' => [
47
+ {
48
+ "xsi:type" => "sObject",
49
+ "type" => [
50
+ "Contact"
51
+ ],
52
+ "Id" => [
53
+ "003G000001cKYaUIA4",
54
+ "003G000001cKYaUIA4"
55
+ ],
56
+ "Student_ID__c" => [
57
+ "FJD385628"
58
+ ]
59
+ },
60
+ {
61
+ "xsi:type" => "sObject",
62
+ "type" => [
63
+ "Contact"
64
+ ],
65
+ "Id" => [
66
+ "003G000001cKYbXIA4",
67
+ "003G000001cKYbXIA4"
68
+ ],
69
+ "Student_ID__c" => [
70
+ { 'xsi:nil' => 'true' }
71
+ ]
72
+ }
73
+ ]
74
+ }
75
+ ]
76
+ }
77
+ end
78
+
79
+ before do
80
+ allow(sf_extract).to receive(:data) { data }
81
+ end
82
+
83
+ it 'converts SalesforceBulkApi response data into a dataframe' do
84
+ expect(parser.parse sf_extract).to be_a Remi::DataFrame::Daru
85
+ end
86
+
87
+ it 'converted data into the correct dataframe' do
88
+ expected_df = Daru::DataFrame.new(
89
+ :Id => ['003G000001cKYaUIA4', '003G000001cKYbXIA4'],
90
+ :Student_ID__c => ['FJD385628', nil]
91
+ )
92
+ expect(parser.parse(sf_extract).to_a).to eq expected_df.to_a
93
+ end
94
+ end
95
+
96
+
97
+ describe Encoder::Salesforce do
98
+ let(:encoder) { Encoder::Salesforce.new }
99
+ let(:dataframe) do
100
+ Daru::DataFrame.new(
101
+ :Id => ['003G000001cKYaUIA4', '003G000001cKYbXIA4'],
102
+ :Student_ID__c => ['FJD385628', nil]
103
+ )
104
+ end
105
+
106
+ it 'converts the dataframe into an array of hashes' do
107
+ expected_result = [
108
+ { :Id => '003G000001cKYaUIA4', :Student_ID__c => 'FJD385628' },
109
+ { :Id => '003G000001cKYbXIA4', :Student_ID__c => nil },
110
+ ]
111
+ expect(encoder.encode dataframe).to eq expected_result
112
+ end
113
+ end
114
+
115
+
116
+ describe Loader::Salesforce, skip: 'todo' do
117
+ end
@@ -82,3 +82,19 @@ describe Extractor::SftpFile do
82
82
  end
83
83
  end
84
84
  end
85
+
86
+
87
+ describe Loader::SftpFile do
88
+ let(:loader) { Loader::SftpFile.new(credentials: {}, remote_path: 'some_path') }
89
+ let(:data) { double('some_data') }
90
+ let(:sftp_session) { instance_double('Net::SFTP::Session') } # name must resolve to the real class for the double to be verified
91
+
92
+ before do
93
+ allow(Net::SFTP).to receive(:start).and_yield sftp_session
94
+ end
95
+
96
+ it 'loads a csv to a target sftp filesystem' do
97
+ expect(sftp_session).to receive(:upload!).with(data, 'some_path')
98
+ loader.load data
99
+ end
100
+ end
@@ -0,0 +1,33 @@
1
+ require_relative '../remi_spec'
2
+
3
+ describe 'sub jobs' do
4
+ before :each do
5
+ Object.send(:remove_const, :MySubJob) if Object.constants.include?(:MySubJob)
6
+ class MySubJob < Job
7
+ source(:sub_source) {}
8
+ target(:sub_target) {}
9
+ end
10
+ end
11
+
12
+ let(:sub_job) { Job::SubJob.new { MySubJob.new } }
13
+
14
+
15
+ describe Extractor::SubJob do
16
+ let(:extractor) { Extractor::SubJob.new(sub_job: sub_job, data_subject: :sub_target) }
17
+
18
+ it 'returns the data from the sub-job' do
19
+ allow(sub_job.job.sub_target).to receive(:df) { 'sub target df' }
20
+ expect(extractor.extract).to eq 'sub target df'
21
+ end
22
+ end
23
+
24
+ describe Loader::SubJob do
25
+ let(:loader) { Loader::SubJob.new(sub_job: sub_job, data_subject: :sub_source) }
26
+
27
+ it 'populates the sub-job data frame' do
28
+ some_data_frame = Daru::DataFrame.new({ a: [1,2,3] })
29
+ loader.load(some_data_frame)
30
+ expect(sub_job.job.sub_source.df).to eq some_data_frame
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,38 @@
1
+ require_relative 'remi_spec'
2
+
3
+ describe Remi::Encoder do
4
+ let(:field_symbolizer) { double('field_symbolizer') }
5
+ let(:context) { double('context') }
6
+ let(:fields) { double('fields') }
7
+ let(:encoder) { Encoder.new(context: context, fields: fields, field_symbolizer: field_symbolizer) }
8
+
9
+ context '#encode' do
10
+ it 'has an encode method' do
11
+ expect(encoder).respond_to? :encode
12
+ end
13
+ end
14
+
15
+ context '#field_symbolizer' do
16
+ it 'can be set in the constructor' do
17
+ expect(encoder.field_symbolizer).to eq field_symbolizer
18
+ end
19
+
20
+ it 'the field_symbolizer defined in the context takes priority' do
21
+ symbolizer_from_context = double('symbolizer_from_context')
22
+ allow(context).to receive(:field_symbolizer) { symbolizer_from_context }
23
+ expect(encoder.field_symbolizer).to eq symbolizer_from_context
24
+ end
25
+ end
26
+
27
+ context '#fields' do
28
+ it 'can be set in the constructor' do
29
+ expect(encoder.fields).to eq fields
30
+ end
31
+
32
+ it 'the field_symbolizer defined in the context takes priority' do
33
+ fields_from_context = double('fields_from_context')
34
+ allow(context).to receive(:fields) { fields_from_context }
35
+ expect(encoder.fields).to eq fields_from_context
36
+ end
37
+ end
38
+ end