remi 0.2.39 → 0.2.40

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fc73b233d7181b3e40c96c5da2d52bf8d81d99a8
4
- data.tar.gz: 536421bb23bf564eeb4394f3046467fe1dd9adf4
3
+ metadata.gz: d9f919d918cc6c2b83a8f6849a30a111ddf07e87
4
+ data.tar.gz: 7eeaee62f683ee9fd0851e8c61cd47fab8a7f1bd
5
5
  SHA512:
6
- metadata.gz: 08307536b89a25d60ddfdaad8595073b90e67e9094f22c709b3005abca6fec4bed5a838270579b4f81df1e803b8fba31c75f5ddb4bb1d263a3d5157006396d90
7
- data.tar.gz: 344321318f9b76b95af19b8ff0c58d0c528da3bdeed6e15f6f60276513a2c24677cca360cc930d26087394a66082d5047b40706e0a87c8a4b9d5f20788e1ee64
6
+ metadata.gz: eecc73e562cf266445cf85f7c144424a730f71d95ea588b9ad7c75fc2aba0a09c0aba7440c78639c7433340e3e89888c329aec17660471038996763593e50e45
7
+ data.tar.gz: 96852fba98c17a79ef1763ece9f5b5cb9893cca9acf5c21da4ac09e352a1fcc2cb8e8f760836f3111493ef3f4b6258f01688b10b8c14952a7978e51623f70653
data/Gemfile.lock CHANGED
@@ -19,7 +19,7 @@ GIT
19
19
  PATH
20
20
  remote: .
21
21
  specs:
22
- remi (0.2.39)
22
+ remi (0.2.40)
23
23
  activesupport (~> 4.2)
24
24
  bond (~> 0.5)
25
25
  cucumber (~> 2.1)
@@ -9,9 +9,35 @@ Feature: This tests the application of metadata.
9
9
  And the source 'Source Data'
10
10
  And the target 'Target Data'
11
11
 
12
- And the following example record for 'Source Data':
13
- | activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
14
- | 1 | 1 | 3/3/1998 | A | 1 | 3.8 | 12.23 | 1/3/2016 03:22:36 | one.csv |
15
12
 
16
13
  Scenario: Metadata is used to parse date fields
17
- Then the target field 'student_dob' is set to the value "1998-03-03"
14
+
15
+ Given the following example record for 'Source Data':
16
+ | activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
17
+ | 1 | 1 | 3/3/1998 | A | 1 | 3.8 | 12.23 | 1/3/2016 03:22:36 | one.csv |
18
+
19
+ Then the target should match the example:
20
+ | activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
21
+ | 1 | 1 | 1998-03-03 | A | 1 | 3.8 | 12.23 | 2016-01-03 03:22:36 +0000 | one.csv |
22
+
23
+ Scenario Outline: Metadata is used to stub records with values that conform to the metadata
24
+
25
+ Then the target field '<Field>' is set to the value "<Class>"
26
+
27
+ Examples:
28
+ | Field | Class |
29
+ | activity_id_class | String |
30
+ | student_id_class | String |
31
+ | student_dob_class | Date |
32
+ | activity_type_class | String |
33
+ | activity_counter_class | Fixnum |
34
+ | activity_score_class | Float |
35
+ | activity_cost_class | Float |
36
+ | activity_date_class | Time |
37
+ | source_filename_class | String |
38
+
39
+
40
+ Scenario: Metadata for decimals is stubbed
41
+
42
+ Then the target field 'activity_cost_precision' is populated with "8"
43
+ And the target field 'activity_cost_scale' is populated with "2"
@@ -4,7 +4,7 @@
4
4
  ### Job and background setup
5
5
 
6
6
  Given /^the job is '([[:alnum:]\s]+)'$/ do |arg|
7
- @brt = Remi::BusinessRules::Tester.new(arg)
7
+ @brt = Remi::Testing::BusinessRules::Tester.new(arg)
8
8
  end
9
9
 
10
10
  Given /^the job source '([[:alnum:]\s\-_]+)'$/ do |arg|
@@ -64,13 +64,13 @@ Then /^the file that comes last in an alphanumeric sort by group will be downloa
64
64
  end
65
65
 
66
66
  Then /^the file is uploaded to the remote path "([^"]+)"$/ do |remote_path|
67
- expect(@brt.target.get_attrib(:remote_path)).to eq Remi::BusinessRules::ParseFormula.parse(remote_path)
67
+ expect(@brt.target.get_attrib(:remote_path)).to eq Remi::Testing::BusinessRules::ParseFormula.parse(remote_path)
68
68
  end
69
69
 
70
70
  ## CSV Options
71
71
 
72
72
  Given /^the (source|target) file is delimited with a (\w+)$/ do |st, delimiter|
73
- expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::BusinessRules.csv_opt_map[delimiter]
73
+ expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::Testing::BusinessRules.csv_opt_map[delimiter]
74
74
  end
75
75
 
76
76
  Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, encoding|
@@ -78,15 +78,15 @@ Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, enc
78
78
  end
79
79
 
80
80
  Given /^the (source|target) file uses a ([\w ]+) to quote embedded delimiters$/ do |st, quote_char|
81
- expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[quote_char]
81
+ expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::Testing::BusinessRules.csv_opt_map[quote_char]
82
82
  end
83
83
 
84
84
  Given /^the (source|target) file uses a preceding ([\w ]+) to escape an embedded quoting character$/ do |st, escape_char|
85
- expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[escape_char]
85
+ expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::Testing::BusinessRules.csv_opt_map[escape_char]
86
86
  end
87
87
 
88
88
  Given /^the (source|target) file uses ([\w ]+) line endings$/ do |st, line_endings|
89
- expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::BusinessRules.csv_opt_map[line_endings]
89
+ expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::Testing::BusinessRules.csv_opt_map[line_endings]
90
90
  end
91
91
 
92
92
  Given /^the (source|target) file uses "([^"]+)" as a record separator$/ do |st, line_endings|
@@ -130,7 +130,7 @@ Given /^the source field '([^']+)' (?:has|is set to) the value "([^"]*)"$/ do |s
130
130
  step "the source field '#{source_field}'"
131
131
 
132
132
  source_name, source_field_name = @brt.sources.parse_full_field(source_field)
133
- @brt.sources[source_name].fields[source_field_name].value = Remi::BusinessRules::ParseFormula.parse(value)
133
+ @brt.sources[source_name].fields[source_field_name].value = Remi::Testing::BusinessRules::ParseFormula.parse(value)
134
134
  end
135
135
 
136
136
  Given /^the source field (?:has|is set to) the value "([^"]*)"$/ do |value|
@@ -143,7 +143,7 @@ Given /^the source field '([^']+)' (?:has|is set to) the multiline value$/ do |s
143
143
  step "the source field '#{source_field}'"
144
144
 
145
145
  source_name, source_field_name = @brt.sources.parse_full_field(source_field)
146
- @brt.sources[source_name].fields[source_field_name].value = Remi::BusinessRules::ParseFormula.parse(value)
146
+ @brt.sources[source_name].fields[source_field_name].value = Remi::Testing::BusinessRules::ParseFormula.parse(value)
147
147
  end
148
148
 
149
149
  Given /^the source field (?:has|is set to) the multiline value$/ do |value|
@@ -266,7 +266,7 @@ Then /^the target field '([^']+)' is (?:set to the value|populated with) "([^"]*
266
266
  @brt.run_transforms
267
267
  }.not_to raise_error
268
268
  Array(target_names).each do |target_name|
269
- expect(@brt.targets[target_name].fields[target_field_name].values.uniq).to eq [Remi::BusinessRules::ParseFormula.parse(value)]
269
+ expect(@brt.targets[target_name].fields[target_field_name].values.uniq).to eq [Remi::Testing::BusinessRules::ParseFormula.parse(value)]
270
270
  end
271
271
  }
272
272
  end
@@ -12,6 +12,7 @@ Feature: Tests the parse_date transform
12
12
  Given the source field 'Date String' has the value "<Date String>"
13
13
  And the job parameter 'format' is "<Format>"
14
14
  Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
15
+
15
16
  Examples:
16
17
  | Date String | Format | Parsed Date |
17
18
  | 2015-10-21 | %Y-%m-%d | 2015-10-21 |
data/jobs/metadata_job.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require_relative 'all_jobs_shared'
2
+ ENV['TZ'] = 'UTC'
2
3
 
3
4
  class MetadataJob
4
5
  include AllJobsShared
@@ -11,7 +12,7 @@ class MetadataJob
11
12
  :activity_type => { from: 'in', in: true, type: :string, valid_values: ['A', 'B', 'C'], cdc_type: 2 },
12
13
  :activity_counter => { from: 'in', in: true, type: :integer, cdc_type: 2 },
13
14
  :activity_score => { from: 'in', in: true, type: :float, cdc_type: 2 },
14
- :activity_cost => { from: 'in', in: true, type: :decimal, precision: 16, scale: 2, cdc_type: 2 },
15
+ :activity_cost => { from: 'in', in: true, type: :decimal, precision: 8, scale: 2, cdc_type: 2 },
15
16
  :activity_date => { from: 'in', in: true, type: :datetime, in_format: '%m/%d/%Y %H:%M:%S', out_format: '%Y-%m-%dT%H:%M:%S', cdc_type: 2 },
16
17
  :source_filename => { from: 'in', in: true, type: :string, cdc_type: 1 }
17
18
  }
@@ -25,36 +26,28 @@ class MetadataJob
25
26
  :activity_type => { from: 'out', out: true, type: :string, valid_values: ['A', 'B', 'C'] },
26
27
  :activity_counter => { from: 'out', out: true, type: :integer },
27
28
  :activity_score => { from: 'out', out: true, type: :float },
28
- :activity_cost => { from: 'out', out: true, type: :decimal, precision: 16, scale: 2 },
29
+ :activity_cost => { from: 'out', out: true, type: :decimal, precision: 8, scale: 2 },
29
30
  :activity_date => { from: 'out', out: true, type: :datetime, in_format: '%m/%d/%Y %H:%M:%S', out_format: '%Y-%m-%dT%H:%M:%S' },
30
31
  :source_filename => { from: 'out', out: true, type: :string, cdc_type: 1 }
31
32
  }
32
33
 
33
34
  define_transform :main do
35
+ source_data.enforce_types
34
36
 
35
- =begin
36
- source_data.df = Remi::DataFrame.daru([
37
- ['1','1','3/3/1998','A','1','3.8','12.23','1/3/2016 03:22:36','one.csv'],
38
- ['2','1','3/3/1998','B','3','4.2','10.53','1/3/2016 03:58:22','one.csv'],
39
- ['2','1','','B','2','4.23','10.539','1/3/2016 03:58:22','one.csv']
40
- ].transpose, order: [
41
- :activity_id,
42
- :student_id,
43
- :student_dob,
44
- :activity_type,
45
- :activity_counter,
46
- :activity_score,
47
- :activity_cost,
48
- :activity_date,
49
- :source_filename
50
- ])
51
- =end
52
-
53
- Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields) do
37
+ Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields, target_metadata: target_data.fields) do
54
38
  target_data.fields.keys.each do |field|
55
39
  map source(field) .target(field)
56
- .transform(Remi::Transform::EnforceType.new)
40
+
41
+ map source(field) .target("#{field}_class".to_sym)
42
+ .transform(->(v) { v.class })
57
43
  end
44
+
45
+ map source(:activity_cost) .target(:activity_cost_precision, :activity_cost_scale)
46
+ .transform(->(row) {
47
+ components = row[:activity_cost].to_s.split('.')
48
+ row[:activity_cost_precision] = components.first.size
49
+ row[:activity_cost_scale] = components.last.size
50
+ })
58
51
  end
59
52
  end
60
53
  end
@@ -22,7 +22,7 @@ class ParseDateJob
22
22
  .transform(Remi::Transform::ParseDate.new(in_format: params[:format], if_blank: params[:if_blank]))
23
23
 
24
24
  map source(:stubbed_date) .target(:parsed_stubbed_date)
25
- .transform(Remi::Transform::ParseDate.new(in_format: params[:format], if_blank: params[:if_blank]))
25
+ .transform(Remi::Transform::ParseDate.new(in_format: source_data.fields[:stubbed_date][:in_format], if_blank: params[:if_blank]))
26
26
  end
27
27
  end
28
28
  end
data/lib/remi/cucumber.rb CHANGED
@@ -3,5 +3,9 @@ require 'cucumber/rspec/doubles'
3
3
 
4
4
  require 'regexp-examples'
5
5
 
6
- require_relative 'cucumber/data_source'
7
- require_relative 'cucumber/business_rules'
6
+ require_relative 'testing/data_stub'
7
+ require_relative 'testing/business_rules'
8
+
9
+ class Remi::DataSource
10
+ include Remi::Testing::DataStub
11
+ end
@@ -1,9 +1,4 @@
1
1
  module Remi
2
-
3
- # Namespaces for specific sources/targets
4
- module DataSource; end
5
- module DataTarget; end
6
-
7
2
  class DataSubject
8
3
  def initialize(*args, fields: Remi::Fields.new, remi_df_type: :daru, logger: Remi::Settings.logger, **kargs, &block)
9
4
  @fields = fields
@@ -11,16 +6,24 @@ module Remi
11
6
  @logger = logger
12
7
  end
13
8
 
9
+ # Public: Fields defined for this data subject
14
10
  attr_accessor :fields
15
11
 
12
+ # Public: The default method for symbolizing field names
16
13
  def field_symbolizer
17
14
  Remi::FieldSymbolizers[:standard]
18
15
  end
19
16
 
17
+ # Public: Access the dataframe from a DataSource
18
+ #
19
+ # Returns a Remi::DataFrame
20
20
  def df
21
21
  @dataframe ||= Remi::DataFrame.create(@remi_df_type, [], order: @fields.keys)
22
22
  end
23
23
 
24
+ # Public: Reassigns the dataframe associated with this subject
25
+ #
26
+ # Returns the assigned dataframe
24
27
  def df=(new_dataframe)
25
28
  if new_dataframe.respond_to? :remi_df_type
26
29
  @dataframe = new_dataframe
@@ -29,58 +32,78 @@ module Remi
29
32
  end
30
33
  end
31
34
 
32
- module DataSource
33
-
34
- # Public: Access the dataframe from a DataSource
35
- #
36
- # Returns a Remi::DataFrame
37
- def df
38
- @dataframe ||= to_dataframe
35
+ # Public: Enforces types defined in the field metadata.
36
+ # For example, if a field has metadata with type: :date, then the
37
+ # type enforcer will convert data in that field into a date, and will
38
+ # throw an error if it is unable to parse any of the values.
39
+ #
40
+ # types - If set, restricts the data types that are enforced to just those listed.
41
+ #
42
+ # Returns nothing.
43
+ def enforce_types(*types)
44
+ sttm = SourceToTargetMap.new(df, source_metadata: fields)
45
+ fields.keys.each do |field|
46
+ next unless (types.size == 0 || types.include?(fields[field][:type])) && df.vectors.include?(field)
47
+ sttm.source(field).target(field).transform(Remi::Transform::EnforceType.new).execute
39
48
  end
40
49
 
41
- # Public: Memoized version of extract!
42
- def extract
43
- @extract ||= extract!
44
- end
50
+ nil
51
+ end
52
+ end
45
53
 
46
- # Public: Called to extract data from the source.
47
- #
48
- # Returns data in a format that can be used to create a dataframe.
49
- def extract!
50
- raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
51
- @extract
52
- end
53
54
 
54
- # Public: Converts extracted data to a dataframe
55
- #
56
- # Returns a Remi::DataFrame
57
- def to_dataframe
58
- raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
59
- end
55
+ class DataSource < DataSubject
56
+
57
+ # Public: Access the dataframe from a DataSource
58
+ #
59
+ # Returns a Remi::DataFrame
60
+ def df
61
+ @dataframe ||= to_dataframe
60
62
  end
61
63
 
62
- module DataTarget
64
+ # Public: Memoized version of extract!
65
+ def extract
66
+ @extract ||= extract!
67
+ end
63
68
 
64
- # Public: Loads data to the target. This is automatically called
65
- # after all transforms have executed, but could also get called manually.
66
- # The actual load operation is only executed if hasn't already.
67
- #
68
- # Returns true if the load operation was successful.
69
- def load
70
- return true if @loaded || df.size == 0
69
+ # Public: Called to extract data from the source.
70
+ #
71
+ # Returns data in a format that can be used to create a dataframe.
72
+ def extract!
73
+ raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
74
+ @extract
75
+ end
71
76
 
72
- @loaded = load!
73
- end
77
+ # Public: Converts extracted data to a dataframe
78
+ #
79
+ # Returns a Remi::DataFrame
80
+ def to_dataframe
81
+ raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
82
+ end
83
+ end
74
84
 
75
- # Public: Performs the load operation, regardless of whether it has
76
- # already executed.
77
- #
78
- # Returns true if the load operation was successful
79
- def load!
80
- raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
81
85
 
82
- false
83
- end
86
+ class DataTarget < DataSubject
87
+
88
+ # Public: Loads data to the target. This is automatically called
89
+ # after all transforms have executed, but could also get called manually.
90
+ # The actual load operation is only executed if it hasn't already.
91
+ #
92
+ # Returns true if the load operation was successful.
93
+ def load
94
+ return true if @loaded || df.size == 0
95
+
96
+ @loaded = load!
97
+ end
98
+
99
+ # Public: Performs the load operation, regardless of whether it has
100
+ # already executed.
101
+ #
102
+ # Returns true if the load operation was successful
103
+ def load!
104
+ raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
105
+
106
+ false
84
107
  end
85
108
  end
86
109
  end
@@ -26,8 +26,7 @@ module Remi
26
26
 
27
27
 
28
28
 
29
- class DataSource::CsvFile < Remi::DataSubject
30
- include Remi::DataSubject::DataSource
29
+ class DataSource::CsvFile < DataSource
31
30
  include Remi::DataSubject::CsvFile
32
31
 
33
32
  def initialize(*args, **kargs, &block)
@@ -130,8 +129,7 @@ module Remi
130
129
 
131
130
 
132
131
 
133
- class DataTarget::CsvFile < Remi::DataSubject
134
- include ::Remi::DataSubject::DataTarget
132
+ class DataTarget::CsvFile < DataTarget
135
133
  include ::Remi::DataSubject::CsvFile
136
134
 
137
135
  default_csv_options[:row_sep] = "\n"
@@ -1,7 +1,6 @@
1
1
  module Remi
2
2
 
3
- class DataSource::DataFrame < Remi::DataSubject
4
- include Remi::DataSubject::DataSource
3
+ class DataSource::DataFrame < DataSource
5
4
 
6
5
  def initialize(*args, **kargs, &block)
7
6
  super
@@ -30,8 +29,7 @@ module Remi
30
29
  end
31
30
 
32
31
 
33
- class DataTarget::DataFrame < Remi::DataSubject
34
- include Remi::DataSubject::DataTarget
32
+ class DataTarget::DataFrame < DataTarget
35
33
 
36
34
  def initialize(*args, **kargs, &block)
37
35
  super
@@ -13,8 +13,7 @@ module Remi
13
13
  end
14
14
 
15
15
 
16
- class DataSource::Postgres < Remi::DataSubject
17
- include Remi::DataSubject::DataSource
16
+ class DataSource::Postgres < DataSource
18
17
  include Remi::DataSubject::Postgres
19
18
 
20
19
 
@@ -65,8 +64,7 @@ module Remi
65
64
 
66
65
  # VERY PRELIMINARY IMPLEMENTATION - ONLY LOADS TO TEMP TABLES
67
66
  # IT IS THEN UP TO THE USER TO DO ELT TO LOAD THE FINAL TABLE
68
- class DataTarget::Postgres < Remi::DataSubject
69
- include Remi::DataSubject::DataTarget
67
+ class DataTarget::Postgres < DataTarget
70
68
  include Remi::DataSubject::Postgres
71
69
 
72
70
  def initialize(*args, **kargs, &block)
@@ -21,8 +21,7 @@ module Remi
21
21
  end
22
22
 
23
23
 
24
- class DataSource::Salesforce < Remi::DataSubject
25
- include Remi::DataSubject::DataSource
24
+ class DataSource::Salesforce < DataSource
26
25
  include Remi::DataSubject::Salesforce
27
26
 
28
27
  def initialize(*args, **kargs, &block)
@@ -92,8 +91,7 @@ module Remi
92
91
  end
93
92
 
94
93
 
95
- class DataTarget::Salesforce < Remi::DataSubject
96
- include Remi::DataSubject::DataTarget
94
+ class DataTarget::Salesforce < DataTarget
97
95
  include Remi::DataSubject::Salesforce
98
96
 
99
97
  def initialize(*args, **kargs, &block)
@@ -1,7 +1,6 @@
1
1
  module Remi
2
2
 
3
- class DataTarget::SftpFile < Remi::DataSubject
4
- include Remi::DataSubject::DataTarget
3
+ class DataTarget::SftpFile < DataTarget
5
4
 
6
5
  def initialize(*args, **kargs, &block)
7
6
  super
@@ -1,4 +1,4 @@
1
- module Remi::BusinessRules
1
+ module Remi::Testing::BusinessRules
2
2
  using Remi::Refinements::Symbolizer
3
3
 
4
4
  def self.csv_opt_map
@@ -0,0 +1,72 @@
1
+ module Remi
2
+ module Testing
3
+ module DataStub
4
+ def stub_row_array
5
+ @fields.values.map do |attribs|
6
+ stub_values(attribs)
7
+ end
8
+ end
9
+
10
+ def empty_stub_df
11
+ self.df = Daru::DataFrame.new([], order: @fields.keys)
12
+ end
13
+
14
+ def stub_df
15
+ empty_stub_df
16
+ self.df.add_row(stub_row_array)
17
+ end
18
+
19
+ def stub_values(**attribs)
20
+ stub_type = "stub_#{attribs[:type]}".to_sym
21
+ if respond_to?(stub_type)
22
+ send(stub_type, attribs)
23
+ else
24
+ stub_string(attribs)
25
+ end
26
+ end
27
+
28
+ def stub_string(**attribs)
29
+ Faker::Hipster.word
30
+ end
31
+
32
+ def stub_float(**attribs)
33
+ Faker::Number.decimal(2,3)
34
+ end
35
+
36
+ def stub_decimal(**attribs)
37
+ Faker::Number.decimal(attribs[:precision],attribs[:scale])
38
+ end
39
+
40
+ def stub_integer(**attribs)
41
+ Faker::Number.number(4).to_s
42
+ end
43
+
44
+ def stub_date(**attribs)
45
+ in_format = attribs[:in_format]
46
+ result = Faker::Date.backward(3650)
47
+ result = result.strftime(in_format) if in_format
48
+ result
49
+ end
50
+
51
+ def stub_datetime(**attribs)
52
+ in_format = attribs[:in_format]
53
+ result = Faker::Time.backward(3650)
54
+ result = result.strftime(in_format) if in_format
55
+ result
56
+ end
57
+
58
+ def stub_boolean(**attribs)
59
+ ['T','F'].shuffle.first
60
+ end
61
+
62
+ def stub_json(**attribs)
63
+ if attribs[:json_array]
64
+ [ stub_string ]
65
+ else
66
+ { Faker::Hipster.words(1, true, true) => stub_string }
67
+ end.to_json
68
+ end
69
+
70
+ end
71
+ end
72
+ end
@@ -218,6 +218,7 @@ module Remi
218
218
  # This transform is metadata aware and will use :in_format metadata
219
219
  # from the source
220
220
  #
221
+ # type - Specify either :date, or :datetime type (default: date)
221
222
  # in_format - The date format to use to convert the string (default: uses :in_format
222
223
  # from the source metadata. If that is not defined, use '%Y-%m-%d').
223
224
  # if_blank - Value to use if the incoming value is blank (default: uses :if_blank
@@ -232,20 +233,33 @@ module Remi
232
233
  # tform.source_metadata = { in_format: '%m/%d/%Y' }
233
234
  # tform.to_proc.call('02/22/2013') # => Date.new(2013,2,22)
234
235
  class ParseDate < Transform
235
- def initialize(*args, in_format: nil, if_blank: nil, **kargs, &block)
236
+ def initialize(*args, type: nil, in_format: nil, if_blank: nil, **kargs, &block)
236
237
  super
238
+ @type = type
237
239
  @in_format = in_format
238
240
  @if_blank = if_blank
239
241
  end
240
242
 
243
+ def type
244
+ @type ||= @source_metadata.fetch(:type, :date)
245
+ end
246
+
241
247
  def in_format
242
- @in_format ||= @source_metadata.fetch(:in_format, '%Y-%m-%d')
248
+ @in_format ||= @source_metadata.fetch(:in_format, default_date_format)
243
249
  end
244
250
 
245
251
  def if_blank
246
252
  @if_blank ||= @source_metadata.fetch(:if_blank, nil)
247
253
  end
248
254
 
255
+ def default_date_format
256
+ if type == :datetime
257
+ '%Y-%m-%d %H:%M:%S'
258
+ else
259
+ '%Y-%m-%d'
260
+ end
261
+ end
262
+
249
263
  def transform(value)
250
264
  begin
251
265
  if value.respond_to?(:strftime)
@@ -260,15 +274,19 @@ module Remi
260
274
  end
261
275
  end
262
276
 
277
+ def class_type
278
+ @class_type ||= type == :datetime ? Time : Date
279
+ end
280
+
263
281
  def string_to_date(value)
264
- Date.strptime(value, in_format)
282
+ class_type.strptime(value, in_format)
265
283
  end
266
284
 
267
285
  def blank_handler(value)
268
286
  if if_blank == :low
269
- Date.new(1900,01,01)
287
+ class_type.new(1900,01,01)
270
288
  elsif if_blank == :high
271
- Date.new(2999,12,31)
289
+ class_type.new(2999,12,31)
272
290
  elsif if_blank.respond_to? :call
273
291
  if_blank.call(value)
274
292
  else
@@ -282,6 +300,7 @@ module Remi
282
300
  # This transform is metadata aware and will use :in_format/:out_format metadata
283
301
  # from the source.
284
302
  #
303
+ # type - Specify either :date, or :datetime type (default: date)
285
304
  # in_format - The date format to use to parse the input value. If the input value
286
305
  # is a date, then this parameter is ignored. (default: uses :in_format
287
306
  # from the source metadata. If that is not defined, use '%Y-%m-%d')
@@ -297,18 +316,35 @@ module Remi
297
316
  # tform.source_metadata = { in_format: '%m/%d/%Y', out_format: '%Y-%m-%d' }
298
317
  # tform.to_proc.call('02/22/2013') # => "2013-02-22"
299
318
  class FormatDate < Transform
300
- def initialize(*args, in_format: nil, out_format: nil, **kargs, &block)
319
+ def initialize(*args, type: nil, in_format: nil, out_format: nil, **kargs, &block)
301
320
  super
321
+ @type = type
302
322
  @in_format = in_format
303
323
  @out_format = out_format
304
324
  end
305
325
 
326
+ def type
327
+ @type ||= @source_metadata.fetch(:type, :date)
328
+ end
329
+
306
330
  def in_format
307
- @in_format ||= @source_metadata.fetch(:in_format, '%Y-%m-%d')
331
+ @in_format ||= @source_metadata.fetch(:in_format, default_date_format)
308
332
  end
309
333
 
310
334
  def out_format
311
- @out_format ||= @source_metadata.fetch(:out_format, '%Y-%m-%d')
335
+ @out_format ||= @source_metadata.fetch(:out_format, default_date_format)
336
+ end
337
+
338
+ def default_date_format
339
+ if type == :datetime
340
+ '%Y-%m-%d %H:%M:%S'
341
+ else
342
+ '%Y-%m-%d'
343
+ end
344
+ end
345
+
346
+ def class_type
347
+ @class_type ||= type == :datetime ? Time : Date
312
348
  end
313
349
 
314
350
  def transform(value)
@@ -318,7 +354,7 @@ module Remi
318
354
  elsif value.respond_to? :strftime
319
355
  value.strftime(out_format)
320
356
  else
321
- Date.strptime(value, in_format).strftime(out_format)
357
+ class_type.strptime(value, in_format).strftime(out_format)
322
358
  end
323
359
  rescue ArgumentError => err
324
360
  raise err, "Error parsing date (#{value.class}): '#{value}' using the format #{in_format} => #{out_format}"
@@ -461,7 +497,7 @@ module Remi
461
497
  def if_blank
462
498
  return @if_blank if @if_blank_set
463
499
  @if_blank_set = true
464
- @if_blank = @source_metadata.fetch(:if_blank, nil)
500
+ @if_blank = @source_metadata.fetch(:if_blank, default_if_blank)
465
501
  end
466
502
 
467
503
  def blank_handler(value)
@@ -474,8 +510,12 @@ module Remi
474
510
  end
475
511
  end
476
512
 
513
+ def default_if_blank
514
+ type == :string ? '' : nil
515
+ end
516
+
477
517
  def transform(value)
478
- if value.blank?
518
+ if value.blank? && type != :json
479
519
  blank_handler(value)
480
520
  else
481
521
  case type
@@ -488,9 +528,15 @@ module Remi
488
528
  when :decimal
489
529
  Float("%.#{scale}f" % Float(value))
490
530
  when :date
491
- value.is_a?(Date) ? value : Date.strptime(value, in_format) # value.is_a?(Date) is only needed becuase we stub date types with actual dates, rather than strings like we probably should
531
+ value.is_a?(Date) ? value : Date.strptime(value, in_format)
492
532
  when :datetime
493
- Time.strptime(value, in_format)
533
+ value.is_a?(Time) ? value : Time.strptime(value, in_format)
534
+ when :json
535
+ if value.blank? && value != [] && value != {}
536
+ blank_handler(value)
537
+ else
538
+ value.is_a?(Hash) || value.is_a?(Array) ? value : JSON.parse(value)
539
+ end
494
540
  else
495
541
  raise ArgumentError, "Unknown type enforcement: #{type}"
496
542
  end
data/lib/remi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.2.39'
2
+ VERSION = '0.2.40'
3
3
  end
@@ -0,0 +1,44 @@
1
+ require_relative 'remi_spec'
2
+
3
+ # VERY SPARSE TESTING! DO MORE!
4
+
5
+ describe DataSubject do
6
+
7
+ describe 'enforcing types' do
8
+ let(:dataframe) do
9
+ Remi::DataFrame::Daru.new({ my_date: ['10/21/2015'] })
10
+ end
11
+
12
+ let(:data_subject) do
13
+ DataSubject.new(fields: fields).tap { |ds| ds.df = dataframe }
14
+ end
15
+
16
+ let(:fields) do
17
+ Fields.new({
18
+ my_date: { type: :date, in_format: '%m/%d/%Y' },
19
+ other_date: { type: :date, in_format: '%m/%d/%Y' }
20
+ })
21
+ end
22
+
23
+ it 'converts a date string to a date using an in_format' do
24
+ data_subject.enforce_types
25
+ expect(data_subject.df[:my_date].to_a).to eq [Date.new(2015, 10, 21)]
26
+ end
27
+
28
+ it 'does not do any conversion if the type is not specified' do
29
+ fields[:my_date].delete(:type)
30
+ data_subject.enforce_types
31
+ expect(data_subject.df[:my_date].to_a).to eq ['10/21/2015']
32
+ end
33
+
34
+ it 'throws an error if the data does not conform to its type' do
35
+ dataframe[:my_date].recode! { |v| '2015-10-21' }
36
+ expect { data_subject.enforce_types }.to raise_error ArgumentError
37
+ end
38
+
39
+ it 'does not create new vectors during enforcement' do
40
+ data_subject.enforce_types
41
+ expect(dataframe.vectors.to_a).to eq [:my_date]
42
+ end
43
+ end
44
+ end
@@ -49,7 +49,7 @@ describe SourceToTargetMap do
49
49
  it_behaves_like 'one-to-one map'
50
50
  end
51
51
 
52
- context 'without any transforms', wip: true do
52
+ context 'without any transforms' do
53
53
  before { map.source(:a) .target(:aprime) }
54
54
 
55
55
  let(:result) do
@@ -0,0 +1,171 @@
1
+ require_relative '../remi_spec'
2
+ require 'remi/testing/data_stub'
3
+
4
+ describe Testing::DataStub do
5
+ class StubTester < DataSubject
6
+ include Testing::DataStub
7
+ end
8
+
9
+ context 'data type stubs' do
10
+ let(:stub_tester) { StubTester.new }
11
+
12
+ context '#stub_string' do
13
+ let(:stub) { stub_tester.stub_string }
14
+
15
+ it 'stubs as strings' do
16
+ expect(stub).to be_a String
17
+ end
18
+ end
19
+
20
+ context '#stub_float' do
21
+ let(:stub) { stub_tester.stub_float }
22
+
23
+ it 'stubs as strings' do
24
+ expect(stub).to be_a String
25
+ end
26
+
27
+ it 'represents a floating point number' do
28
+ expect(Float(stub) % 1).not_to eq 0.0
29
+ end
30
+ end
31
+
32
+ context '#stub_decimal' do
33
+ let(:stub) { stub_tester.stub_decimal(precision: 8, scale: 2) }
34
+
35
+ it 'stubs as strings' do
36
+ expect(stub).to be_a String
37
+ end
38
+
39
+ it 'represents a floating point number' do
40
+ expect(Float(stub) % 1).not_to eq 0.0
41
+ end
42
+
43
+ it 'comes with the specified precision' do
44
+ expect(Float(stub).to_s.split('.').first.size).to eq 8
45
+ end
46
+
47
+ it 'comes with the specified scale' do
48
+ expect(Float(stub).to_s.split('.').last.size).to eq 2
49
+ end
50
+ end
51
+
52
+ context '#stub_integer' do
53
+ let(:stub) { stub_tester.stub_integer }
54
+
55
+ it 'stubs as strings' do
56
+ expect(stub).to be_a String
57
+ end
58
+
59
+ it 'represents an integer' do
60
+ expect(Float(stub) % 1).to eq 0.0
61
+ end
62
+
63
+ it 'converts to an integer' do
64
+ expect { Integer(stub) }.not_to raise_error
65
+ end
66
+ end
67
+
68
+
69
+ context '#stub_date' do
70
+ context 'without an in_format' do
71
+ let(:stub) { stub_tester.stub_date }
72
+
73
+ it 'stubs as a date' do
74
+ expect(stub).to be_a Date
75
+ end
76
+ end
77
+
78
+ context 'with an in_format' do
79
+ let(:stub) { stub_tester.stub_date(in_format: '%m/%d/%Y') }
80
+
81
+ it 'stubs as strings' do
82
+ expect(stub).to be_a String
83
+ end
84
+
85
+ it 'can parsed as a date using the specified in_format' do
86
+ expect { Date.strptime(stub, '%m/%d/%Y') }.not_to raise_error
87
+ end
88
+ end
89
+ end
90
+
91
+ context '#stub_datetime' do
92
+ context 'without an in_format' do
93
+ let(:stub) { stub_tester.stub_datetime }
94
+
95
+ it 'stubs as a time' do
96
+ expect(stub).to be_a Time
97
+ end
98
+ end
99
+
100
+ context 'with an in_format' do
101
+ let(:stub) { stub_tester.stub_datetime(in_format: '%m/%d/%Y %H:%M:%S') }
102
+
103
+ it 'stubs as strings' do
104
+ expect(stub).to be_a String
105
+ end
106
+
107
+ it 'can parsed as a time using the specified in_format' do
108
+ expect { Time.strptime(stub, '%m/%d/%Y %H:%M:%S') }.not_to raise_error
109
+ end
110
+ end
111
+ end
112
+
113
+ context '#stub_boolean' do
114
+ let(:stub) { stub_tester.stub_boolean }
115
+
116
+ it 'stubs as strings' do
117
+ expect(stub).to be_a String
118
+ end
119
+
120
+ it 'is either T or F' do
121
+ expect(stub).to eq('T').or eq('F')
122
+ end
123
+ end
124
+
125
+ context '#stub_json' do
126
+ let(:stub) { stub_tester.stub_json }
127
+
128
+ it 'stubs as strings' do
129
+ expect(stub).to be_a String
130
+ end
131
+
132
+ it 'can be parsed as JSON' do
133
+ expect { JSON.parse(stub) }.not_to raise_error
134
+ end
135
+ end
136
+ end
137
+
138
+
139
+ context 'stubbed dataframe data' do
140
+ let(:stub_tester) do
141
+ StubTester.new(fields: {
142
+ my_date: { type: :date, in_format: '%m/%d/%Y' },
143
+ my_str: {}
144
+ })
145
+ end
146
+
147
+ context '#empty_stub_df' do
148
+ before { stub_tester.empty_stub_df }
149
+
150
+ it 'creates a dataframe with no data' do
151
+ expect(stub_tester.df.size).to eq 0
152
+ end
153
+
154
+ it 'creates a dataframe with the right number of vectors' do
155
+ expect(stub_tester.df.vectors.size).to eq 2
156
+ end
157
+ end
158
+
159
+ context '#stub_df' do
160
+ before { stub_tester.stub_df }
161
+
162
+ it 'creates a row of data' do
163
+ expect(stub_tester.df.size).to eq 1
164
+ end
165
+
166
+ it 'creates data according to the supplied metadata' do
167
+ expect { Date.strptime(stub_tester.df[:my_date].first, '%m/%d/%Y') }.not_to raise_error
168
+ end
169
+ end
170
+ end
171
+ end
@@ -50,4 +50,79 @@ describe Transform do
50
50
  expect(transform.call(1, 2)).to eq [1, 2]
51
51
  end
52
52
  end
53
+
54
+ describe Transform::ParseDate do
55
+ it 'parses a date using the specified in_format' do
56
+ parser = Transform::ParseDate.new(in_format: '%m/%d/%Y')
57
+ expect(parser.call('03/22/2011')).to eq Date.new(2011,3,22)
58
+ end
59
+
60
+ it 'returns a date if it has already been parsed' do
61
+ parser = Transform::ParseDate.new(in_format: '%m/%d/%Y')
62
+ expect(Date.new(2011,3,22)).to eq Date.new(2011,3,22)
63
+ end
64
+
65
+ it 'uses ISO 8601 as the default date parser' do
66
+ parser = Transform::ParseDate.new
67
+ expect(parser.call('2011-03-22')).to eq Date.new(2011,3,22)
68
+ end
69
+
70
+ it 'fails when an unparseable date is provided' do
71
+ parser = Transform::ParseDate.new
72
+ expect { parser.call('03/22/2011') }.to raise_error ArgumentError
73
+ end
74
+
75
+ it 'parses datetimes when the type is specified' do
76
+ parser = Transform::ParseDate.new(type: :datetime, in_format: '%m/%d/%Y %H:%M:%S')
77
+ expect(parser.call('03/22/2011 04:22:00')).to eq Time.new(2011,3,22,4,22,0)
78
+ end
79
+
80
+ it 'uses ISO 8601 as the default datetime parser' do
81
+ parser = Transform::ParseDate.new(type: :datetime)
82
+ expect(parser.call('2011-03-22 04:22:00')).to eq Time.new(2011,3,22,4,22,0)
83
+ end
84
+ end
85
+
86
+ context Transform::FormatDate do
87
+ it 'formats a date using the specified out_format' do
88
+ formatter = Transform::FormatDate.new(out_format: '%m/%d/%Y')
89
+ expect(formatter.call(Date.new(2011,3,22))).to eq '03/22/2011'
90
+ end
91
+
92
+ it 'formats a datetime using the specified out_format' do
93
+ formatter = Transform::FormatDate.new(type: :datetime, out_format: '%m/%d/%Y %H:%M:%S')
94
+ expect(formatter.call(Time.new(2011,3,22,4,22,0))).to eq '03/22/2011 04:22:00'
95
+ end
96
+
97
+ it 'uses the in_format to parse strings when the source is not already a date' do
98
+ formatter = Transform::FormatDate.new(in_format: '%d/%m/%Y', out_format: '%m/%d/%Y')
99
+ expect(formatter.call('22/03/2011')).to eq '03/22/2011'
100
+ end
101
+
102
+ it 'fails when an unparseable date is provided' do
103
+ formatter = Transform::FormatDate.new(in_format: '%d/%m/%Y', out_format: '%m/%d/%Y')
104
+ expect { formatter.call('22/22/2011') }.to raise_error ArgumentError
105
+ end
106
+
107
+ it 'uses ISO 8601 as the default date parser' do
108
+ formatter = Transform::FormatDate.new(out_format: '%m/%d/%Y')
109
+ expect(formatter.call('2011-03-22')).to eq '03/22/2011'
110
+ end
111
+
112
+ it 'uses ISO 8601 as the default date formatter' do
113
+ formatter = Transform::FormatDate.new(in_format: '%m/%d/%Y')
114
+ expect(formatter.call('03/22/2011')).to eq '2011-03-22'
115
+ end
116
+
117
+ it 'uses ISO 8601 as the default datetime parser' do
118
+ formatter = Transform::FormatDate.new(type: :datetime, out_format: '%m/%d/%Y %H:%M:%S')
119
+ expect(formatter.call('2011-03-22 04:22:00')).to eq '03/22/2011 04:22:00'
120
+ end
121
+
122
+ it 'uses ISO 8601 as the default datetime formatter' do
123
+ formatter = Transform::FormatDate.new(type: :datetime, in_format: '%m/%d/%Y %H:%M:%S')
124
+ expect(formatter.call('03/22/2011 04:22:00')).to eq '2011-03-22 04:22:00'
125
+ end
126
+ end
127
+
53
128
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.39
4
+ version: 0.2.40
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sterling Paramore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-30 00:00:00.000000000 Z
11
+ date: 2016-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bond
@@ -225,8 +225,6 @@ files:
225
225
  - lib/remi.rb
226
226
  - lib/remi/cli.rb
227
227
  - lib/remi/cucumber.rb
228
- - lib/remi/cucumber/business_rules.rb
229
- - lib/remi/cucumber/data_source.rb
230
228
  - lib/remi/data_frame.rb
231
229
  - lib/remi/data_frame/daru.rb
232
230
  - lib/remi/data_subject.rb
@@ -248,11 +246,14 @@ files:
248
246
  - lib/remi/source_to_target_map.rb
249
247
  - lib/remi/source_to_target_map/map.rb
250
248
  - lib/remi/source_to_target_map/row.rb
249
+ - lib/remi/testing/business_rules.rb
250
+ - lib/remi/testing/data_stub.rb
251
251
  - lib/remi/transform.rb
252
252
  - lib/remi/version.rb
253
253
  - remi.gemspec
254
254
  - spec/data_subject/csv_file_spec.rb
255
255
  - spec/data_subject/data_frame.rb
256
+ - spec/data_subject_spec.rb
256
257
  - spec/extractor/file_system_spec.rb
257
258
  - spec/extractor/local_file_spec.rb
258
259
  - spec/extractor/s3_file_spec.rb
@@ -264,6 +265,7 @@ files:
264
265
  - spec/metadata_spec.rb
265
266
  - spec/remi_spec.rb
266
267
  - spec/source_to_target_map_spec.rb
268
+ - spec/testing/data_stub_spec.rb
267
269
  - spec/transform_spec.rb
268
270
  - workbooks/sample_workbook.ipynb
269
271
  - workbooks/workbook_helper.rb
@@ -315,6 +317,7 @@ test_files:
315
317
  - features/transforms/truthy.feature
316
318
  - spec/data_subject/csv_file_spec.rb
317
319
  - spec/data_subject/data_frame.rb
320
+ - spec/data_subject_spec.rb
318
321
  - spec/extractor/file_system_spec.rb
319
322
  - spec/extractor/local_file_spec.rb
320
323
  - spec/extractor/s3_file_spec.rb
@@ -326,4 +329,5 @@ test_files:
326
329
  - spec/metadata_spec.rb
327
330
  - spec/remi_spec.rb
328
331
  - spec/source_to_target_map_spec.rb
332
+ - spec/testing/data_stub_spec.rb
329
333
  - spec/transform_spec.rb
@@ -1,70 +0,0 @@
1
- module Remi
2
- module DataSource
3
- module DataStub
4
- def stub_row_array
5
- @fields.values.map do |attrib|
6
- stub_values[attrib[:type]].call
7
- end
8
- end
9
-
10
- def empty_stub_df
11
- self.df = Daru::DataFrame.new([], order: @fields.keys)
12
- end
13
-
14
- def stub_df
15
- empty_stub_df
16
- self.df.add_row(stub_row_array)
17
- end
18
-
19
- def stub_values
20
- @stub_values ||= Hash.new(->() { Faker::Hipster.word }).merge({
21
- string: ->() { Faker::Hipster.word },
22
- number: ->() { Faker::Number.decimal(4,4) },
23
- float: ->() { Faker::Number.decimal(2,2) },
24
- integer: ->() { Faker::Number.number(4) },
25
- date: ->() { Faker::Date.backward(3650) },
26
- datetime: ->() { Faker::Time.backward(3650).to_datetime },
27
- boolean: ->() { ['T','F'].shuffle.first }
28
- })
29
- end
30
- end
31
-
32
-
33
- class CsvFile
34
- include DataStub
35
- def stub_tmp_file
36
- @stub_tmp_file ||= Tempfile.new('stub_tmp_file.csv').path
37
- end
38
-
39
- def write_stub_tmp_file
40
- File.open(stub_tmp_file, "wb") do |file|
41
- file.puts stub_header
42
- file.puts stub_row_csv
43
- end
44
-
45
- stub_tmp_file
46
- end
47
-
48
- def stub_header
49
- @fields.keys.join(@csv_options[:col_sep])
50
- end
51
-
52
- def stub_row_csv
53
- stub_row_array.join(@csv_options[:col_sep])
54
- end
55
- end
56
-
57
- # Hmmm.... this gets called first because I'm trying to split SF off as a "plugin"
58
- class Salesforce < Remi::DataSubject
59
- include DataStub
60
- end
61
-
62
- class DataFrame
63
- include DataStub
64
- end
65
-
66
- class Postgres
67
- include DataStub
68
- end
69
- end
70
- end