remi 0.2.39 → 0.2.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fc73b233d7181b3e40c96c5da2d52bf8d81d99a8
4
- data.tar.gz: 536421bb23bf564eeb4394f3046467fe1dd9adf4
3
+ metadata.gz: d9f919d918cc6c2b83a8f6849a30a111ddf07e87
4
+ data.tar.gz: 7eeaee62f683ee9fd0851e8c61cd47fab8a7f1bd
5
5
  SHA512:
6
- metadata.gz: 08307536b89a25d60ddfdaad8595073b90e67e9094f22c709b3005abca6fec4bed5a838270579b4f81df1e803b8fba31c75f5ddb4bb1d263a3d5157006396d90
7
- data.tar.gz: 344321318f9b76b95af19b8ff0c58d0c528da3bdeed6e15f6f60276513a2c24677cca360cc930d26087394a66082d5047b40706e0a87c8a4b9d5f20788e1ee64
6
+ metadata.gz: eecc73e562cf266445cf85f7c144424a730f71d95ea588b9ad7c75fc2aba0a09c0aba7440c78639c7433340e3e89888c329aec17660471038996763593e50e45
7
+ data.tar.gz: 96852fba98c17a79ef1763ece9f5b5cb9893cca9acf5c21da4ac09e352a1fcc2cb8e8f760836f3111493ef3f4b6258f01688b10b8c14952a7978e51623f70653
data/Gemfile.lock CHANGED
@@ -19,7 +19,7 @@ GIT
19
19
  PATH
20
20
  remote: .
21
21
  specs:
22
- remi (0.2.39)
22
+ remi (0.2.40)
23
23
  activesupport (~> 4.2)
24
24
  bond (~> 0.5)
25
25
  cucumber (~> 2.1)
@@ -9,9 +9,35 @@ Feature: This tests the application of metadata.
9
9
  And the source 'Source Data'
10
10
  And the target 'Target Data'
11
11
 
12
- And the following example record for 'Source Data':
13
- | activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
14
- | 1 | 1 | 3/3/1998 | A | 1 | 3.8 | 12.23 | 1/3/2016 03:22:36 | one.csv |
15
12
 
16
13
  Scenario: Metadata is used to parse date fields
17
- Then the target field 'student_dob' is set to the value "1998-03-03"
14
+
15
+ Given the following example record for 'Source Data':
16
+ | activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
17
+ | 1 | 1 | 3/3/1998 | A | 1 | 3.8 | 12.23 | 1/3/2016 03:22:36 | one.csv |
18
+
19
+ Then the target should match the example:
20
+ | activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
21
+ | 1 | 1 | 1998-03-03 | A | 1 | 3.8 | 12.23 | 2016-01-03 03:22:36 +0000 | one.csv |
22
+
23
+ Scenario Outline: Metadata is used to stub records with values that conform to the metadata
24
+
25
+ Then the target field '<Field>' is set to the value "<Class>"
26
+
27
+ Examples:
28
+ | Field | Class |
29
+ | activity_id_class | String |
30
+ | student_id_class | String |
31
+ | student_dob_class | Date |
32
+ | activity_type_class | String |
33
+ | activity_counter_class | Fixnum |
34
+ | activity_score_class | Float |
35
+ | activity_cost_class | Float |
36
+ | activity_date_class | Time |
37
+ | source_filename_class | String |
38
+
39
+
40
+ Scenario: Metadata for decimals is stubbed
41
+
42
+ Then the target field 'activity_cost_precision' is populated with "8"
43
+ And the target field 'activity_cost_scale' is populated with "2"
@@ -4,7 +4,7 @@
4
4
  ### Job and background setup
5
5
 
6
6
  Given /^the job is '([[:alnum:]\s]+)'$/ do |arg|
7
- @brt = Remi::BusinessRules::Tester.new(arg)
7
+ @brt = Remi::Testing::BusinessRules::Tester.new(arg)
8
8
  end
9
9
 
10
10
  Given /^the job source '([[:alnum:]\s\-_]+)'$/ do |arg|
@@ -64,13 +64,13 @@ Then /^the file that comes last in an alphanumeric sort by group will be downloa
64
64
  end
65
65
 
66
66
  Then /^the file is uploaded to the remote path "([^"]+)"$/ do |remote_path|
67
- expect(@brt.target.get_attrib(:remote_path)).to eq Remi::BusinessRules::ParseFormula.parse(remote_path)
67
+ expect(@brt.target.get_attrib(:remote_path)).to eq Remi::Testing::BusinessRules::ParseFormula.parse(remote_path)
68
68
  end
69
69
 
70
70
  ## CSV Options
71
71
 
72
72
  Given /^the (source|target) file is delimited with a (\w+)$/ do |st, delimiter|
73
- expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::BusinessRules.csv_opt_map[delimiter]
73
+ expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::Testing::BusinessRules.csv_opt_map[delimiter]
74
74
  end
75
75
 
76
76
  Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, encoding|
@@ -78,15 +78,15 @@ Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, enc
78
78
  end
79
79
 
80
80
  Given /^the (source|target) file uses a ([\w ]+) to quote embedded delimiters$/ do |st, quote_char|
81
- expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[quote_char]
81
+ expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::Testing::BusinessRules.csv_opt_map[quote_char]
82
82
  end
83
83
 
84
84
  Given /^the (source|target) file uses a preceding ([\w ]+) to escape an embedded quoting character$/ do |st, escape_char|
85
- expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[escape_char]
85
+ expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::Testing::BusinessRules.csv_opt_map[escape_char]
86
86
  end
87
87
 
88
88
  Given /^the (source|target) file uses ([\w ]+) line endings$/ do |st, line_endings|
89
- expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::BusinessRules.csv_opt_map[line_endings]
89
+ expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::Testing::BusinessRules.csv_opt_map[line_endings]
90
90
  end
91
91
 
92
92
  Given /^the (source|target) file uses "([^"]+)" as a record separator$/ do |st, line_endings|
@@ -130,7 +130,7 @@ Given /^the source field '([^']+)' (?:has|is set to) the value "([^"]*)"$/ do |s
130
130
  step "the source field '#{source_field}'"
131
131
 
132
132
  source_name, source_field_name = @brt.sources.parse_full_field(source_field)
133
- @brt.sources[source_name].fields[source_field_name].value = Remi::BusinessRules::ParseFormula.parse(value)
133
+ @brt.sources[source_name].fields[source_field_name].value = Remi::Testing::BusinessRules::ParseFormula.parse(value)
134
134
  end
135
135
 
136
136
  Given /^the source field (?:has|is set to) the value "([^"]*)"$/ do |value|
@@ -143,7 +143,7 @@ Given /^the source field '([^']+)' (?:has|is set to) the multiline value$/ do |s
143
143
  step "the source field '#{source_field}'"
144
144
 
145
145
  source_name, source_field_name = @brt.sources.parse_full_field(source_field)
146
- @brt.sources[source_name].fields[source_field_name].value = Remi::BusinessRules::ParseFormula.parse(value)
146
+ @brt.sources[source_name].fields[source_field_name].value = Remi::Testing::BusinessRules::ParseFormula.parse(value)
147
147
  end
148
148
 
149
149
  Given /^the source field (?:has|is set to) the multiline value$/ do |value|
@@ -266,7 +266,7 @@ Then /^the target field '([^']+)' is (?:set to the value|populated with) "([^"]*
266
266
  @brt.run_transforms
267
267
  }.not_to raise_error
268
268
  Array(target_names).each do |target_name|
269
- expect(@brt.targets[target_name].fields[target_field_name].values.uniq).to eq [Remi::BusinessRules::ParseFormula.parse(value)]
269
+ expect(@brt.targets[target_name].fields[target_field_name].values.uniq).to eq [Remi::Testing::BusinessRules::ParseFormula.parse(value)]
270
270
  end
271
271
  }
272
272
  end
@@ -12,6 +12,7 @@ Feature: Tests the parse_date transform
12
12
  Given the source field 'Date String' has the value "<Date String>"
13
13
  And the job parameter 'format' is "<Format>"
14
14
  Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
15
+
15
16
  Examples:
16
17
  | Date String | Format | Parsed Date |
17
18
  | 2015-10-21 | %Y-%m-%d | 2015-10-21 |
data/jobs/metadata_job.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require_relative 'all_jobs_shared'
2
+ ENV['TZ'] = 'UTC'
2
3
 
3
4
  class MetadataJob
4
5
  include AllJobsShared
@@ -11,7 +12,7 @@ class MetadataJob
11
12
  :activity_type => { from: 'in', in: true, type: :string, valid_values: ['A', 'B', 'C'], cdc_type: 2 },
12
13
  :activity_counter => { from: 'in', in: true, type: :integer, cdc_type: 2 },
13
14
  :activity_score => { from: 'in', in: true, type: :float, cdc_type: 2 },
14
- :activity_cost => { from: 'in', in: true, type: :decimal, precision: 16, scale: 2, cdc_type: 2 },
15
+ :activity_cost => { from: 'in', in: true, type: :decimal, precision: 8, scale: 2, cdc_type: 2 },
15
16
  :activity_date => { from: 'in', in: true, type: :datetime, in_format: '%m/%d/%Y %H:%M:%S', out_format: '%Y-%m-%dT%H:%M:%S', cdc_type: 2 },
16
17
  :source_filename => { from: 'in', in: true, type: :string, cdc_type: 1 }
17
18
  }
@@ -25,36 +26,28 @@ class MetadataJob
25
26
  :activity_type => { from: 'out', out: true, type: :string, valid_values: ['A', 'B', 'C'] },
26
27
  :activity_counter => { from: 'out', out: true, type: :integer },
27
28
  :activity_score => { from: 'out', out: true, type: :float },
28
- :activity_cost => { from: 'out', out: true, type: :decimal, precision: 16, scale: 2 },
29
+ :activity_cost => { from: 'out', out: true, type: :decimal, precision: 8, scale: 2 },
29
30
  :activity_date => { from: 'out', out: true, type: :datetime, in_format: '%m/%d/%Y %H:%M:%S', out_format: '%Y-%m-%dT%H:%M:%S' },
30
31
  :source_filename => { from: 'out', out: true, type: :string, cdc_type: 1 }
31
32
  }
32
33
 
33
34
  define_transform :main do
35
+ source_data.enforce_types
34
36
 
35
- =begin
36
- source_data.df = Remi::DataFrame.daru([
37
- ['1','1','3/3/1998','A','1','3.8','12.23','1/3/2016 03:22:36','one.csv'],
38
- ['2','1','3/3/1998','B','3','4.2','10.53','1/3/2016 03:58:22','one.csv'],
39
- ['2','1','','B','2','4.23','10.539','1/3/2016 03:58:22','one.csv']
40
- ].transpose, order: [
41
- :activity_id,
42
- :student_id,
43
- :student_dob,
44
- :activity_type,
45
- :activity_counter,
46
- :activity_score,
47
- :activity_cost,
48
- :activity_date,
49
- :source_filename
50
- ])
51
- =end
52
-
53
- Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields) do
37
+ Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields, target_metadata: target_data.fields) do
54
38
  target_data.fields.keys.each do |field|
55
39
  map source(field) .target(field)
56
- .transform(Remi::Transform::EnforceType.new)
40
+
41
+ map source(field) .target("#{field}_class".to_sym)
42
+ .transform(->(v) { v.class })
57
43
  end
44
+
45
+ map source(:activity_cost) .target(:activity_cost_precision, :activity_cost_scale)
46
+ .transform(->(row) {
47
+ components = row[:activity_cost].to_s.split('.')
48
+ row[:activity_cost_precision] = components.first.size
49
+ row[:activity_cost_scale] = components.last.size
50
+ })
58
51
  end
59
52
  end
60
53
  end
@@ -22,7 +22,7 @@ class ParseDateJob
22
22
  .transform(Remi::Transform::ParseDate.new(in_format: params[:format], if_blank: params[:if_blank]))
23
23
 
24
24
  map source(:stubbed_date) .target(:parsed_stubbed_date)
25
- .transform(Remi::Transform::ParseDate.new(in_format: params[:format], if_blank: params[:if_blank]))
25
+ .transform(Remi::Transform::ParseDate.new(in_format: source_data.fields[:stubbed_date][:in_format], if_blank: params[:if_blank]))
26
26
  end
27
27
  end
28
28
  end
data/lib/remi/cucumber.rb CHANGED
@@ -3,5 +3,9 @@ require 'cucumber/rspec/doubles'
3
3
 
4
4
  require 'regexp-examples'
5
5
 
6
- require_relative 'cucumber/data_source'
7
- require_relative 'cucumber/business_rules'
6
+ require_relative 'testing/data_stub'
7
+ require_relative 'testing/business_rules'
8
+
9
+ class Remi::DataSource
10
+ include Remi::Testing::DataStub
11
+ end
@@ -1,9 +1,4 @@
1
1
  module Remi
2
-
3
- # Namespaces for specific sources/targets
4
- module DataSource; end
5
- module DataTarget; end
6
-
7
2
  class DataSubject
8
3
  def initialize(*args, fields: Remi::Fields.new, remi_df_type: :daru, logger: Remi::Settings.logger, **kargs, &block)
9
4
  @fields = fields
@@ -11,16 +6,24 @@ module Remi
11
6
  @logger = logger
12
7
  end
13
8
 
9
+ # Public: Fields defined for this data subject
14
10
  attr_accessor :fields
15
11
 
12
+ # Public: The default method for symbolizing field names
16
13
  def field_symbolizer
17
14
  Remi::FieldSymbolizers[:standard]
18
15
  end
19
16
 
17
+ # Public: Access the dataframe from a DataSource
18
+ #
19
+ # Returns a Remi::DataFrame
20
20
  def df
21
21
  @dataframe ||= Remi::DataFrame.create(@remi_df_type, [], order: @fields.keys)
22
22
  end
23
23
 
24
+ # Public: Reassigns the dataframe associated with this subject
25
+ #
26
+ # Returns the assigned dataframe
24
27
  def df=(new_dataframe)
25
28
  if new_dataframe.respond_to? :remi_df_type
26
29
  @dataframe = new_dataframe
@@ -29,58 +32,78 @@ module Remi
29
32
  end
30
33
  end
31
34
 
32
- module DataSource
33
-
34
- # Public: Access the dataframe from a DataSource
35
- #
36
- # Returns a Remi::DataFrame
37
- def df
38
- @dataframe ||= to_dataframe
35
+ # Public: Enforces types defined in the field metadata.
36
+ # For example, if a field has metadata with type: :date, then the
37
+ # type enforcer will convert data in that field into a date, and will
38
+ # throw an error if it is unable to parse any of the values.
39
+ #
40
+ # types - If set, restricts the data types that are enforced to just those listed.
41
+ #
42
+ # Returns nothing.
43
+ def enforce_types(*types)
44
+ sttm = SourceToTargetMap.new(df, source_metadata: fields)
45
+ fields.keys.each do |field|
46
+ next unless (types.size == 0 || types.include?(fields[field][:type])) && df.vectors.include?(field)
47
+ sttm.source(field).target(field).transform(Remi::Transform::EnforceType.new).execute
39
48
  end
40
49
 
41
- # Public: Memoized version of extract!
42
- def extract
43
- @extract ||= extract!
44
- end
50
+ nil
51
+ end
52
+ end
45
53
 
46
- # Public: Called to extract data from the source.
47
- #
48
- # Returns data in a format that can be used to create a dataframe.
49
- def extract!
50
- raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
51
- @extract
52
- end
53
54
 
54
- # Public: Converts extracted data to a dataframe
55
- #
56
- # Returns a Remi::DataFrame
57
- def to_dataframe
58
- raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
59
- end
55
+ class DataSource < DataSubject
56
+
57
+ # Public: Access the dataframe from a DataSource
58
+ #
59
+ # Returns a Remi::DataFrame
60
+ def df
61
+ @dataframe ||= to_dataframe
60
62
  end
61
63
 
62
- module DataTarget
64
+ # Public: Memoized version of extract!
65
+ def extract
66
+ @extract ||= extract!
67
+ end
63
68
 
64
- # Public: Loads data to the target. This is automatically called
65
- # after all transforms have executed, but could also get called manually.
66
- # The actual load operation is only executed if hasn't already.
67
- #
68
- # Returns true if the load operation was successful.
69
- def load
70
- return true if @loaded || df.size == 0
69
+ # Public: Called to extract data from the source.
70
+ #
71
+ # Returns data in a format that can be used to create a dataframe.
72
+ def extract!
73
+ raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
74
+ @extract
75
+ end
71
76
 
72
- @loaded = load!
73
- end
77
+ # Public: Converts extracted data to a dataframe
78
+ #
79
+ # Returns a Remi::DataFrame
80
+ def to_dataframe
81
+ raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
82
+ end
83
+ end
74
84
 
75
- # Public: Performs the load operation, regardless of whether it has
76
- # already executed.
77
- #
78
- # Returns true if the load operation was successful
79
- def load!
80
- raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
81
85
 
82
- false
83
- end
86
+ class DataTarget < DataSubject
87
+
88
+ # Public: Loads data to the target. This is automatically called
89
+ # after all transforms have executed, but could also get called manually.
90
+ # The actual load operation is only executed if hasn't already.
91
+ #
92
+ # Returns true if the load operation was successful.
93
+ def load
94
+ return true if @loaded || df.size == 0
95
+
96
+ @loaded = load!
97
+ end
98
+
99
+ # Public: Performs the load operation, regardless of whether it has
100
+ # already executed.
101
+ #
102
+ # Returns true if the load operation was successful
103
+ def load!
104
+ raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
105
+
106
+ false
84
107
  end
85
108
  end
86
109
  end
@@ -26,8 +26,7 @@ module Remi
26
26
 
27
27
 
28
28
 
29
- class DataSource::CsvFile < Remi::DataSubject
30
- include Remi::DataSubject::DataSource
29
+ class DataSource::CsvFile < DataSource
31
30
  include Remi::DataSubject::CsvFile
32
31
 
33
32
  def initialize(*args, **kargs, &block)
@@ -130,8 +129,7 @@ module Remi
130
129
 
131
130
 
132
131
 
133
- class DataTarget::CsvFile < Remi::DataSubject
134
- include ::Remi::DataSubject::DataTarget
132
+ class DataTarget::CsvFile < DataTarget
135
133
  include ::Remi::DataSubject::CsvFile
136
134
 
137
135
  default_csv_options[:row_sep] = "\n"
@@ -1,7 +1,6 @@
1
1
  module Remi
2
2
 
3
- class DataSource::DataFrame < Remi::DataSubject
4
- include Remi::DataSubject::DataSource
3
+ class DataSource::DataFrame < DataSource
5
4
 
6
5
  def initialize(*args, **kargs, &block)
7
6
  super
@@ -30,8 +29,7 @@ module Remi
30
29
  end
31
30
 
32
31
 
33
- class DataTarget::DataFrame < Remi::DataSubject
34
- include Remi::DataSubject::DataTarget
32
+ class DataTarget::DataFrame < DataTarget
35
33
 
36
34
  def initialize(*args, **kargs, &block)
37
35
  super
@@ -13,8 +13,7 @@ module Remi
13
13
  end
14
14
 
15
15
 
16
- class DataSource::Postgres < Remi::DataSubject
17
- include Remi::DataSubject::DataSource
16
+ class DataSource::Postgres < DataSource
18
17
  include Remi::DataSubject::Postgres
19
18
 
20
19
 
@@ -65,8 +64,7 @@ module Remi
65
64
 
66
65
  # VERY PRELIMINARY IMPLEMENTAtION - ONLY LOADS TO TEMP TABLES
67
66
  # IT IS THEN UP TO THE USER TO DO ELT TO LOAD THE FINAL TABLE
68
- class DataTarget::Postgres < Remi::DataSubject
69
- include Remi::DataSubject::DataTarget
67
+ class DataTarget::Postgres < DataTarget
70
68
  include Remi::DataSubject::Postgres
71
69
 
72
70
  def initialize(*args, **kargs, &block)
@@ -21,8 +21,7 @@ module Remi
21
21
  end
22
22
 
23
23
 
24
- class DataSource::Salesforce < Remi::DataSubject
25
- include Remi::DataSubject::DataSource
24
+ class DataSource::Salesforce < DataSource
26
25
  include Remi::DataSubject::Salesforce
27
26
 
28
27
  def initialize(*args, **kargs, &block)
@@ -92,8 +91,7 @@ module Remi
92
91
  end
93
92
 
94
93
 
95
- class DataTarget::Salesforce < Remi::DataSubject
96
- include Remi::DataSubject::DataTarget
94
+ class DataTarget::Salesforce < DataTarget
97
95
  include Remi::DataSubject::Salesforce
98
96
 
99
97
  def initialize(*args, **kargs, &block)
@@ -1,7 +1,6 @@
1
1
  module Remi
2
2
 
3
- class DataTarget::SftpFile < Remi::DataSubject
4
- include Remi::DataSubject::DataTarget
3
+ class DataTarget::SftpFile < DataTarget
5
4
 
6
5
  def initialize(*args, **kargs, &block)
7
6
  super
@@ -1,4 +1,4 @@
1
- module Remi::BusinessRules
1
+ module Remi::Testing::BusinessRules
2
2
  using Remi::Refinements::Symbolizer
3
3
 
4
4
  def self.csv_opt_map
@@ -0,0 +1,72 @@
1
+ module Remi
2
+ module Testing
3
+ module DataStub
4
+ def stub_row_array
5
+ @fields.values.map do |attribs|
6
+ stub_values(attribs)
7
+ end
8
+ end
9
+
10
+ def empty_stub_df
11
+ self.df = Daru::DataFrame.new([], order: @fields.keys)
12
+ end
13
+
14
+ def stub_df
15
+ empty_stub_df
16
+ self.df.add_row(stub_row_array)
17
+ end
18
+
19
+ def stub_values(**attribs)
20
+ stub_type = "stub_#{attribs[:type]}".to_sym
21
+ if respond_to?(stub_type)
22
+ send(stub_type, attribs)
23
+ else
24
+ stub_string(attribs)
25
+ end
26
+ end
27
+
28
+ def stub_string(**attribs)
29
+ Faker::Hipster.word
30
+ end
31
+
32
+ def stub_float(**attribs)
33
+ Faker::Number.decimal(2,3)
34
+ end
35
+
36
+ def stub_decimal(**attribs)
37
+ Faker::Number.decimal(attribs[:precision],attribs[:scale])
38
+ end
39
+
40
+ def stub_integer(**attribs)
41
+ Faker::Number.number(4).to_s
42
+ end
43
+
44
+ def stub_date(**attribs)
45
+ in_format = attribs[:in_format]
46
+ result = Faker::Date.backward(3650)
47
+ result = result.strftime(in_format) if in_format
48
+ result
49
+ end
50
+
51
+ def stub_datetime(**attribs)
52
+ in_format = attribs[:in_format]
53
+ result = Faker::Time.backward(3650)
54
+ result = result.strftime(in_format) if in_format
55
+ result
56
+ end
57
+
58
+ def stub_boolean(**attribs)
59
+ ['T','F'].shuffle.first
60
+ end
61
+
62
+ def stub_json(**attribs)
63
+ if attribs[:json_array]
64
+ [ stub_string ]
65
+ else
66
+ { Faker::Hipster.words(1, true, true) => stub_string }
67
+ end.to_json
68
+ end
69
+
70
+ end
71
+ end
72
+ end
@@ -218,6 +218,7 @@ module Remi
218
218
  # This transform is metadata aware and will use :in_format metadata
219
219
  # from the source
220
220
  #
221
+ # type - Specify either :date, or :datetime type (default: date)
221
222
  # in_format - The date format to use to convert the string (default: uses :in_format
222
223
  # from the source metadata. If that is not defined, use '%Y-%m-%d').
223
224
  # if_blank - Value to use if the the incoming value is blank (default: uses :if_blank
@@ -232,20 +233,33 @@ module Remi
232
233
  # tform.source_metadata = { in_format: '%m/%d/%Y' }
233
234
  # tform.to_proc.call('02/22/2013') # => Date.new(2013,2,22)
234
235
  class ParseDate < Transform
235
- def initialize(*args, in_format: nil, if_blank: nil, **kargs, &block)
236
+ def initialize(*args, type: nil, in_format: nil, if_blank: nil, **kargs, &block)
236
237
  super
238
+ @type = type
237
239
  @in_format = in_format
238
240
  @if_blank = if_blank
239
241
  end
240
242
 
243
+ def type
244
+ @type ||= @source_metadata.fetch(:type, :date)
245
+ end
246
+
241
247
  def in_format
242
- @in_format ||= @source_metadata.fetch(:in_format, '%Y-%m-%d')
248
+ @in_format ||= @source_metadata.fetch(:in_format, default_date_format)
243
249
  end
244
250
 
245
251
  def if_blank
246
252
  @if_blank ||= @source_metadata.fetch(:if_blank, nil)
247
253
  end
248
254
 
255
+ def default_date_format
256
+ if type == :datetime
257
+ '%Y-%m-%d %H:%M:%S'
258
+ else
259
+ '%Y-%m-%d'
260
+ end
261
+ end
262
+
249
263
  def transform(value)
250
264
  begin
251
265
  if value.respond_to?(:strftime)
@@ -260,15 +274,19 @@ module Remi
260
274
  end
261
275
  end
262
276
 
277
+ def class_type
278
+ @class_type ||= type == :datetime ? Time : Date
279
+ end
280
+
263
281
  def string_to_date(value)
264
- Date.strptime(value, in_format)
282
+ class_type.strptime(value, in_format)
265
283
  end
266
284
 
267
285
  def blank_handler(value)
268
286
  if if_blank == :low
269
- Date.new(1900,01,01)
287
+ class_type.new(1900,01,01)
270
288
  elsif if_blank == :high
271
- Date.new(2999,12,31)
289
+ class_type.new(2999,12,31)
272
290
  elsif if_blank.respond_to? :call
273
291
  if_blank.call(value)
274
292
  else
@@ -282,6 +300,7 @@ module Remi
282
300
  # This transform is metadata aware and will use :in_format/:out_format metadata
283
301
  # from the source.
284
302
  #
303
+ # type - Specify either :date, or :datetime type (default: date)
285
304
  # in_format - The date format to used to parse the input value. If the input value
286
305
  # is a date, then then parameter is ignored. (default: uses :in_format
287
306
  # from the source metadata. If that is not defined, use '%Y-%m-%d')
@@ -297,18 +316,35 @@ module Remi
297
316
  # tform.source_metadata = { in_format: '%m/%d/%Y', out_format: '%Y-%m-%d' }
298
317
  # tform.to_proc.call('02/22/2013') # => "2013-02-22"
299
318
  class FormatDate < Transform
300
- def initialize(*args, in_format: nil, out_format: nil, **kargs, &block)
319
+ def initialize(*args, type: nil, in_format: nil, out_format: nil, **kargs, &block)
301
320
  super
321
+ @type = type
302
322
  @in_format = in_format
303
323
  @out_format = out_format
304
324
  end
305
325
 
326
+ def type
327
+ @type ||= @source_metadata.fetch(:type, :date)
328
+ end
329
+
306
330
  def in_format
307
- @in_format ||= @source_metadata.fetch(:in_format, '%Y-%m-%d')
331
+ @in_format ||= @source_metadata.fetch(:in_format, default_date_format)
308
332
  end
309
333
 
310
334
  def out_format
311
- @out_format ||= @source_metadata.fetch(:out_format, '%Y-%m-%d')
335
+ @out_format ||= @source_metadata.fetch(:out_format, default_date_format)
336
+ end
337
+
338
+ def default_date_format
339
+ if type == :datetime
340
+ '%Y-%m-%d %H:%M:%S'
341
+ else
342
+ '%Y-%m-%d'
343
+ end
344
+ end
345
+
346
+ def class_type
347
+ @class_type ||= type == :datetime ? Time : Date
312
348
  end
313
349
 
314
350
  def transform(value)
@@ -318,7 +354,7 @@ module Remi
318
354
  elsif value.respond_to? :strftime
319
355
  value.strftime(out_format)
320
356
  else
321
- Date.strptime(value, in_format).strftime(out_format)
357
+ class_type.strptime(value, in_format).strftime(out_format)
322
358
  end
323
359
  rescue ArgumentError => err
324
360
  raise err, "Error parsing date (#{value.class}): '#{value}' using the format #{in_format} => #{out_format}"
@@ -461,7 +497,7 @@ module Remi
461
497
  def if_blank
462
498
  return @if_blank if @if_blank_set
463
499
  @if_blank_set = true
464
- @if_blank = @source_metadata.fetch(:if_blank, nil)
500
+ @if_blank = @source_metadata.fetch(:if_blank, default_if_blank)
465
501
  end
466
502
 
467
503
  def blank_handler(value)
@@ -474,8 +510,12 @@ module Remi
474
510
  end
475
511
  end
476
512
 
513
+ def default_if_blank
514
+ type == :string ? '' : nil
515
+ end
516
+
477
517
  def transform(value)
478
- if value.blank?
518
+ if value.blank? && type != :json
479
519
  blank_handler(value)
480
520
  else
481
521
  case type
@@ -488,9 +528,15 @@ module Remi
488
528
  when :decimal
489
529
  Float("%.#{scale}f" % Float(value))
490
530
  when :date
491
- value.is_a?(Date) ? value : Date.strptime(value, in_format) # value.is_a?(Date) is only needed becuase we stub date types with actual dates, rather than strings like we probably should
531
+ value.is_a?(Date) ? value : Date.strptime(value, in_format)
492
532
  when :datetime
493
- Time.strptime(value, in_format)
533
+ value.is_a?(Time) ? value : Time.strptime(value, in_format)
534
+ when :json
535
+ if value.blank? && value != [] && value != {}
536
+ blank_handler(value)
537
+ else
538
+ value.is_a?(Hash) || value.is_a?(Array) ? value : JSON.parse(value)
539
+ end
494
540
  else
495
541
  raise ArgumentError, "Unknown type enforcement: #{type}"
496
542
  end
data/lib/remi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.2.39'
2
+ VERSION = '0.2.40'
3
3
  end
@@ -0,0 +1,44 @@
1
+ require_relative 'remi_spec'
2
+
3
+ # VERY SPARSE TESTING! DO MORE!
4
+
5
+ describe DataSubject do
6
+
7
+ describe 'enforcing types' do
8
+ let(:dataframe) do
9
+ Remi::DataFrame::Daru.new({ my_date: ['10/21/2015'] })
10
+ end
11
+
12
+ let(:data_subject) do
13
+ DataSubject.new(fields: fields).tap { |ds| ds.df = dataframe }
14
+ end
15
+
16
+ let(:fields) do
17
+ Fields.new({
18
+ my_date: { type: :date, in_format: '%m/%d/%Y' },
19
+ other_date: { type: :date, in_format: '%m/%d/%Y' }
20
+ })
21
+ end
22
+
23
+ it 'converts a date string to a date using an in_format' do
24
+ data_subject.enforce_types
25
+ expect(data_subject.df[:my_date].to_a).to eq [Date.new(2015, 10, 21)]
26
+ end
27
+
28
+ it 'does not do any conversion if the type is not specified' do
29
+ fields[:my_date].delete(:type)
30
+ data_subject.enforce_types
31
+ expect(data_subject.df[:my_date].to_a).to eq ['10/21/2015']
32
+ end
33
+
34
+ it 'throws an error if the data does not conform to its type' do
35
+ dataframe[:my_date].recode! { |v| '2015-10-21' }
36
+ expect { data_subject.enforce_types }.to raise_error ArgumentError
37
+ end
38
+
39
+ it 'does not create new vectors during enforcement' do
40
+ data_subject.enforce_types
41
+ expect(dataframe.vectors.to_a).to eq [:my_date]
42
+ end
43
+ end
44
+ end
@@ -49,7 +49,7 @@ describe SourceToTargetMap do
49
49
  it_behaves_like 'one-to-one map'
50
50
  end
51
51
 
52
- context 'without any transforms', wip: true do
52
+ context 'without any transforms' do
53
53
  before { map.source(:a) .target(:aprime) }
54
54
 
55
55
  let(:result) do
@@ -0,0 +1,171 @@
1
+ require_relative '../remi_spec'
2
+ require 'remi/testing/data_stub'
3
+
4
+ describe Testing::DataStub do
5
+ class StubTester < DataSubject
6
+ include Testing::DataStub
7
+ end
8
+
9
+ context 'data type stubs' do
10
+ let(:stub_tester) { StubTester.new }
11
+
12
+ context '#stub_string' do
13
+ let(:stub) { stub_tester.stub_string }
14
+
15
+ it 'stubs as strings' do
16
+ expect(stub).to be_a String
17
+ end
18
+ end
19
+
20
+ context '#stub_float' do
21
+ let(:stub) { stub_tester.stub_float }
22
+
23
+ it 'stubs as strings' do
24
+ expect(stub).to be_a String
25
+ end
26
+
27
+ it 'represents a floating point number' do
28
+ expect(Float(stub) % 1).not_to eq 0.0
29
+ end
30
+ end
31
+
32
+ context '#stub_decimal' do
33
+ let(:stub) { stub_tester.stub_decimal(precision: 8, scale: 2) }
34
+
35
+ it 'stubs as strings' do
36
+ expect(stub).to be_a String
37
+ end
38
+
39
+ it 'represents a floating point number' do
40
+ expect(Float(stub) % 1).not_to eq 0.0
41
+ end
42
+
43
+ it 'comes with the specified precision' do
44
+ expect(Float(stub).to_s.split('.').first.size).to eq 8
45
+ end
46
+
47
+ it 'comes with the specified scale' do
48
+ expect(Float(stub).to_s.split('.').last.size).to eq 2
49
+ end
50
+ end
51
+
52
+ context '#stub_integer' do
53
+ let(:stub) { stub_tester.stub_integer }
54
+
55
+ it 'stubs as strings' do
56
+ expect(stub).to be_a String
57
+ end
58
+
59
+ it 'represents an integer' do
60
+ expect(Float(stub) % 1).to eq 0.0
61
+ end
62
+
63
+ it 'converts to an integer' do
64
+ expect { Integer(stub) }.not_to raise_error
65
+ end
66
+ end
67
+
68
+
69
+ context '#stub_date' do
70
+ context 'without an in_format' do
71
+ let(:stub) { stub_tester.stub_date }
72
+
73
+ it 'stubs as a date' do
74
+ expect(stub).to be_a Date
75
+ end
76
+ end
77
+
78
+ context 'with an in_format' do
79
+ let(:stub) { stub_tester.stub_date(in_format: '%m/%d/%Y') }
80
+
81
+ it 'stubs as strings' do
82
+ expect(stub).to be_a String
83
+ end
84
+
85
+ it 'can parsed as a date using the specified in_format' do
86
+ expect { Date.strptime(stub, '%m/%d/%Y') }.not_to raise_error
87
+ end
88
+ end
89
+ end
90
+
91
+ context '#stub_datetime' do
92
+ context 'without an in_format' do
93
+ let(:stub) { stub_tester.stub_datetime }
94
+
95
+ it 'stubs as a time' do
96
+ expect(stub).to be_a Time
97
+ end
98
+ end
99
+
100
+ context 'with an in_format' do
101
+ let(:stub) { stub_tester.stub_datetime(in_format: '%m/%d/%Y %H:%M:%S') }
102
+
103
+ it 'stubs as strings' do
104
+ expect(stub).to be_a String
105
+ end
106
+
107
+ it 'can parsed as a time using the specified in_format' do
108
+ expect { Time.strptime(stub, '%m/%d/%Y %H:%M:%S') }.not_to raise_error
109
+ end
110
+ end
111
+ end
112
+
113
+ context '#stub_boolean' do
114
+ let(:stub) { stub_tester.stub_boolean }
115
+
116
+ it 'stubs as strings' do
117
+ expect(stub).to be_a String
118
+ end
119
+
120
+ it 'is either T or F' do
121
+ expect(stub).to eq('T').or eq('F')
122
+ end
123
+ end
124
+
125
+ context '#stub_json' do
126
+ let(:stub) { stub_tester.stub_json }
127
+
128
+ it 'stubs as strings' do
129
+ expect(stub).to be_a String
130
+ end
131
+
132
+ it 'can be parsed as JSON' do
133
+ expect { JSON.parse(stub) }.not_to raise_error
134
+ end
135
+ end
136
+ end
137
+
138
+
139
+ context 'stubbed dataframe data' do
140
+ let(:stub_tester) do
141
+ StubTester.new(fields: {
142
+ my_date: { type: :date, in_format: '%m/%d/%Y' },
143
+ my_str: {}
144
+ })
145
+ end
146
+
147
+ context '#empty_stub_df' do
148
+ before { stub_tester.empty_stub_df }
149
+
150
+ it 'creates a dataframe with no data' do
151
+ expect(stub_tester.df.size).to eq 0
152
+ end
153
+
154
+ it 'creates a dataframe with the right number of vectors' do
155
+ expect(stub_tester.df.vectors.size).to eq 2
156
+ end
157
+ end
158
+
159
+ context '#stub_df' do
160
+ before { stub_tester.stub_df }
161
+
162
+ it 'creates a row of data' do
163
+ expect(stub_tester.df.size).to eq 1
164
+ end
165
+
166
+ it 'creates data according to the supplied metadata' do
167
+ expect { Date.strptime(stub_tester.df[:my_date].first, '%m/%d/%Y') }.not_to raise_error
168
+ end
169
+ end
170
+ end
171
+ end
@@ -50,4 +50,79 @@ describe Transform do
50
50
  expect(transform.call(1, 2)).to eq [1, 2]
51
51
  end
52
52
  end
53
+
54
# Specs for Transform::ParseDate: parsing strings into Date (the default) or
# Time (type: :datetime) values, with or without an explicit in_format.
describe Transform::ParseDate do
  it 'parses a date using the specified in_format' do
    parser = Transform::ParseDate.new(in_format: '%m/%d/%Y')
    expect(parser.call('03/22/2011')).to eq Date.new(2011, 3, 22)
  end

  it 'returns a date if it has already been parsed' do
    parser = Transform::ParseDate.new(in_format: '%m/%d/%Y')
    already_parsed = Date.new(2011, 3, 22)
    # The Date must go through the parser; comparing two Date literals
    # directly would pass regardless of what the parser does.
    expect(parser.call(already_parsed)).to eq already_parsed
  end

  it 'uses ISO 8601 as the default date parser' do
    parser = Transform::ParseDate.new
    expect(parser.call('2011-03-22')).to eq Date.new(2011, 3, 22)
  end

  it 'fails when an unparseable date is provided' do
    parser = Transform::ParseDate.new
    # The input is in %m/%d/%Y form, but no in_format was given.
    expect { parser.call('03/22/2011') }.to raise_error ArgumentError
  end

  it 'parses datetimes when the type is specified' do
    parser = Transform::ParseDate.new(type: :datetime, in_format: '%m/%d/%Y %H:%M:%S')
    expect(parser.call('03/22/2011 04:22:00')).to eq Time.new(2011, 3, 22, 4, 22, 0)
  end

  it 'uses ISO 8601 as the default datetime parser' do
    parser = Transform::ParseDate.new(type: :datetime)
    expect(parser.call('2011-03-22 04:22:00')).to eq Time.new(2011, 3, 22, 4, 22, 0)
  end
end
85
+
86
# Specs for Transform::FormatDate: rendering Date/Time values (or strings
# that are first parsed with in_format) as strings using out_format.
# Declared with `describe`, like the sibling Transform::ParseDate group.
describe Transform::FormatDate do
  it 'formats a date using the specified out_format' do
    formatter = Transform::FormatDate.new(out_format: '%m/%d/%Y')
    expect(formatter.call(Date.new(2011, 3, 22))).to eq '03/22/2011'
  end

  it 'formats a datetime using the specified out_format' do
    formatter = Transform::FormatDate.new(type: :datetime, out_format: '%m/%d/%Y %H:%M:%S')
    expect(formatter.call(Time.new(2011, 3, 22, 4, 22, 0))).to eq '03/22/2011 04:22:00'
  end

  it 'uses the in_format to parse strings when the source is not already a date' do
    formatter = Transform::FormatDate.new(in_format: '%d/%m/%Y', out_format: '%m/%d/%Y')
    expect(formatter.call('22/03/2011')).to eq '03/22/2011'
  end

  it 'fails when an unparseable date is provided' do
    formatter = Transform::FormatDate.new(in_format: '%d/%m/%Y', out_format: '%m/%d/%Y')
    # Under '%d/%m/%Y' the second component is the month, and 22 is not a valid month.
    expect { formatter.call('22/22/2011') }.to raise_error ArgumentError
  end

  it 'uses ISO 8601 as the default date parser' do
    formatter = Transform::FormatDate.new(out_format: '%m/%d/%Y')
    expect(formatter.call('2011-03-22')).to eq '03/22/2011'
  end

  it 'uses ISO 8601 as the default date formatter' do
    formatter = Transform::FormatDate.new(in_format: '%m/%d/%Y')
    expect(formatter.call('03/22/2011')).to eq '2011-03-22'
  end

  it 'uses ISO 8601 as the default datetime parser' do
    formatter = Transform::FormatDate.new(type: :datetime, out_format: '%m/%d/%Y %H:%M:%S')
    expect(formatter.call('2011-03-22 04:22:00')).to eq '03/22/2011 04:22:00'
  end

  it 'uses ISO 8601 as the default datetime formatter' do
    formatter = Transform::FormatDate.new(type: :datetime, in_format: '%m/%d/%Y %H:%M:%S')
    expect(formatter.call('03/22/2011 04:22:00')).to eq '2011-03-22 04:22:00'
  end
end
127
+
53
128
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.39
4
+ version: 0.2.40
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sterling Paramore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-30 00:00:00.000000000 Z
11
+ date: 2016-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bond
@@ -225,8 +225,6 @@ files:
225
225
  - lib/remi.rb
226
226
  - lib/remi/cli.rb
227
227
  - lib/remi/cucumber.rb
228
- - lib/remi/cucumber/business_rules.rb
229
- - lib/remi/cucumber/data_source.rb
230
228
  - lib/remi/data_frame.rb
231
229
  - lib/remi/data_frame/daru.rb
232
230
  - lib/remi/data_subject.rb
@@ -248,11 +246,14 @@ files:
248
246
  - lib/remi/source_to_target_map.rb
249
247
  - lib/remi/source_to_target_map/map.rb
250
248
  - lib/remi/source_to_target_map/row.rb
249
+ - lib/remi/testing/business_rules.rb
250
+ - lib/remi/testing/data_stub.rb
251
251
  - lib/remi/transform.rb
252
252
  - lib/remi/version.rb
253
253
  - remi.gemspec
254
254
  - spec/data_subject/csv_file_spec.rb
255
255
  - spec/data_subject/data_frame.rb
256
+ - spec/data_subject_spec.rb
256
257
  - spec/extractor/file_system_spec.rb
257
258
  - spec/extractor/local_file_spec.rb
258
259
  - spec/extractor/s3_file_spec.rb
@@ -264,6 +265,7 @@ files:
264
265
  - spec/metadata_spec.rb
265
266
  - spec/remi_spec.rb
266
267
  - spec/source_to_target_map_spec.rb
268
+ - spec/testing/data_stub_spec.rb
267
269
  - spec/transform_spec.rb
268
270
  - workbooks/sample_workbook.ipynb
269
271
  - workbooks/workbook_helper.rb
@@ -315,6 +317,7 @@ test_files:
315
317
  - features/transforms/truthy.feature
316
318
  - spec/data_subject/csv_file_spec.rb
317
319
  - spec/data_subject/data_frame.rb
320
+ - spec/data_subject_spec.rb
318
321
  - spec/extractor/file_system_spec.rb
319
322
  - spec/extractor/local_file_spec.rb
320
323
  - spec/extractor/s3_file_spec.rb
@@ -326,4 +329,5 @@ test_files:
326
329
  - spec/metadata_spec.rb
327
330
  - spec/remi_spec.rb
328
331
  - spec/source_to_target_map_spec.rb
332
+ - spec/testing/data_stub_spec.rb
329
333
  - spec/transform_spec.rb
@@ -1,70 +0,0 @@
1
module Remi
  module DataSource
    # Mixin that fills a data source with randomly generated ("stubbed")
    # values. It reads the including object's @fields hash (field name =>
    # attribute hash with an optional :type) and assigns through its `df=`
    # writer.
    module DataStub
      # Builds one row of stub values, one per entry of @fields, in the
      # order the fields are declared.
      #
      # @return [Array] one generated value per field
      def stub_row_array
        @fields.values.map do |attrib|
          stub_values[attrib[:type]].call
        end
      end

      # Replaces the dataframe with one that has no rows and one vector per
      # field name.
      def empty_stub_df
        self.df = Daru::DataFrame.new([], order: @fields.keys)
      end

      # Resets the dataframe and appends a single stubbed row.
      def stub_df
        empty_stub_df
        self.df.add_row(stub_row_array)
      end

      # Maps a field type to a lambda that generates one stub value. Types
      # that are not listed (including nil, when a field declares no :type)
      # fall back to the hash default, which generates a word.
      #
      # @return [Hash{Symbol => Proc}] memoized generator table
      def stub_values
        @stub_values ||= Hash.new(->() { Faker::Hipster.word }).merge({
          string:   ->() { Faker::Hipster.word },
          number:   ->() { Faker::Number.decimal(4,4) },
          float:    ->() { Faker::Number.decimal(2,2) },
          integer:  ->() { Faker::Number.number(4) },
          date:     ->() { Faker::Date.backward(3650) },
          datetime: ->() { Faker::Time.backward(3650).to_datetime },
          # `sample` picks one element directly instead of shuffling the
          # whole array and taking the first.
          boolean:  ->() { ['T','F'].sample }
        })
      end
    end


    class CsvFile
      include DataStub

      # Path of the temporary file that receives the stubbed CSV data.
      #
      # The Tempfile object itself is retained in an instance variable: a
      # Tempfile's backing file is deleted when the object is garbage
      # collected, so keeping only the path string could leave this method
      # returning the path of a file that no longer exists.
      #
      # @return [String] memoized path of the temp file
      def stub_tmp_file
        @stub_tmp_file ||= begin
          @stub_tmp_file_handle = Tempfile.new('stub_tmp_file.csv')
          @stub_tmp_file_handle.path
        end
      end

      # Writes a header line and one stubbed data line to the temp file.
      #
      # @return [String] path of the file that was written
      def write_stub_tmp_file
        File.open(stub_tmp_file, "wb") do |file|
          file.puts stub_header
          file.puts stub_row_csv
        end

        stub_tmp_file
      end

      # Field names joined with the configured column separator.
      def stub_header
        @fields.keys.join(@csv_options[:col_sep])
      end

      # One stubbed row joined with the configured column separator.
      # NOTE(review): values are joined as-is with no CSV quoting, so a
      # generated value containing the separator would corrupt the row.
      def stub_row_csv
        stub_row_array.join(@csv_options[:col_sep])
      end
    end

    # Original author's note: this definition "gets called first" because
    # Salesforce is being split off as a "plugin", which is why the
    # superclass is declared here.
    class Salesforce < Remi::DataSubject
      include DataStub
    end

    class DataFrame
      include DataStub
    end

    class Postgres
      include DataStub
    end
  end
end