remi 0.2.39 → 0.2.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/features/metadata.feature +30 -4
- data/features/step_definitions/remi_step.rb +9 -9
- data/features/transforms/parse_date.feature +1 -0
- data/jobs/metadata_job.rb +15 -22
- data/jobs/transforms/parse_date_job.rb +1 -1
- data/lib/remi/cucumber.rb +6 -2
- data/lib/remi/data_subject.rb +70 -47
- data/lib/remi/data_subject/csv_file.rb +2 -4
- data/lib/remi/data_subject/data_frame.rb +2 -4
- data/lib/remi/data_subject/postgres.rb +2 -4
- data/lib/remi/data_subject/salesforce.rb +2 -4
- data/lib/remi/data_subject/sftp_file.rb +1 -2
- data/lib/remi/{cucumber → testing}/business_rules.rb +1 -1
- data/lib/remi/testing/data_stub.rb +72 -0
- data/lib/remi/transform.rb +59 -13
- data/lib/remi/version.rb +1 -1
- data/spec/data_subject_spec.rb +44 -0
- data/spec/source_to_target_map_spec.rb +1 -1
- data/spec/testing/data_stub_spec.rb +171 -0
- data/spec/transform_spec.rb +75 -0
- metadata +8 -4
- data/lib/remi/cucumber/data_source.rb +0 -70
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9f919d918cc6c2b83a8f6849a30a111ddf07e87
|
4
|
+
data.tar.gz: 7eeaee62f683ee9fd0851e8c61cd47fab8a7f1bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eecc73e562cf266445cf85f7c144424a730f71d95ea588b9ad7c75fc2aba0a09c0aba7440c78639c7433340e3e89888c329aec17660471038996763593e50e45
|
7
|
+
data.tar.gz: 96852fba98c17a79ef1763ece9f5b5cb9893cca9acf5c21da4ac09e352a1fcc2cb8e8f760836f3111493ef3f4b6258f01688b10b8c14952a7978e51623f70653
|
data/Gemfile.lock
CHANGED
data/features/metadata.feature
CHANGED
@@ -9,9 +9,35 @@ Feature: This tests the application of metadata.
|
|
9
9
|
And the source 'Source Data'
|
10
10
|
And the target 'Target Data'
|
11
11
|
|
12
|
-
And the following example record for 'Source Data':
|
13
|
-
| activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
|
14
|
-
| 1 | 1 | 3/3/1998 | A | 1 | 3.8 | 12.23 | 1/3/2016 03:22:36 | one.csv |
|
15
12
|
|
16
13
|
Scenario: Metadata is used to parse date fields
|
17
|
-
|
14
|
+
|
15
|
+
Given the following example record for 'Source Data':
|
16
|
+
| activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
|
17
|
+
| 1 | 1 | 3/3/1998 | A | 1 | 3.8 | 12.23 | 1/3/2016 03:22:36 | one.csv |
|
18
|
+
|
19
|
+
Then the target should match the example:
|
20
|
+
| activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
|
21
|
+
| 1 | 1 | 1998-03-03 | A | 1 | 3.8 | 12.23 | 2016-01-03 03:22:36 +0000 | one.csv |
|
22
|
+
|
23
|
+
Scenario Outline: Metadata is used to stub records with values that conform to the metadata
|
24
|
+
|
25
|
+
Then the target field '<Field>' is set to the value "<Class>"
|
26
|
+
|
27
|
+
Examples:
|
28
|
+
| Field | Class |
|
29
|
+
| activity_id_class | String |
|
30
|
+
| student_id_class | String |
|
31
|
+
| student_dob_class | Date |
|
32
|
+
| activity_type_class | String |
|
33
|
+
| activity_counter_class | Fixnum |
|
34
|
+
| activity_score_class | Float |
|
35
|
+
| activity_cost_class | Float |
|
36
|
+
| activity_date_class | Time |
|
37
|
+
| source_filename_class | String |
|
38
|
+
|
39
|
+
|
40
|
+
Scenario: Metadata for decimals is stubbed
|
41
|
+
|
42
|
+
Then the target field 'activity_cost_precision' is populated with "8"
|
43
|
+
And the target field 'activity_cost_scale' is populated with "2"
|
@@ -4,7 +4,7 @@
|
|
4
4
|
### Job and background setup
|
5
5
|
|
6
6
|
Given /^the job is '([[:alnum:]\s]+)'$/ do |arg|
|
7
|
-
@brt = Remi::BusinessRules::Tester.new(arg)
|
7
|
+
@brt = Remi::Testing::BusinessRules::Tester.new(arg)
|
8
8
|
end
|
9
9
|
|
10
10
|
Given /^the job source '([[:alnum:]\s\-_]+)'$/ do |arg|
|
@@ -64,13 +64,13 @@ Then /^the file that comes last in an alphanumeric sort by group will be downloa
|
|
64
64
|
end
|
65
65
|
|
66
66
|
Then /^the file is uploaded to the remote path "([^"]+)"$/ do |remote_path|
|
67
|
-
expect(@brt.target.get_attrib(:remote_path)).to eq Remi::BusinessRules::ParseFormula.parse(remote_path)
|
67
|
+
expect(@brt.target.get_attrib(:remote_path)).to eq Remi::Testing::BusinessRules::ParseFormula.parse(remote_path)
|
68
68
|
end
|
69
69
|
|
70
70
|
## CSV Options
|
71
71
|
|
72
72
|
Given /^the (source|target) file is delimited with a (\w+)$/ do |st, delimiter|
|
73
|
-
expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::BusinessRules.csv_opt_map[delimiter]
|
73
|
+
expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::Testing::BusinessRules.csv_opt_map[delimiter]
|
74
74
|
end
|
75
75
|
|
76
76
|
Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, encoding|
|
@@ -78,15 +78,15 @@ Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, enc
|
|
78
78
|
end
|
79
79
|
|
80
80
|
Given /^the (source|target) file uses a ([\w ]+) to quote embedded delimiters$/ do |st, quote_char|
|
81
|
-
expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[quote_char]
|
81
|
+
expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::Testing::BusinessRules.csv_opt_map[quote_char]
|
82
82
|
end
|
83
83
|
|
84
84
|
Given /^the (source|target) file uses a preceding ([\w ]+) to escape an embedded quoting character$/ do |st, escape_char|
|
85
|
-
expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[escape_char]
|
85
|
+
expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::Testing::BusinessRules.csv_opt_map[escape_char]
|
86
86
|
end
|
87
87
|
|
88
88
|
Given /^the (source|target) file uses ([\w ]+) line endings$/ do |st, line_endings|
|
89
|
-
expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::BusinessRules.csv_opt_map[line_endings]
|
89
|
+
expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::Testing::BusinessRules.csv_opt_map[line_endings]
|
90
90
|
end
|
91
91
|
|
92
92
|
Given /^the (source|target) file uses "([^"]+)" as a record separator$/ do |st, line_endings|
|
@@ -130,7 +130,7 @@ Given /^the source field '([^']+)' (?:has|is set to) the value "([^"]*)"$/ do |s
|
|
130
130
|
step "the source field '#{source_field}'"
|
131
131
|
|
132
132
|
source_name, source_field_name = @brt.sources.parse_full_field(source_field)
|
133
|
-
@brt.sources[source_name].fields[source_field_name].value = Remi::BusinessRules::ParseFormula.parse(value)
|
133
|
+
@brt.sources[source_name].fields[source_field_name].value = Remi::Testing::BusinessRules::ParseFormula.parse(value)
|
134
134
|
end
|
135
135
|
|
136
136
|
Given /^the source field (?:has|is set to) the value "([^"]*)"$/ do |value|
|
@@ -143,7 +143,7 @@ Given /^the source field '([^']+)' (?:has|is set to) the multiline value$/ do |s
|
|
143
143
|
step "the source field '#{source_field}'"
|
144
144
|
|
145
145
|
source_name, source_field_name = @brt.sources.parse_full_field(source_field)
|
146
|
-
@brt.sources[source_name].fields[source_field_name].value = Remi::BusinessRules::ParseFormula.parse(value)
|
146
|
+
@brt.sources[source_name].fields[source_field_name].value = Remi::Testing::BusinessRules::ParseFormula.parse(value)
|
147
147
|
end
|
148
148
|
|
149
149
|
Given /^the source field (?:has|is set to) the multiline value$/ do |value|
|
@@ -266,7 +266,7 @@ Then /^the target field '([^']+)' is (?:set to the value|populated with) "([^"]*
|
|
266
266
|
@brt.run_transforms
|
267
267
|
}.not_to raise_error
|
268
268
|
Array(target_names).each do |target_name|
|
269
|
-
expect(@brt.targets[target_name].fields[target_field_name].values.uniq).to eq [Remi::BusinessRules::ParseFormula.parse(value)]
|
269
|
+
expect(@brt.targets[target_name].fields[target_field_name].values.uniq).to eq [Remi::Testing::BusinessRules::ParseFormula.parse(value)]
|
270
270
|
end
|
271
271
|
}
|
272
272
|
end
|
@@ -12,6 +12,7 @@ Feature: Tests the parse_date transform
|
|
12
12
|
Given the source field 'Date String' has the value "<Date String>"
|
13
13
|
And the job parameter 'format' is "<Format>"
|
14
14
|
Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
|
15
|
+
|
15
16
|
Examples:
|
16
17
|
| Date String | Format | Parsed Date |
|
17
18
|
| 2015-10-21 | %Y-%m-%d | 2015-10-21 |
|
data/jobs/metadata_job.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative 'all_jobs_shared'
|
2
|
+
ENV['TZ'] = 'UTC'
|
2
3
|
|
3
4
|
class MetadataJob
|
4
5
|
include AllJobsShared
|
@@ -11,7 +12,7 @@ class MetadataJob
|
|
11
12
|
:activity_type => { from: 'in', in: true, type: :string, valid_values: ['A', 'B', 'C'], cdc_type: 2 },
|
12
13
|
:activity_counter => { from: 'in', in: true, type: :integer, cdc_type: 2 },
|
13
14
|
:activity_score => { from: 'in', in: true, type: :float, cdc_type: 2 },
|
14
|
-
:activity_cost => { from: 'in', in: true, type: :decimal, precision:
|
15
|
+
:activity_cost => { from: 'in', in: true, type: :decimal, precision: 8, scale: 2, cdc_type: 2 },
|
15
16
|
:activity_date => { from: 'in', in: true, type: :datetime, in_format: '%m/%d/%Y %H:%M:%S', out_format: '%Y-%m-%dT%H:%M:%S', cdc_type: 2 },
|
16
17
|
:source_filename => { from: 'in', in: true, type: :string, cdc_type: 1 }
|
17
18
|
}
|
@@ -25,36 +26,28 @@ class MetadataJob
|
|
25
26
|
:activity_type => { from: 'out', out: true, type: :string, valid_values: ['A', 'B', 'C'] },
|
26
27
|
:activity_counter => { from: 'out', out: true, type: :integer },
|
27
28
|
:activity_score => { from: 'out', out: true, type: :float },
|
28
|
-
:activity_cost => { from: 'out', out: true, type: :decimal, precision:
|
29
|
+
:activity_cost => { from: 'out', out: true, type: :decimal, precision: 8, scale: 2 },
|
29
30
|
:activity_date => { from: 'out', out: true, type: :datetime, in_format: '%m/%d/%Y %H:%M:%S', out_format: '%Y-%m-%dT%H:%M:%S' },
|
30
31
|
:source_filename => { from: 'out', out: true, type: :string, cdc_type: 1 }
|
31
32
|
}
|
32
33
|
|
33
34
|
define_transform :main do
|
35
|
+
source_data.enforce_types
|
34
36
|
|
35
|
-
|
36
|
-
source_data.df = Remi::DataFrame.daru([
|
37
|
-
['1','1','3/3/1998','A','1','3.8','12.23','1/3/2016 03:22:36','one.csv'],
|
38
|
-
['2','1','3/3/1998','B','3','4.2','10.53','1/3/2016 03:58:22','one.csv'],
|
39
|
-
['2','1','','B','2','4.23','10.539','1/3/2016 03:58:22','one.csv']
|
40
|
-
].transpose, order: [
|
41
|
-
:activity_id,
|
42
|
-
:student_id,
|
43
|
-
:student_dob,
|
44
|
-
:activity_type,
|
45
|
-
:activity_counter,
|
46
|
-
:activity_score,
|
47
|
-
:activity_cost,
|
48
|
-
:activity_date,
|
49
|
-
:source_filename
|
50
|
-
])
|
51
|
-
=end
|
52
|
-
|
53
|
-
Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields) do
|
37
|
+
Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields, target_metadata: target_data.fields) do
|
54
38
|
target_data.fields.keys.each do |field|
|
55
39
|
map source(field) .target(field)
|
56
|
-
|
40
|
+
|
41
|
+
map source(field) .target("#{field}_class".to_sym)
|
42
|
+
.transform(->(v) { v.class })
|
57
43
|
end
|
44
|
+
|
45
|
+
map source(:activity_cost) .target(:activity_cost_precision, :activity_cost_scale)
|
46
|
+
.transform(->(row) {
|
47
|
+
components = row[:activity_cost].to_s.split('.')
|
48
|
+
row[:activity_cost_precision] = components.first.size
|
49
|
+
row[:activity_cost_scale] = components.last.size
|
50
|
+
})
|
58
51
|
end
|
59
52
|
end
|
60
53
|
end
|
@@ -22,7 +22,7 @@ class ParseDateJob
|
|
22
22
|
.transform(Remi::Transform::ParseDate.new(in_format: params[:format], if_blank: params[:if_blank]))
|
23
23
|
|
24
24
|
map source(:stubbed_date) .target(:parsed_stubbed_date)
|
25
|
-
.transform(Remi::Transform::ParseDate.new(in_format:
|
25
|
+
.transform(Remi::Transform::ParseDate.new(in_format: source_data.fields[:stubbed_date][:in_format], if_blank: params[:if_blank]))
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
data/lib/remi/cucumber.rb
CHANGED
@@ -3,5 +3,9 @@ require 'cucumber/rspec/doubles'
|
|
3
3
|
|
4
4
|
require 'regexp-examples'
|
5
5
|
|
6
|
-
require_relative '
|
7
|
-
require_relative '
|
6
|
+
require_relative 'testing/data_stub'
|
7
|
+
require_relative 'testing/business_rules'
|
8
|
+
|
9
|
+
class Remi::DataSource
|
10
|
+
include Remi::Testing::DataStub
|
11
|
+
end
|
data/lib/remi/data_subject.rb
CHANGED
@@ -1,9 +1,4 @@
|
|
1
1
|
module Remi
|
2
|
-
|
3
|
-
# Namespaces for specific sources/targets
|
4
|
-
module DataSource; end
|
5
|
-
module DataTarget; end
|
6
|
-
|
7
2
|
class DataSubject
|
8
3
|
def initialize(*args, fields: Remi::Fields.new, remi_df_type: :daru, logger: Remi::Settings.logger, **kargs, &block)
|
9
4
|
@fields = fields
|
@@ -11,16 +6,24 @@ module Remi
|
|
11
6
|
@logger = logger
|
12
7
|
end
|
13
8
|
|
9
|
+
# Public: Fields defined for this data subject
|
14
10
|
attr_accessor :fields
|
15
11
|
|
12
|
+
# Public: The default method for symbolizing field names
|
16
13
|
def field_symbolizer
|
17
14
|
Remi::FieldSymbolizers[:standard]
|
18
15
|
end
|
19
16
|
|
17
|
+
# Public: Access the dataframe from a DataSource
|
18
|
+
#
|
19
|
+
# Returns a Remi::DataFrame
|
20
20
|
def df
|
21
21
|
@dataframe ||= Remi::DataFrame.create(@remi_df_type, [], order: @fields.keys)
|
22
22
|
end
|
23
23
|
|
24
|
+
# Public: Reassigns the dataframe associated with this subject
|
25
|
+
#
|
26
|
+
# Returns the assigned dataframe
|
24
27
|
def df=(new_dataframe)
|
25
28
|
if new_dataframe.respond_to? :remi_df_type
|
26
29
|
@dataframe = new_dataframe
|
@@ -29,58 +32,78 @@ module Remi
|
|
29
32
|
end
|
30
33
|
end
|
31
34
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
35
|
+
# Public: Enforces types defined in the field metadata.
|
36
|
+
# For example, if a field has metadata with type: :date, then the
|
37
|
+
# type enforcer will convert data in that field into a date, and will
|
38
|
+
# throw an error if it is unable to parse any of the values.
|
39
|
+
#
|
40
|
+
# types - If set, restricts the data types that are enforced to just those listed.
|
41
|
+
#
|
42
|
+
# Returns nothing.
|
43
|
+
def enforce_types(*types)
|
44
|
+
sttm = SourceToTargetMap.new(df, source_metadata: fields)
|
45
|
+
fields.keys.each do |field|
|
46
|
+
next unless (types.size == 0 || types.include?(fields[field][:type])) && df.vectors.include?(field)
|
47
|
+
sttm.source(field).target(field).transform(Remi::Transform::EnforceType.new).execute
|
39
48
|
end
|
40
49
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
end
|
50
|
+
nil
|
51
|
+
end
|
52
|
+
end
|
45
53
|
|
46
|
-
# Public: Called to extract data from the source.
|
47
|
-
#
|
48
|
-
# Returns data in a format that can be used to create a dataframe.
|
49
|
-
def extract!
|
50
|
-
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
51
|
-
@extract
|
52
|
-
end
|
53
54
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
55
|
+
class DataSource < DataSubject
|
56
|
+
|
57
|
+
# Public: Access the dataframe from a DataSource
|
58
|
+
#
|
59
|
+
# Returns a Remi::DataFrame
|
60
|
+
def df
|
61
|
+
@dataframe ||= to_dataframe
|
60
62
|
end
|
61
63
|
|
62
|
-
|
64
|
+
# Public: Memoized version of extract!
|
65
|
+
def extract
|
66
|
+
@extract ||= extract!
|
67
|
+
end
|
63
68
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
69
|
+
# Public: Called to extract data from the source.
|
70
|
+
#
|
71
|
+
# Returns data in a format that can be used to create a dataframe.
|
72
|
+
def extract!
|
73
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
74
|
+
@extract
|
75
|
+
end
|
71
76
|
|
72
|
-
|
73
|
-
|
77
|
+
# Public: Converts extracted data to a dataframe
|
78
|
+
#
|
79
|
+
# Returns a Remi::DataFrame
|
80
|
+
def to_dataframe
|
81
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
82
|
+
end
|
83
|
+
end
|
74
84
|
|
75
|
-
# Public: Performs the load operation, regardless of whether it has
|
76
|
-
# already executed.
|
77
|
-
#
|
78
|
-
# Returns true if the load operation was successful
|
79
|
-
def load!
|
80
|
-
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
81
85
|
|
82
|
-
|
83
|
-
|
86
|
+
class DataTarget < DataSubject
|
87
|
+
|
88
|
+
# Public: Loads data to the target. This is automatically called
|
89
|
+
# after all transforms have executed, but could also get called manually.
|
90
|
+
# The actual load operation is only executed if it hasn't already been executed.
|
91
|
+
#
|
92
|
+
# Returns true if the load operation was successful.
|
93
|
+
def load
|
94
|
+
return true if @loaded || df.size == 0
|
95
|
+
|
96
|
+
@loaded = load!
|
97
|
+
end
|
98
|
+
|
99
|
+
# Public: Performs the load operation, regardless of whether it has
|
100
|
+
# already executed.
|
101
|
+
#
|
102
|
+
# Returns true if the load operation was successful
|
103
|
+
def load!
|
104
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
105
|
+
|
106
|
+
false
|
84
107
|
end
|
85
108
|
end
|
86
109
|
end
|
@@ -26,8 +26,7 @@ module Remi
|
|
26
26
|
|
27
27
|
|
28
28
|
|
29
|
-
class DataSource::CsvFile <
|
30
|
-
include Remi::DataSubject::DataSource
|
29
|
+
class DataSource::CsvFile < DataSource
|
31
30
|
include Remi::DataSubject::CsvFile
|
32
31
|
|
33
32
|
def initialize(*args, **kargs, &block)
|
@@ -130,8 +129,7 @@ module Remi
|
|
130
129
|
|
131
130
|
|
132
131
|
|
133
|
-
class DataTarget::CsvFile <
|
134
|
-
include ::Remi::DataSubject::DataTarget
|
132
|
+
class DataTarget::CsvFile < DataTarget
|
135
133
|
include ::Remi::DataSubject::CsvFile
|
136
134
|
|
137
135
|
default_csv_options[:row_sep] = "\n"
|
@@ -1,7 +1,6 @@
|
|
1
1
|
module Remi
|
2
2
|
|
3
|
-
class DataSource::DataFrame <
|
4
|
-
include Remi::DataSubject::DataSource
|
3
|
+
class DataSource::DataFrame < DataSource
|
5
4
|
|
6
5
|
def initialize(*args, **kargs, &block)
|
7
6
|
super
|
@@ -30,8 +29,7 @@ module Remi
|
|
30
29
|
end
|
31
30
|
|
32
31
|
|
33
|
-
class DataTarget::DataFrame <
|
34
|
-
include Remi::DataSubject::DataTarget
|
32
|
+
class DataTarget::DataFrame < DataTarget
|
35
33
|
|
36
34
|
def initialize(*args, **kargs, &block)
|
37
35
|
super
|
@@ -13,8 +13,7 @@ module Remi
|
|
13
13
|
end
|
14
14
|
|
15
15
|
|
16
|
-
class DataSource::Postgres <
|
17
|
-
include Remi::DataSubject::DataSource
|
16
|
+
class DataSource::Postgres < DataSource
|
18
17
|
include Remi::DataSubject::Postgres
|
19
18
|
|
20
19
|
|
@@ -65,8 +64,7 @@ module Remi
|
|
65
64
|
|
66
65
|
# VERY PRELIMINARY IMPLEMENTATION - ONLY LOADS TO TEMP TABLES
|
67
66
|
# IT IS THEN UP TO THE USER TO DO ELT TO LOAD THE FINAL TABLE
|
68
|
-
class DataTarget::Postgres <
|
69
|
-
include Remi::DataSubject::DataTarget
|
67
|
+
class DataTarget::Postgres < DataTarget
|
70
68
|
include Remi::DataSubject::Postgres
|
71
69
|
|
72
70
|
def initialize(*args, **kargs, &block)
|
@@ -21,8 +21,7 @@ module Remi
|
|
21
21
|
end
|
22
22
|
|
23
23
|
|
24
|
-
class DataSource::Salesforce <
|
25
|
-
include Remi::DataSubject::DataSource
|
24
|
+
class DataSource::Salesforce < DataSource
|
26
25
|
include Remi::DataSubject::Salesforce
|
27
26
|
|
28
27
|
def initialize(*args, **kargs, &block)
|
@@ -92,8 +91,7 @@ module Remi
|
|
92
91
|
end
|
93
92
|
|
94
93
|
|
95
|
-
class DataTarget::Salesforce <
|
96
|
-
include Remi::DataSubject::DataTarget
|
94
|
+
class DataTarget::Salesforce < DataTarget
|
97
95
|
include Remi::DataSubject::Salesforce
|
98
96
|
|
99
97
|
def initialize(*args, **kargs, &block)
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module Remi
|
2
|
+
module Testing
|
3
|
+
module DataStub
|
4
|
+
def stub_row_array
|
5
|
+
@fields.values.map do |attribs|
|
6
|
+
stub_values(attribs)
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
def empty_stub_df
|
11
|
+
self.df = Daru::DataFrame.new([], order: @fields.keys)
|
12
|
+
end
|
13
|
+
|
14
|
+
def stub_df
|
15
|
+
empty_stub_df
|
16
|
+
self.df.add_row(stub_row_array)
|
17
|
+
end
|
18
|
+
|
19
|
+
def stub_values(**attribs)
|
20
|
+
stub_type = "stub_#{attribs[:type]}".to_sym
|
21
|
+
if respond_to?(stub_type)
|
22
|
+
send(stub_type, attribs)
|
23
|
+
else
|
24
|
+
stub_string(attribs)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def stub_string(**attribs)
|
29
|
+
Faker::Hipster.word
|
30
|
+
end
|
31
|
+
|
32
|
+
def stub_float(**attribs)
|
33
|
+
Faker::Number.decimal(2,3)
|
34
|
+
end
|
35
|
+
|
36
|
+
def stub_decimal(**attribs)
|
37
|
+
Faker::Number.decimal(attribs[:precision],attribs[:scale])
|
38
|
+
end
|
39
|
+
|
40
|
+
def stub_integer(**attribs)
|
41
|
+
Faker::Number.number(4).to_s
|
42
|
+
end
|
43
|
+
|
44
|
+
def stub_date(**attribs)
|
45
|
+
in_format = attribs[:in_format]
|
46
|
+
result = Faker::Date.backward(3650)
|
47
|
+
result = result.strftime(in_format) if in_format
|
48
|
+
result
|
49
|
+
end
|
50
|
+
|
51
|
+
def stub_datetime(**attribs)
|
52
|
+
in_format = attribs[:in_format]
|
53
|
+
result = Faker::Time.backward(3650)
|
54
|
+
result = result.strftime(in_format) if in_format
|
55
|
+
result
|
56
|
+
end
|
57
|
+
|
58
|
+
def stub_boolean(**attribs)
|
59
|
+
['T','F'].shuffle.first
|
60
|
+
end
|
61
|
+
|
62
|
+
def stub_json(**attribs)
|
63
|
+
if attribs[:json_array]
|
64
|
+
[ stub_string ]
|
65
|
+
else
|
66
|
+
{ Faker::Hipster.words(1, true, true) => stub_string }
|
67
|
+
end.to_json
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/remi/transform.rb
CHANGED
@@ -218,6 +218,7 @@ module Remi
|
|
218
218
|
# This transform is metadata aware and will use :in_format metadata
|
219
219
|
# from the source
|
220
220
|
#
|
221
|
+
# type - Specify either :date, or :datetime type (default: date)
|
221
222
|
# in_format - The date format to use to convert the string (default: uses :in_format
|
222
223
|
# from the source metadata. If that is not defined, use '%Y-%m-%d').
|
223
224
|
# if_blank - Value to use if the incoming value is blank (default: uses :if_blank
|
@@ -232,20 +233,33 @@ module Remi
|
|
232
233
|
# tform.source_metadata = { in_format: '%m/%d/%Y' }
|
233
234
|
# tform.to_proc.call('02/22/2013') # => Date.new(2013,2,22)
|
234
235
|
class ParseDate < Transform
|
235
|
-
def initialize(*args, in_format: nil, if_blank: nil, **kargs, &block)
|
236
|
+
def initialize(*args, type: nil, in_format: nil, if_blank: nil, **kargs, &block)
|
236
237
|
super
|
238
|
+
@type = type
|
237
239
|
@in_format = in_format
|
238
240
|
@if_blank = if_blank
|
239
241
|
end
|
240
242
|
|
243
|
+
def type
|
244
|
+
@type ||= @source_metadata.fetch(:type, :date)
|
245
|
+
end
|
246
|
+
|
241
247
|
def in_format
|
242
|
-
@in_format ||= @source_metadata.fetch(:in_format,
|
248
|
+
@in_format ||= @source_metadata.fetch(:in_format, default_date_format)
|
243
249
|
end
|
244
250
|
|
245
251
|
def if_blank
|
246
252
|
@if_blank ||= @source_metadata.fetch(:if_blank, nil)
|
247
253
|
end
|
248
254
|
|
255
|
+
def default_date_format
|
256
|
+
if type == :datetime
|
257
|
+
'%Y-%m-%d %H:%M:%S'
|
258
|
+
else
|
259
|
+
'%Y-%m-%d'
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
249
263
|
def transform(value)
|
250
264
|
begin
|
251
265
|
if value.respond_to?(:strftime)
|
@@ -260,15 +274,19 @@ module Remi
|
|
260
274
|
end
|
261
275
|
end
|
262
276
|
|
277
|
+
def class_type
|
278
|
+
@class_type ||= type == :datetime ? Time : Date
|
279
|
+
end
|
280
|
+
|
263
281
|
def string_to_date(value)
|
264
|
-
|
282
|
+
class_type.strptime(value, in_format)
|
265
283
|
end
|
266
284
|
|
267
285
|
def blank_handler(value)
|
268
286
|
if if_blank == :low
|
269
|
-
|
287
|
+
class_type.new(1900,01,01)
|
270
288
|
elsif if_blank == :high
|
271
|
-
|
289
|
+
class_type.new(2999,12,31)
|
272
290
|
elsif if_blank.respond_to? :call
|
273
291
|
if_blank.call(value)
|
274
292
|
else
|
@@ -282,6 +300,7 @@ module Remi
|
|
282
300
|
# This transform is metadata aware and will use :in_format/:out_format metadata
|
283
301
|
# from the source.
|
284
302
|
#
|
303
|
+
# type - Specify either :date, or :datetime type (default: date)
|
285
304
|
# in_format - The date format used to parse the input value. If the input value
|
286
305
|
# is a date, then this parameter is ignored. (default: uses :in_format
|
287
306
|
# from the source metadata. If that is not defined, use '%Y-%m-%d')
|
@@ -297,18 +316,35 @@ module Remi
|
|
297
316
|
# tform.source_metadata = { in_format: '%m/%d/%Y', out_format: '%Y-%m-%d' }
|
298
317
|
# tform.to_proc.call('02/22/2013') # => "2013-02-22"
|
299
318
|
class FormatDate < Transform
|
300
|
-
def initialize(*args, in_format: nil, out_format: nil, **kargs, &block)
|
319
|
+
def initialize(*args, type: nil, in_format: nil, out_format: nil, **kargs, &block)
|
301
320
|
super
|
321
|
+
@type = type
|
302
322
|
@in_format = in_format
|
303
323
|
@out_format = out_format
|
304
324
|
end
|
305
325
|
|
326
|
+
def type
|
327
|
+
@type ||= @source_metadata.fetch(:type, :date)
|
328
|
+
end
|
329
|
+
|
306
330
|
def in_format
|
307
|
-
@in_format ||= @source_metadata.fetch(:in_format,
|
331
|
+
@in_format ||= @source_metadata.fetch(:in_format, default_date_format)
|
308
332
|
end
|
309
333
|
|
310
334
|
def out_format
|
311
|
-
@out_format ||= @source_metadata.fetch(:out_format,
|
335
|
+
@out_format ||= @source_metadata.fetch(:out_format, default_date_format)
|
336
|
+
end
|
337
|
+
|
338
|
+
def default_date_format
|
339
|
+
if type == :datetime
|
340
|
+
'%Y-%m-%d %H:%M:%S'
|
341
|
+
else
|
342
|
+
'%Y-%m-%d'
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
def class_type
|
347
|
+
@class_type ||= type == :datetime ? Time : Date
|
312
348
|
end
|
313
349
|
|
314
350
|
def transform(value)
|
@@ -318,7 +354,7 @@ module Remi
|
|
318
354
|
elsif value.respond_to? :strftime
|
319
355
|
value.strftime(out_format)
|
320
356
|
else
|
321
|
-
|
357
|
+
class_type.strptime(value, in_format).strftime(out_format)
|
322
358
|
end
|
323
359
|
rescue ArgumentError => err
|
324
360
|
raise err, "Error parsing date (#{value.class}): '#{value}' using the format #{in_format} => #{out_format}"
|
@@ -461,7 +497,7 @@ module Remi
|
|
461
497
|
def if_blank
|
462
498
|
return @if_blank if @if_blank_set
|
463
499
|
@if_blank_set = true
|
464
|
-
@if_blank = @source_metadata.fetch(:if_blank,
|
500
|
+
@if_blank = @source_metadata.fetch(:if_blank, default_if_blank)
|
465
501
|
end
|
466
502
|
|
467
503
|
def blank_handler(value)
|
@@ -474,8 +510,12 @@ module Remi
|
|
474
510
|
end
|
475
511
|
end
|
476
512
|
|
513
|
+
def default_if_blank
|
514
|
+
type == :string ? '' : nil
|
515
|
+
end
|
516
|
+
|
477
517
|
def transform(value)
|
478
|
-
if value.blank?
|
518
|
+
if value.blank? && type != :json
|
479
519
|
blank_handler(value)
|
480
520
|
else
|
481
521
|
case type
|
@@ -488,9 +528,15 @@ module Remi
|
|
488
528
|
when :decimal
|
489
529
|
Float("%.#{scale}f" % Float(value))
|
490
530
|
when :date
|
491
|
-
value.is_a?(Date) ? value : Date.strptime(value, in_format)
|
531
|
+
value.is_a?(Date) ? value : Date.strptime(value, in_format)
|
492
532
|
when :datetime
|
493
|
-
Time.strptime(value, in_format)
|
533
|
+
value.is_a?(Time) ? value : Time.strptime(value, in_format)
|
534
|
+
when :json
|
535
|
+
if value.blank? && value != [] && value != {}
|
536
|
+
blank_handler(value)
|
537
|
+
else
|
538
|
+
value.is_a?(Hash) || value.is_a?(Array) ? value : JSON.parse(value)
|
539
|
+
end
|
494
540
|
else
|
495
541
|
raise ArgumentError, "Unknown type enforcement: #{type}"
|
496
542
|
end
|
data/lib/remi/version.rb
CHANGED
@@ -0,0 +1,44 @@
|
|
1
|
+
require_relative 'remi_spec'
|
2
|
+
|
3
|
+
# VERY SPARSE TESTING! DO MORE!
|
4
|
+
|
5
|
+
describe DataSubject do
|
6
|
+
|
7
|
+
describe 'enforcing types' do
|
8
|
+
let(:dataframe) do
|
9
|
+
Remi::DataFrame::Daru.new({ my_date: ['10/21/2015'] })
|
10
|
+
end
|
11
|
+
|
12
|
+
let(:data_subject) do
|
13
|
+
DataSubject.new(fields: fields).tap { |ds| ds.df = dataframe }
|
14
|
+
end
|
15
|
+
|
16
|
+
let(:fields) do
|
17
|
+
Fields.new({
|
18
|
+
my_date: { type: :date, in_format: '%m/%d/%Y' },
|
19
|
+
other_date: { type: :date, in_format: '%m/%d/%Y' }
|
20
|
+
})
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'converts a date string to a date using an in_format' do
|
24
|
+
data_subject.enforce_types
|
25
|
+
expect(data_subject.df[:my_date].to_a).to eq [Date.new(2015, 10, 21)]
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'does not do any conversion if the type is not specified' do
|
29
|
+
fields[:my_date].delete(:type)
|
30
|
+
data_subject.enforce_types
|
31
|
+
expect(data_subject.df[:my_date].to_a).to eq ['10/21/2015']
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'throws an error if the data does not conform to its type' do
|
35
|
+
dataframe[:my_date].recode! { |v| '2015-10-21' }
|
36
|
+
expect { data_subject.enforce_types }.to raise_error ArgumentError
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'does not create new vectors during enforcement' do
|
40
|
+
data_subject.enforce_types
|
41
|
+
expect(dataframe.vectors.to_a).to eq [:my_date]
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
require_relative '../remi_spec'
|
2
|
+
require 'remi/testing/data_stub'
|
3
|
+
|
4
|
+
describe Testing::DataStub do
|
5
|
+
class StubTester < DataSubject
|
6
|
+
include Testing::DataStub
|
7
|
+
end
|
8
|
+
|
9
|
+
context 'data type stubs' do
|
10
|
+
let(:stub_tester) { StubTester.new }
|
11
|
+
|
12
|
+
context '#stub_string' do
|
13
|
+
let(:stub) { stub_tester.stub_string }
|
14
|
+
|
15
|
+
it 'stubs as strings' do
|
16
|
+
expect(stub).to be_a String
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context '#stub_float' do
|
21
|
+
let(:stub) { stub_tester.stub_float }
|
22
|
+
|
23
|
+
it 'stubs as strings' do
|
24
|
+
expect(stub).to be_a String
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'represents a floating point number' do
|
28
|
+
expect(Float(stub) % 1).not_to eq 0.0
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context '#stub_decimal' do
|
33
|
+
let(:stub) { stub_tester.stub_decimal(precision: 8, scale: 2) }
|
34
|
+
|
35
|
+
it 'stubs as strings' do
|
36
|
+
expect(stub).to be_a String
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'represents a floating point number' do
|
40
|
+
expect(Float(stub) % 1).not_to eq 0.0
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'comes with the specified precision' do
|
44
|
+
expect(Float(stub).to_s.split('.').first.size).to eq 8
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'comes with the specified scale' do
|
48
|
+
expect(Float(stub).to_s.split('.').last.size).to eq 2
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
context '#stub_integer' do
|
53
|
+
let(:stub) { stub_tester.stub_integer }
|
54
|
+
|
55
|
+
it 'stubs as strings' do
|
56
|
+
expect(stub).to be_a String
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'represents an integer' do
|
60
|
+
expect(Float(stub) % 1).to eq 0.0
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'converts to an integer' do
|
64
|
+
expect { Integer(stub) }.not_to raise_error
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
context '#stub_date' do
|
70
|
+
context 'without an in_format' do
|
71
|
+
let(:stub) { stub_tester.stub_date }
|
72
|
+
|
73
|
+
it 'stubs as a date' do
|
74
|
+
expect(stub).to be_a Date
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
context 'with an in_format' do
|
79
|
+
let(:stub) { stub_tester.stub_date(in_format: '%m/%d/%Y') }
|
80
|
+
|
81
|
+
it 'stubs as strings' do
|
82
|
+
expect(stub).to be_a String
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'can parsed as a date using the specified in_format' do
|
86
|
+
expect { Date.strptime(stub, '%m/%d/%Y') }.not_to raise_error
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
context '#stub_datetime' do
|
92
|
+
context 'without an in_format' do
|
93
|
+
let(:stub) { stub_tester.stub_datetime }
|
94
|
+
|
95
|
+
it 'stubs as a time' do
|
96
|
+
expect(stub).to be_a Time
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
context 'with an in_format' do
|
101
|
+
let(:stub) { stub_tester.stub_datetime(in_format: '%m/%d/%Y %H:%M:%S') }
|
102
|
+
|
103
|
+
it 'stubs as strings' do
|
104
|
+
expect(stub).to be_a String
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'can parsed as a time using the specified in_format' do
|
108
|
+
expect { Time.strptime(stub, '%m/%d/%Y %H:%M:%S') }.not_to raise_error
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
context '#stub_boolean' do
|
114
|
+
let(:stub) { stub_tester.stub_boolean }
|
115
|
+
|
116
|
+
it 'stubs as strings' do
|
117
|
+
expect(stub).to be_a String
|
118
|
+
end
|
119
|
+
|
120
|
+
it 'is either T or F' do
|
121
|
+
expect(stub).to eq('T').or eq('F')
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
context '#stub_json' do
|
126
|
+
let(:stub) { stub_tester.stub_json }
|
127
|
+
|
128
|
+
it 'stubs as strings' do
|
129
|
+
expect(stub).to be_a String
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'can be parsed as JSON' do
|
133
|
+
expect { JSON.parse(stub) }.not_to raise_error
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
|
139
|
+
context 'stubbed dataframe data' do
|
140
|
+
let(:stub_tester) do
|
141
|
+
StubTester.new(fields: {
|
142
|
+
my_date: { type: :date, in_format: '%m/%d/%Y' },
|
143
|
+
my_str: {}
|
144
|
+
})
|
145
|
+
end
|
146
|
+
|
147
|
+
context '#empty_stub_df' do
|
148
|
+
before { stub_tester.empty_stub_df }
|
149
|
+
|
150
|
+
it 'creates a dataframe with no data' do
|
151
|
+
expect(stub_tester.df.size).to eq 0
|
152
|
+
end
|
153
|
+
|
154
|
+
it 'creates a dataframe with the right number of vectors' do
|
155
|
+
expect(stub_tester.df.vectors.size).to eq 2
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
context '#stub_df' do
|
160
|
+
before { stub_tester.stub_df }
|
161
|
+
|
162
|
+
it 'creates a row of data' do
|
163
|
+
expect(stub_tester.df.size).to eq 1
|
164
|
+
end
|
165
|
+
|
166
|
+
it 'creates data according to the supplied metadata' do
|
167
|
+
expect { Date.strptime(stub_tester.df[:my_date].first, '%m/%d/%Y') }.not_to raise_error
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
data/spec/transform_spec.rb
CHANGED
@@ -50,4 +50,79 @@ describe Transform do
|
|
50
50
|
expect(transform.call(1, 2)).to eq [1, 2]
|
51
51
|
end
|
52
52
|
end
|
53
|
+
|
54
|
+
describe Transform::ParseDate do
|
55
|
+
it 'parses a date using the specified in_format' do
|
56
|
+
parser = Transform::ParseDate.new(in_format: '%m/%d/%Y')
|
57
|
+
expect(parser.call('03/22/2011')).to eq Date.new(2011,3,22)
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'returns a date if it has already been parsed' do
|
61
|
+
parser = Transform::ParseDate.new(in_format: '%m/%d/%Y')
|
62
|
+
expect(Date.new(2011,3,22)).to eq Date.new(2011,3,22)
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'uses ISO 8601 as the default date parser' do
|
66
|
+
parser = Transform::ParseDate.new
|
67
|
+
expect(parser.call('2011-03-22')).to eq Date.new(2011,3,22)
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'fails when an unparseable date is provided' do
|
71
|
+
parser = Transform::ParseDate.new
|
72
|
+
expect { parser.call('03/22/2011') }.to raise_error ArgumentError
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'parses datetimes when the type is specified' do
|
76
|
+
parser = Transform::ParseDate.new(type: :datetime, in_format: '%m/%d/%Y %H:%M:%S')
|
77
|
+
expect(parser.call('03/22/2011 04:22:00')).to eq Time.new(2011,3,22,4,22,0)
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'uses ISO 8601 as the default datetime parser' do
|
81
|
+
parser = Transform::ParseDate.new(type: :datetime)
|
82
|
+
expect(parser.call('2011-03-22 04:22:00')).to eq Time.new(2011,3,22,4,22,0)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
context Transform::FormatDate do
|
87
|
+
it 'formats a date using the specified out_format' do
|
88
|
+
formatter = Transform::FormatDate.new(out_format: '%m/%d/%Y')
|
89
|
+
expect(formatter.call(Date.new(2011,3,22))).to eq '03/22/2011'
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'formats a datetime using the specified out_format' do
|
93
|
+
formatter = Transform::FormatDate.new(type: :datetime, out_format: '%m/%d/%Y %H:%M:%S')
|
94
|
+
expect(formatter.call(Time.new(2011,3,22,4,22,0))).to eq '03/22/2011 04:22:00'
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'uses the in_format to parse strings when the source is not already a date' do
|
98
|
+
formatter = Transform::FormatDate.new(in_format: '%d/%m/%Y', out_format: '%m/%d/%Y')
|
99
|
+
expect(formatter.call('22/03/2011')).to eq '03/22/2011'
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'fails when an unparseable date is provided' do
|
103
|
+
formatter = Transform::FormatDate.new(in_format: '%d/%m/%Y', out_format: '%m/%d/%Y')
|
104
|
+
expect { formatter.call('22/22/2011') }.to raise_error ArgumentError
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'uses ISO 8601 as the default date parser' do
|
108
|
+
formatter = Transform::FormatDate.new(out_format: '%m/%d/%Y')
|
109
|
+
expect(formatter.call('2011-03-22')).to eq '03/22/2011'
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'uses ISO 8601 as the default date formatter' do
|
113
|
+
formatter = Transform::FormatDate.new(in_format: '%m/%d/%Y')
|
114
|
+
expect(formatter.call('03/22/2011')).to eq '2011-03-22'
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'uses ISO 8601 as the default datetime parser' do
|
118
|
+
formatter = Transform::FormatDate.new(type: :datetime, out_format: '%m/%d/%Y %H:%M:%S')
|
119
|
+
expect(formatter.call('2011-03-22 04:22:00')).to eq '03/22/2011 04:22:00'
|
120
|
+
end
|
121
|
+
|
122
|
+
it 'uses ISO 8601 as the default datetime formatter' do
|
123
|
+
formatter = Transform::FormatDate.new(type: :datetime, in_format: '%m/%d/%Y %H:%M:%S')
|
124
|
+
expect(formatter.call('03/22/2011 04:22:00')).to eq '2011-03-22 04:22:00'
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
53
128
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.39
|
4
|
+
version: 0.2.40
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sterling Paramore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bond
|
@@ -225,8 +225,6 @@ files:
|
|
225
225
|
- lib/remi.rb
|
226
226
|
- lib/remi/cli.rb
|
227
227
|
- lib/remi/cucumber.rb
|
228
|
-
- lib/remi/cucumber/business_rules.rb
|
229
|
-
- lib/remi/cucumber/data_source.rb
|
230
228
|
- lib/remi/data_frame.rb
|
231
229
|
- lib/remi/data_frame/daru.rb
|
232
230
|
- lib/remi/data_subject.rb
|
@@ -248,11 +246,14 @@ files:
|
|
248
246
|
- lib/remi/source_to_target_map.rb
|
249
247
|
- lib/remi/source_to_target_map/map.rb
|
250
248
|
- lib/remi/source_to_target_map/row.rb
|
249
|
+
- lib/remi/testing/business_rules.rb
|
250
|
+
- lib/remi/testing/data_stub.rb
|
251
251
|
- lib/remi/transform.rb
|
252
252
|
- lib/remi/version.rb
|
253
253
|
- remi.gemspec
|
254
254
|
- spec/data_subject/csv_file_spec.rb
|
255
255
|
- spec/data_subject/data_frame.rb
|
256
|
+
- spec/data_subject_spec.rb
|
256
257
|
- spec/extractor/file_system_spec.rb
|
257
258
|
- spec/extractor/local_file_spec.rb
|
258
259
|
- spec/extractor/s3_file_spec.rb
|
@@ -264,6 +265,7 @@ files:
|
|
264
265
|
- spec/metadata_spec.rb
|
265
266
|
- spec/remi_spec.rb
|
266
267
|
- spec/source_to_target_map_spec.rb
|
268
|
+
- spec/testing/data_stub_spec.rb
|
267
269
|
- spec/transform_spec.rb
|
268
270
|
- workbooks/sample_workbook.ipynb
|
269
271
|
- workbooks/workbook_helper.rb
|
@@ -315,6 +317,7 @@ test_files:
|
|
315
317
|
- features/transforms/truthy.feature
|
316
318
|
- spec/data_subject/csv_file_spec.rb
|
317
319
|
- spec/data_subject/data_frame.rb
|
320
|
+
- spec/data_subject_spec.rb
|
318
321
|
- spec/extractor/file_system_spec.rb
|
319
322
|
- spec/extractor/local_file_spec.rb
|
320
323
|
- spec/extractor/s3_file_spec.rb
|
@@ -326,4 +329,5 @@ test_files:
|
|
326
329
|
- spec/metadata_spec.rb
|
327
330
|
- spec/remi_spec.rb
|
328
331
|
- spec/source_to_target_map_spec.rb
|
332
|
+
- spec/testing/data_stub_spec.rb
|
329
333
|
- spec/transform_spec.rb
|
@@ -1,70 +0,0 @@
|
|
1
|
-
module Remi
|
2
|
-
module DataSource
|
3
|
-
module DataStub
|
4
|
-
def stub_row_array
|
5
|
-
@fields.values.map do |attrib|
|
6
|
-
stub_values[attrib[:type]].call
|
7
|
-
end
|
8
|
-
end
|
9
|
-
|
10
|
-
def empty_stub_df
|
11
|
-
self.df = Daru::DataFrame.new([], order: @fields.keys)
|
12
|
-
end
|
13
|
-
|
14
|
-
def stub_df
|
15
|
-
empty_stub_df
|
16
|
-
self.df.add_row(stub_row_array)
|
17
|
-
end
|
18
|
-
|
19
|
-
def stub_values
|
20
|
-
@stub_values ||= Hash.new(->() { Faker::Hipster.word }).merge({
|
21
|
-
string: ->() { Faker::Hipster.word },
|
22
|
-
number: ->() { Faker::Number.decimal(4,4) },
|
23
|
-
float: ->() { Faker::Number.decimal(2,2) },
|
24
|
-
integer: ->() { Faker::Number.number(4) },
|
25
|
-
date: ->() { Faker::Date.backward(3650) },
|
26
|
-
datetime: ->() { Faker::Time.backward(3650).to_datetime },
|
27
|
-
boolean: ->() { ['T','F'].shuffle.first }
|
28
|
-
})
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
|
33
|
-
class CsvFile
|
34
|
-
include DataStub
|
35
|
-
def stub_tmp_file
|
36
|
-
@stub_tmp_file ||= Tempfile.new('stub_tmp_file.csv').path
|
37
|
-
end
|
38
|
-
|
39
|
-
def write_stub_tmp_file
|
40
|
-
File.open(stub_tmp_file, "wb") do |file|
|
41
|
-
file.puts stub_header
|
42
|
-
file.puts stub_row_csv
|
43
|
-
end
|
44
|
-
|
45
|
-
stub_tmp_file
|
46
|
-
end
|
47
|
-
|
48
|
-
def stub_header
|
49
|
-
@fields.keys.join(@csv_options[:col_sep])
|
50
|
-
end
|
51
|
-
|
52
|
-
def stub_row_csv
|
53
|
-
stub_row_array.join(@csv_options[:col_sep])
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
# Hmmm.... this gets called first because I'm trying to split SF off as a "plugin"
|
58
|
-
class Salesforce < Remi::DataSubject
|
59
|
-
include DataStub
|
60
|
-
end
|
61
|
-
|
62
|
-
class DataFrame
|
63
|
-
include DataStub
|
64
|
-
end
|
65
|
-
|
66
|
-
class Postgres
|
67
|
-
include DataStub
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|