remi 0.2.39 → 0.2.40
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/features/metadata.feature +30 -4
- data/features/step_definitions/remi_step.rb +9 -9
- data/features/transforms/parse_date.feature +1 -0
- data/jobs/metadata_job.rb +15 -22
- data/jobs/transforms/parse_date_job.rb +1 -1
- data/lib/remi/cucumber.rb +6 -2
- data/lib/remi/data_subject.rb +70 -47
- data/lib/remi/data_subject/csv_file.rb +2 -4
- data/lib/remi/data_subject/data_frame.rb +2 -4
- data/lib/remi/data_subject/postgres.rb +2 -4
- data/lib/remi/data_subject/salesforce.rb +2 -4
- data/lib/remi/data_subject/sftp_file.rb +1 -2
- data/lib/remi/{cucumber → testing}/business_rules.rb +1 -1
- data/lib/remi/testing/data_stub.rb +72 -0
- data/lib/remi/transform.rb +59 -13
- data/lib/remi/version.rb +1 -1
- data/spec/data_subject_spec.rb +44 -0
- data/spec/source_to_target_map_spec.rb +1 -1
- data/spec/testing/data_stub_spec.rb +171 -0
- data/spec/transform_spec.rb +75 -0
- metadata +8 -4
- data/lib/remi/cucumber/data_source.rb +0 -70
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9f919d918cc6c2b83a8f6849a30a111ddf07e87
|
4
|
+
data.tar.gz: 7eeaee62f683ee9fd0851e8c61cd47fab8a7f1bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eecc73e562cf266445cf85f7c144424a730f71d95ea588b9ad7c75fc2aba0a09c0aba7440c78639c7433340e3e89888c329aec17660471038996763593e50e45
|
7
|
+
data.tar.gz: 96852fba98c17a79ef1763ece9f5b5cb9893cca9acf5c21da4ac09e352a1fcc2cb8e8f760836f3111493ef3f4b6258f01688b10b8c14952a7978e51623f70653
|
data/Gemfile.lock
CHANGED
data/features/metadata.feature
CHANGED
@@ -9,9 +9,35 @@ Feature: This tests the application of metadata.
|
|
9
9
|
And the source 'Source Data'
|
10
10
|
And the target 'Target Data'
|
11
11
|
|
12
|
-
And the following example record for 'Source Data':
|
13
|
-
| activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
|
14
|
-
| 1 | 1 | 3/3/1998 | A | 1 | 3.8 | 12.23 | 1/3/2016 03:22:36 | one.csv |
|
15
12
|
|
16
13
|
Scenario: Metadata is used to parse date fields
|
17
|
-
|
14
|
+
|
15
|
+
Given the following example record for 'Source Data':
|
16
|
+
| activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
|
17
|
+
| 1 | 1 | 3/3/1998 | A | 1 | 3.8 | 12.23 | 1/3/2016 03:22:36 | one.csv |
|
18
|
+
|
19
|
+
Then the target should match the example:
|
20
|
+
| activity_id | student_id | student_dob | activity_type | activity_counter | activity_score | activity_cost | activity_date | source_filename |
|
21
|
+
| 1 | 1 | 1998-03-03 | A | 1 | 3.8 | 12.23 | 2016-01-03 03:22:36 +0000 | one.csv |
|
22
|
+
|
23
|
+
Scenario Outline: Metadata is used to stub records with values that conform to the metadata
|
24
|
+
|
25
|
+
Then the target field '<Field>' is set to the value "<Class>"
|
26
|
+
|
27
|
+
Examples:
|
28
|
+
| Field | Class |
|
29
|
+
| activity_id_class | String |
|
30
|
+
| student_id_class | String |
|
31
|
+
| student_dob_class | Date |
|
32
|
+
| activity_type_class | String |
|
33
|
+
| activity_counter_class | Fixnum |
|
34
|
+
| activity_score_class | Float |
|
35
|
+
| activity_cost_class | Float |
|
36
|
+
| activity_date_class | Time |
|
37
|
+
| source_filename_class | String |
|
38
|
+
|
39
|
+
|
40
|
+
Scenario: Metadata for decimals is stubbed
|
41
|
+
|
42
|
+
Then the target field 'activity_cost_precision' is populated with "8"
|
43
|
+
And the target field 'activity_cost_scale' is populated with "2"
|
@@ -4,7 +4,7 @@
|
|
4
4
|
### Job and background setup
|
5
5
|
|
6
6
|
Given /^the job is '([[:alnum:]\s]+)'$/ do |arg|
|
7
|
-
@brt = Remi::BusinessRules::Tester.new(arg)
|
7
|
+
@brt = Remi::Testing::BusinessRules::Tester.new(arg)
|
8
8
|
end
|
9
9
|
|
10
10
|
Given /^the job source '([[:alnum:]\s\-_]+)'$/ do |arg|
|
@@ -64,13 +64,13 @@ Then /^the file that comes last in an alphanumeric sort by group will be downloa
|
|
64
64
|
end
|
65
65
|
|
66
66
|
Then /^the file is uploaded to the remote path "([^"]+)"$/ do |remote_path|
|
67
|
-
expect(@brt.target.get_attrib(:remote_path)).to eq Remi::BusinessRules::ParseFormula.parse(remote_path)
|
67
|
+
expect(@brt.target.get_attrib(:remote_path)).to eq Remi::Testing::BusinessRules::ParseFormula.parse(remote_path)
|
68
68
|
end
|
69
69
|
|
70
70
|
## CSV Options
|
71
71
|
|
72
72
|
Given /^the (source|target) file is delimited with a (\w+)$/ do |st, delimiter|
|
73
|
-
expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::BusinessRules.csv_opt_map[delimiter]
|
73
|
+
expect(@brt.send(st.to_sym).csv_options[:col_sep]).to eq Remi::Testing::BusinessRules.csv_opt_map[delimiter]
|
74
74
|
end
|
75
75
|
|
76
76
|
Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, encoding|
|
@@ -78,15 +78,15 @@ Given /^the (source|target) file is encoded using "([^"]+)" format$/ do |st, enc
|
|
78
78
|
end
|
79
79
|
|
80
80
|
Given /^the (source|target) file uses a ([\w ]+) to quote embedded delimiters$/ do |st, quote_char|
|
81
|
-
expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[quote_char]
|
81
|
+
expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::Testing::BusinessRules.csv_opt_map[quote_char]
|
82
82
|
end
|
83
83
|
|
84
84
|
Given /^the (source|target) file uses a preceding ([\w ]+) to escape an embedded quoting character$/ do |st, escape_char|
|
85
|
-
expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::BusinessRules.csv_opt_map[escape_char]
|
85
|
+
expect(@brt.send(st.to_sym).csv_options[:quote_char]).to eq Remi::Testing::BusinessRules.csv_opt_map[escape_char]
|
86
86
|
end
|
87
87
|
|
88
88
|
Given /^the (source|target) file uses ([\w ]+) line endings$/ do |st, line_endings|
|
89
|
-
expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::BusinessRules.csv_opt_map[line_endings]
|
89
|
+
expect(@brt.send(st.to_sym).csv_options[:row_sep]).to eq Remi::Testing::BusinessRules.csv_opt_map[line_endings]
|
90
90
|
end
|
91
91
|
|
92
92
|
Given /^the (source|target) file uses "([^"]+)" as a record separator$/ do |st, line_endings|
|
@@ -130,7 +130,7 @@ Given /^the source field '([^']+)' (?:has|is set to) the value "([^"]*)"$/ do |s
|
|
130
130
|
step "the source field '#{source_field}'"
|
131
131
|
|
132
132
|
source_name, source_field_name = @brt.sources.parse_full_field(source_field)
|
133
|
-
@brt.sources[source_name].fields[source_field_name].value = Remi::BusinessRules::ParseFormula.parse(value)
|
133
|
+
@brt.sources[source_name].fields[source_field_name].value = Remi::Testing::BusinessRules::ParseFormula.parse(value)
|
134
134
|
end
|
135
135
|
|
136
136
|
Given /^the source field (?:has|is set to) the value "([^"]*)"$/ do |value|
|
@@ -143,7 +143,7 @@ Given /^the source field '([^']+)' (?:has|is set to) the multiline value$/ do |s
|
|
143
143
|
step "the source field '#{source_field}'"
|
144
144
|
|
145
145
|
source_name, source_field_name = @brt.sources.parse_full_field(source_field)
|
146
|
-
@brt.sources[source_name].fields[source_field_name].value = Remi::BusinessRules::ParseFormula.parse(value)
|
146
|
+
@brt.sources[source_name].fields[source_field_name].value = Remi::Testing::BusinessRules::ParseFormula.parse(value)
|
147
147
|
end
|
148
148
|
|
149
149
|
Given /^the source field (?:has|is set to) the multiline value$/ do |value|
|
@@ -266,7 +266,7 @@ Then /^the target field '([^']+)' is (?:set to the value|populated with) "([^"]*
|
|
266
266
|
@brt.run_transforms
|
267
267
|
}.not_to raise_error
|
268
268
|
Array(target_names).each do |target_name|
|
269
|
-
expect(@brt.targets[target_name].fields[target_field_name].values.uniq).to eq [Remi::BusinessRules::ParseFormula.parse(value)]
|
269
|
+
expect(@brt.targets[target_name].fields[target_field_name].values.uniq).to eq [Remi::Testing::BusinessRules::ParseFormula.parse(value)]
|
270
270
|
end
|
271
271
|
}
|
272
272
|
end
|
@@ -12,6 +12,7 @@ Feature: Tests the parse_date transform
|
|
12
12
|
Given the source field 'Date String' has the value "<Date String>"
|
13
13
|
And the job parameter 'format' is "<Format>"
|
14
14
|
Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
|
15
|
+
|
15
16
|
Examples:
|
16
17
|
| Date String | Format | Parsed Date |
|
17
18
|
| 2015-10-21 | %Y-%m-%d | 2015-10-21 |
|
data/jobs/metadata_job.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative 'all_jobs_shared'
|
2
|
+
ENV['TZ'] = 'UTC'
|
2
3
|
|
3
4
|
class MetadataJob
|
4
5
|
include AllJobsShared
|
@@ -11,7 +12,7 @@ class MetadataJob
|
|
11
12
|
:activity_type => { from: 'in', in: true, type: :string, valid_values: ['A', 'B', 'C'], cdc_type: 2 },
|
12
13
|
:activity_counter => { from: 'in', in: true, type: :integer, cdc_type: 2 },
|
13
14
|
:activity_score => { from: 'in', in: true, type: :float, cdc_type: 2 },
|
14
|
-
:activity_cost => { from: 'in', in: true, type: :decimal, precision:
|
15
|
+
:activity_cost => { from: 'in', in: true, type: :decimal, precision: 8, scale: 2, cdc_type: 2 },
|
15
16
|
:activity_date => { from: 'in', in: true, type: :datetime, in_format: '%m/%d/%Y %H:%M:%S', out_format: '%Y-%m-%dT%H:%M:%S', cdc_type: 2 },
|
16
17
|
:source_filename => { from: 'in', in: true, type: :string, cdc_type: 1 }
|
17
18
|
}
|
@@ -25,36 +26,28 @@ class MetadataJob
|
|
25
26
|
:activity_type => { from: 'out', out: true, type: :string, valid_values: ['A', 'B', 'C'] },
|
26
27
|
:activity_counter => { from: 'out', out: true, type: :integer },
|
27
28
|
:activity_score => { from: 'out', out: true, type: :float },
|
28
|
-
:activity_cost => { from: 'out', out: true, type: :decimal, precision:
|
29
|
+
:activity_cost => { from: 'out', out: true, type: :decimal, precision: 8, scale: 2 },
|
29
30
|
:activity_date => { from: 'out', out: true, type: :datetime, in_format: '%m/%d/%Y %H:%M:%S', out_format: '%Y-%m-%dT%H:%M:%S' },
|
30
31
|
:source_filename => { from: 'out', out: true, type: :string, cdc_type: 1 }
|
31
32
|
}
|
32
33
|
|
33
34
|
define_transform :main do
|
35
|
+
source_data.enforce_types
|
34
36
|
|
35
|
-
|
36
|
-
source_data.df = Remi::DataFrame.daru([
|
37
|
-
['1','1','3/3/1998','A','1','3.8','12.23','1/3/2016 03:22:36','one.csv'],
|
38
|
-
['2','1','3/3/1998','B','3','4.2','10.53','1/3/2016 03:58:22','one.csv'],
|
39
|
-
['2','1','','B','2','4.23','10.539','1/3/2016 03:58:22','one.csv']
|
40
|
-
].transpose, order: [
|
41
|
-
:activity_id,
|
42
|
-
:student_id,
|
43
|
-
:student_dob,
|
44
|
-
:activity_type,
|
45
|
-
:activity_counter,
|
46
|
-
:activity_score,
|
47
|
-
:activity_cost,
|
48
|
-
:activity_date,
|
49
|
-
:source_filename
|
50
|
-
])
|
51
|
-
=end
|
52
|
-
|
53
|
-
Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields) do
|
37
|
+
Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields, target_metadata: target_data.fields) do
|
54
38
|
target_data.fields.keys.each do |field|
|
55
39
|
map source(field) .target(field)
|
56
|
-
|
40
|
+
|
41
|
+
map source(field) .target("#{field}_class".to_sym)
|
42
|
+
.transform(->(v) { v.class })
|
57
43
|
end
|
44
|
+
|
45
|
+
map source(:activity_cost) .target(:activity_cost_precision, :activity_cost_scale)
|
46
|
+
.transform(->(row) {
|
47
|
+
components = row[:activity_cost].to_s.split('.')
|
48
|
+
row[:activity_cost_precision] = components.first.size
|
49
|
+
row[:activity_cost_scale] = components.last.size
|
50
|
+
})
|
58
51
|
end
|
59
52
|
end
|
60
53
|
end
|
@@ -22,7 +22,7 @@ class ParseDateJob
|
|
22
22
|
.transform(Remi::Transform::ParseDate.new(in_format: params[:format], if_blank: params[:if_blank]))
|
23
23
|
|
24
24
|
map source(:stubbed_date) .target(:parsed_stubbed_date)
|
25
|
-
.transform(Remi::Transform::ParseDate.new(in_format:
|
25
|
+
.transform(Remi::Transform::ParseDate.new(in_format: source_data.fields[:stubbed_date][:in_format], if_blank: params[:if_blank]))
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
data/lib/remi/cucumber.rb
CHANGED
@@ -3,5 +3,9 @@ require 'cucumber/rspec/doubles'
|
|
3
3
|
|
4
4
|
require 'regexp-examples'
|
5
5
|
|
6
|
-
require_relative '
|
7
|
-
require_relative '
|
6
|
+
require_relative 'testing/data_stub'
|
7
|
+
require_relative 'testing/business_rules'
|
8
|
+
|
9
|
+
class Remi::DataSource
|
10
|
+
include Remi::Testing::DataStub
|
11
|
+
end
|
data/lib/remi/data_subject.rb
CHANGED
@@ -1,9 +1,4 @@
|
|
1
1
|
module Remi
|
2
|
-
|
3
|
-
# Namespaces for specific sources/targets
|
4
|
-
module DataSource; end
|
5
|
-
module DataTarget; end
|
6
|
-
|
7
2
|
class DataSubject
|
8
3
|
def initialize(*args, fields: Remi::Fields.new, remi_df_type: :daru, logger: Remi::Settings.logger, **kargs, &block)
|
9
4
|
@fields = fields
|
@@ -11,16 +6,24 @@ module Remi
|
|
11
6
|
@logger = logger
|
12
7
|
end
|
13
8
|
|
9
|
+
# Public: Fields defined for this data subject
|
14
10
|
attr_accessor :fields
|
15
11
|
|
12
|
+
# Public: The default method for symbolizing field names
|
16
13
|
def field_symbolizer
|
17
14
|
Remi::FieldSymbolizers[:standard]
|
18
15
|
end
|
19
16
|
|
17
|
+
# Public: Access the dataframe from a DataSource
|
18
|
+
#
|
19
|
+
# Returns a Remi::DataFrame
|
20
20
|
def df
|
21
21
|
@dataframe ||= Remi::DataFrame.create(@remi_df_type, [], order: @fields.keys)
|
22
22
|
end
|
23
23
|
|
24
|
+
# Public: Reassigns the dataframe associated with this subject
|
25
|
+
#
|
26
|
+
# Returns the assigned dataframe
|
24
27
|
def df=(new_dataframe)
|
25
28
|
if new_dataframe.respond_to? :remi_df_type
|
26
29
|
@dataframe = new_dataframe
|
@@ -29,58 +32,78 @@ module Remi
|
|
29
32
|
end
|
30
33
|
end
|
31
34
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
35
|
+
# Public: Enforces types defined in the field metadata.
|
36
|
+
# For example, if a field has metadata with type: :date, then the
|
37
|
+
# type enforcer will convert data in that field into a date, and will
|
38
|
+
# throw an error if it is unable to parse any of the values.
|
39
|
+
#
|
40
|
+
# types - If set, restricts the data types that are enforced to just those listed.
|
41
|
+
#
|
42
|
+
# Returns nothing.
|
43
|
+
def enforce_types(*types)
|
44
|
+
sttm = SourceToTargetMap.new(df, source_metadata: fields)
|
45
|
+
fields.keys.each do |field|
|
46
|
+
next unless (types.size == 0 || types.include?(fields[field][:type])) && df.vectors.include?(field)
|
47
|
+
sttm.source(field).target(field).transform(Remi::Transform::EnforceType.new).execute
|
39
48
|
end
|
40
49
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
end
|
50
|
+
nil
|
51
|
+
end
|
52
|
+
end
|
45
53
|
|
46
|
-
# Public: Called to extract data from the source.
|
47
|
-
#
|
48
|
-
# Returns data in a format that can be used to create a dataframe.
|
49
|
-
def extract!
|
50
|
-
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
51
|
-
@extract
|
52
|
-
end
|
53
54
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
55
|
+
class DataSource < DataSubject
|
56
|
+
|
57
|
+
# Public: Access the dataframe from a DataSource
|
58
|
+
#
|
59
|
+
# Returns a Remi::DataFrame
|
60
|
+
def df
|
61
|
+
@dataframe ||= to_dataframe
|
60
62
|
end
|
61
63
|
|
62
|
-
|
64
|
+
# Public: Memoized version of extract!
|
65
|
+
def extract
|
66
|
+
@extract ||= extract!
|
67
|
+
end
|
63
68
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
69
|
+
# Public: Called to extract data from the source.
|
70
|
+
#
|
71
|
+
# Returns data in a format that can be used to create a dataframe.
|
72
|
+
def extract!
|
73
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
74
|
+
@extract
|
75
|
+
end
|
71
76
|
|
72
|
-
|
73
|
-
|
77
|
+
# Public: Converts extracted data to a dataframe
|
78
|
+
#
|
79
|
+
# Returns a Remi::DataFrame
|
80
|
+
def to_dataframe
|
81
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
82
|
+
end
|
83
|
+
end
|
74
84
|
|
75
|
-
# Public: Performs the load operation, regardless of whether it has
|
76
|
-
# already executed.
|
77
|
-
#
|
78
|
-
# Returns true if the load operation was successful
|
79
|
-
def load!
|
80
|
-
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
81
85
|
|
82
|
-
|
83
|
-
|
86
|
+
class DataTarget < DataSubject
|
87
|
+
|
88
|
+
# Public: Loads data to the target. This is automatically called
|
89
|
+
# after all transforms have executed, but could also get called manually.
|
90
|
+
# The actual load operation is only executed if it hasn't already run.
|
91
|
+
#
|
92
|
+
# Returns true if the load operation was successful.
|
93
|
+
def load
|
94
|
+
return true if @loaded || df.size == 0
|
95
|
+
|
96
|
+
@loaded = load!
|
97
|
+
end
|
98
|
+
|
99
|
+
# Public: Performs the load operation, regardless of whether it has
|
100
|
+
# already executed.
|
101
|
+
#
|
102
|
+
# Returns true if the load operation was successful
|
103
|
+
def load!
|
104
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
105
|
+
|
106
|
+
false
|
84
107
|
end
|
85
108
|
end
|
86
109
|
end
|
@@ -26,8 +26,7 @@ module Remi
|
|
26
26
|
|
27
27
|
|
28
28
|
|
29
|
-
class DataSource::CsvFile <
|
30
|
-
include Remi::DataSubject::DataSource
|
29
|
+
class DataSource::CsvFile < DataSource
|
31
30
|
include Remi::DataSubject::CsvFile
|
32
31
|
|
33
32
|
def initialize(*args, **kargs, &block)
|
@@ -130,8 +129,7 @@ module Remi
|
|
130
129
|
|
131
130
|
|
132
131
|
|
133
|
-
class DataTarget::CsvFile <
|
134
|
-
include ::Remi::DataSubject::DataTarget
|
132
|
+
class DataTarget::CsvFile < DataTarget
|
135
133
|
include ::Remi::DataSubject::CsvFile
|
136
134
|
|
137
135
|
default_csv_options[:row_sep] = "\n"
|
@@ -1,7 +1,6 @@
|
|
1
1
|
module Remi
|
2
2
|
|
3
|
-
class DataSource::DataFrame <
|
4
|
-
include Remi::DataSubject::DataSource
|
3
|
+
class DataSource::DataFrame < DataSource
|
5
4
|
|
6
5
|
def initialize(*args, **kargs, &block)
|
7
6
|
super
|
@@ -30,8 +29,7 @@ module Remi
|
|
30
29
|
end
|
31
30
|
|
32
31
|
|
33
|
-
class DataTarget::DataFrame <
|
34
|
-
include Remi::DataSubject::DataTarget
|
32
|
+
class DataTarget::DataFrame < DataTarget
|
35
33
|
|
36
34
|
def initialize(*args, **kargs, &block)
|
37
35
|
super
|
@@ -13,8 +13,7 @@ module Remi
|
|
13
13
|
end
|
14
14
|
|
15
15
|
|
16
|
-
class DataSource::Postgres <
|
17
|
-
include Remi::DataSubject::DataSource
|
16
|
+
class DataSource::Postgres < DataSource
|
18
17
|
include Remi::DataSubject::Postgres
|
19
18
|
|
20
19
|
|
@@ -65,8 +64,7 @@ module Remi
|
|
65
64
|
|
66
65
|
# VERY PRELIMINARY IMPLEMENTATION - ONLY LOADS TO TEMP TABLES
|
67
66
|
# IT IS THEN UP TO THE USER TO DO ELT TO LOAD THE FINAL TABLE
|
68
|
-
class DataTarget::Postgres <
|
69
|
-
include Remi::DataSubject::DataTarget
|
67
|
+
class DataTarget::Postgres < DataTarget
|
70
68
|
include Remi::DataSubject::Postgres
|
71
69
|
|
72
70
|
def initialize(*args, **kargs, &block)
|
@@ -21,8 +21,7 @@ module Remi
|
|
21
21
|
end
|
22
22
|
|
23
23
|
|
24
|
-
class DataSource::Salesforce <
|
25
|
-
include Remi::DataSubject::DataSource
|
24
|
+
class DataSource::Salesforce < DataSource
|
26
25
|
include Remi::DataSubject::Salesforce
|
27
26
|
|
28
27
|
def initialize(*args, **kargs, &block)
|
@@ -92,8 +91,7 @@ module Remi
|
|
92
91
|
end
|
93
92
|
|
94
93
|
|
95
|
-
class DataTarget::Salesforce <
|
96
|
-
include Remi::DataSubject::DataTarget
|
94
|
+
class DataTarget::Salesforce < DataTarget
|
97
95
|
include Remi::DataSubject::Salesforce
|
98
96
|
|
99
97
|
def initialize(*args, **kargs, &block)
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module Remi
|
2
|
+
module Testing
|
3
|
+
module DataStub
|
4
|
+
def stub_row_array
|
5
|
+
@fields.values.map do |attribs|
|
6
|
+
stub_values(attribs)
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
def empty_stub_df
|
11
|
+
self.df = Daru::DataFrame.new([], order: @fields.keys)
|
12
|
+
end
|
13
|
+
|
14
|
+
def stub_df
|
15
|
+
empty_stub_df
|
16
|
+
self.df.add_row(stub_row_array)
|
17
|
+
end
|
18
|
+
|
19
|
+
def stub_values(**attribs)
|
20
|
+
stub_type = "stub_#{attribs[:type]}".to_sym
|
21
|
+
if respond_to?(stub_type)
|
22
|
+
send(stub_type, attribs)
|
23
|
+
else
|
24
|
+
stub_string(attribs)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def stub_string(**attribs)
|
29
|
+
Faker::Hipster.word
|
30
|
+
end
|
31
|
+
|
32
|
+
def stub_float(**attribs)
|
33
|
+
Faker::Number.decimal(2,3)
|
34
|
+
end
|
35
|
+
|
36
|
+
def stub_decimal(**attribs)
|
37
|
+
Faker::Number.decimal(attribs[:precision],attribs[:scale])
|
38
|
+
end
|
39
|
+
|
40
|
+
def stub_integer(**attribs)
|
41
|
+
Faker::Number.number(4).to_s
|
42
|
+
end
|
43
|
+
|
44
|
+
def stub_date(**attribs)
|
45
|
+
in_format = attribs[:in_format]
|
46
|
+
result = Faker::Date.backward(3650)
|
47
|
+
result = result.strftime(in_format) if in_format
|
48
|
+
result
|
49
|
+
end
|
50
|
+
|
51
|
+
def stub_datetime(**attribs)
|
52
|
+
in_format = attribs[:in_format]
|
53
|
+
result = Faker::Time.backward(3650)
|
54
|
+
result = result.strftime(in_format) if in_format
|
55
|
+
result
|
56
|
+
end
|
57
|
+
|
58
|
+
def stub_boolean(**attribs)
|
59
|
+
['T','F'].shuffle.first
|
60
|
+
end
|
61
|
+
|
62
|
+
def stub_json(**attribs)
|
63
|
+
if attribs[:json_array]
|
64
|
+
[ stub_string ]
|
65
|
+
else
|
66
|
+
{ Faker::Hipster.words(1, true, true) => stub_string }
|
67
|
+
end.to_json
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/remi/transform.rb
CHANGED
@@ -218,6 +218,7 @@ module Remi
|
|
218
218
|
# This transform is metadata aware and will use :in_format metadata
|
219
219
|
# from the source
|
220
220
|
#
|
221
|
+
# type - Specify either :date, or :datetime type (default: date)
|
221
222
|
# in_format - The date format to use to convert the string (default: uses :in_format
|
222
223
|
# from the source metadata. If that is not defined, use '%Y-%m-%d').
|
223
224
|
# if_blank - Value to use if the incoming value is blank (default: uses :if_blank
|
@@ -232,20 +233,33 @@ module Remi
|
|
232
233
|
# tform.source_metadata = { in_format: '%m/%d/%Y' }
|
233
234
|
# tform.to_proc.call('02/22/2013') # => Date.new(2013,2,22)
|
234
235
|
class ParseDate < Transform
|
235
|
-
def initialize(*args, in_format: nil, if_blank: nil, **kargs, &block)
|
236
|
+
def initialize(*args, type: nil, in_format: nil, if_blank: nil, **kargs, &block)
|
236
237
|
super
|
238
|
+
@type = type
|
237
239
|
@in_format = in_format
|
238
240
|
@if_blank = if_blank
|
239
241
|
end
|
240
242
|
|
243
|
+
def type
|
244
|
+
@type ||= @source_metadata.fetch(:type, :date)
|
245
|
+
end
|
246
|
+
|
241
247
|
def in_format
|
242
|
-
@in_format ||= @source_metadata.fetch(:in_format,
|
248
|
+
@in_format ||= @source_metadata.fetch(:in_format, default_date_format)
|
243
249
|
end
|
244
250
|
|
245
251
|
def if_blank
|
246
252
|
@if_blank ||= @source_metadata.fetch(:if_blank, nil)
|
247
253
|
end
|
248
254
|
|
255
|
+
def default_date_format
|
256
|
+
if type == :datetime
|
257
|
+
'%Y-%m-%d %H:%M:%S'
|
258
|
+
else
|
259
|
+
'%Y-%m-%d'
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
249
263
|
def transform(value)
|
250
264
|
begin
|
251
265
|
if value.respond_to?(:strftime)
|
@@ -260,15 +274,19 @@ module Remi
|
|
260
274
|
end
|
261
275
|
end
|
262
276
|
|
277
|
+
def class_type
|
278
|
+
@class_type ||= type == :datetime ? Time : Date
|
279
|
+
end
|
280
|
+
|
263
281
|
def string_to_date(value)
|
264
|
-
|
282
|
+
class_type.strptime(value, in_format)
|
265
283
|
end
|
266
284
|
|
267
285
|
def blank_handler(value)
|
268
286
|
if if_blank == :low
|
269
|
-
|
287
|
+
class_type.new(1900,01,01)
|
270
288
|
elsif if_blank == :high
|
271
|
-
|
289
|
+
class_type.new(2999,12,31)
|
272
290
|
elsif if_blank.respond_to? :call
|
273
291
|
if_blank.call(value)
|
274
292
|
else
|
@@ -282,6 +300,7 @@ module Remi
|
|
282
300
|
# This transform is metadata aware and will use :in_format/:out_format metadata
|
283
301
|
# from the source.
|
284
302
|
#
|
303
|
+
# type - Specify either :date, or :datetime type (default: date)
|
285
304
|
# in_format - The date format to use to parse the input value. If the input value
|
286
305
|
# is a date, then this parameter is ignored. (default: uses :in_format
|
287
306
|
# from the source metadata. If that is not defined, use '%Y-%m-%d')
|
@@ -297,18 +316,35 @@ module Remi
|
|
297
316
|
# tform.source_metadata = { in_format: '%m/%d/%Y', out_format: '%Y-%m-%d' }
|
298
317
|
# tform.to_proc.call('02/22/2013') # => "2013-02-22"
|
299
318
|
class FormatDate < Transform
|
300
|
-
def initialize(*args, in_format: nil, out_format: nil, **kargs, &block)
|
319
|
+
def initialize(*args, type: nil, in_format: nil, out_format: nil, **kargs, &block)
|
301
320
|
super
|
321
|
+
@type = type
|
302
322
|
@in_format = in_format
|
303
323
|
@out_format = out_format
|
304
324
|
end
|
305
325
|
|
326
|
+
def type
|
327
|
+
@type ||= @source_metadata.fetch(:type, :date)
|
328
|
+
end
|
329
|
+
|
306
330
|
def in_format
|
307
|
-
@in_format ||= @source_metadata.fetch(:in_format,
|
331
|
+
@in_format ||= @source_metadata.fetch(:in_format, default_date_format)
|
308
332
|
end
|
309
333
|
|
310
334
|
def out_format
|
311
|
-
@out_format ||= @source_metadata.fetch(:out_format,
|
335
|
+
@out_format ||= @source_metadata.fetch(:out_format, default_date_format)
|
336
|
+
end
|
337
|
+
|
338
|
+
def default_date_format
|
339
|
+
if type == :datetime
|
340
|
+
'%Y-%m-%d %H:%M:%S'
|
341
|
+
else
|
342
|
+
'%Y-%m-%d'
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
def class_type
|
347
|
+
@class_type ||= type == :datetime ? Time : Date
|
312
348
|
end
|
313
349
|
|
314
350
|
def transform(value)
|
@@ -318,7 +354,7 @@ module Remi
|
|
318
354
|
elsif value.respond_to? :strftime
|
319
355
|
value.strftime(out_format)
|
320
356
|
else
|
321
|
-
|
357
|
+
class_type.strptime(value, in_format).strftime(out_format)
|
322
358
|
end
|
323
359
|
rescue ArgumentError => err
|
324
360
|
raise err, "Error parsing date (#{value.class}): '#{value}' using the format #{in_format} => #{out_format}"
|
@@ -461,7 +497,7 @@ module Remi
|
|
461
497
|
def if_blank
|
462
498
|
return @if_blank if @if_blank_set
|
463
499
|
@if_blank_set = true
|
464
|
-
@if_blank = @source_metadata.fetch(:if_blank,
|
500
|
+
@if_blank = @source_metadata.fetch(:if_blank, default_if_blank)
|
465
501
|
end
|
466
502
|
|
467
503
|
def blank_handler(value)
|
@@ -474,8 +510,12 @@ module Remi
|
|
474
510
|
end
|
475
511
|
end
|
476
512
|
|
513
|
+
def default_if_blank
|
514
|
+
type == :string ? '' : nil
|
515
|
+
end
|
516
|
+
|
477
517
|
def transform(value)
|
478
|
-
if value.blank?
|
518
|
+
if value.blank? && type != :json
|
479
519
|
blank_handler(value)
|
480
520
|
else
|
481
521
|
case type
|
@@ -488,9 +528,15 @@ module Remi
|
|
488
528
|
when :decimal
|
489
529
|
Float("%.#{scale}f" % Float(value))
|
490
530
|
when :date
|
491
|
-
value.is_a?(Date) ? value : Date.strptime(value, in_format)
|
531
|
+
value.is_a?(Date) ? value : Date.strptime(value, in_format)
|
492
532
|
when :datetime
|
493
|
-
Time.strptime(value, in_format)
|
533
|
+
value.is_a?(Time) ? value : Time.strptime(value, in_format)
|
534
|
+
when :json
|
535
|
+
if value.blank? && value != [] && value != {}
|
536
|
+
blank_handler(value)
|
537
|
+
else
|
538
|
+
value.is_a?(Hash) || value.is_a?(Array) ? value : JSON.parse(value)
|
539
|
+
end
|
494
540
|
else
|
495
541
|
raise ArgumentError, "Unknown type enforcement: #{type}"
|
496
542
|
end
|
data/lib/remi/version.rb
CHANGED
@@ -0,0 +1,44 @@
|
|
1
|
+
require_relative 'remi_spec'
|
2
|
+
|
3
|
+
# VERY SPARSE TESTING! DO MORE!
|
4
|
+
|
5
|
+
describe DataSubject do
|
6
|
+
|
7
|
+
describe 'enforcing types' do
|
8
|
+
let(:dataframe) do
|
9
|
+
Remi::DataFrame::Daru.new({ my_date: ['10/21/2015'] })
|
10
|
+
end
|
11
|
+
|
12
|
+
let(:data_subject) do
|
13
|
+
DataSubject.new(fields: fields).tap { |ds| ds.df = dataframe }
|
14
|
+
end
|
15
|
+
|
16
|
+
let(:fields) do
|
17
|
+
Fields.new({
|
18
|
+
my_date: { type: :date, in_format: '%m/%d/%Y' },
|
19
|
+
other_date: { type: :date, in_format: '%m/%d/%Y' }
|
20
|
+
})
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'converts a date string to a date using an in_format' do
|
24
|
+
data_subject.enforce_types
|
25
|
+
expect(data_subject.df[:my_date].to_a).to eq [Date.new(2015, 10, 21)]
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'does not do any conversion if the type is not specified' do
|
29
|
+
fields[:my_date].delete(:type)
|
30
|
+
data_subject.enforce_types
|
31
|
+
expect(data_subject.df[:my_date].to_a).to eq ['10/21/2015']
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'throws an error if the data does not conform to its type' do
|
35
|
+
dataframe[:my_date].recode! { |v| '2015-10-21' }
|
36
|
+
expect { data_subject.enforce_types }.to raise_error ArgumentError
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'does not create new vectors during enforcement' do
|
40
|
+
data_subject.enforce_types
|
41
|
+
expect(dataframe.vectors.to_a).to eq [:my_date]
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
require_relative '../remi_spec'
|
2
|
+
require 'remi/testing/data_stub'
|
3
|
+
|
4
|
+
describe Testing::DataStub do
|
5
|
+
class StubTester < DataSubject
|
6
|
+
include Testing::DataStub
|
7
|
+
end
|
8
|
+
|
9
|
+
context 'data type stubs' do
|
10
|
+
let(:stub_tester) { StubTester.new }
|
11
|
+
|
12
|
+
context '#stub_string' do
|
13
|
+
let(:stub) { stub_tester.stub_string }
|
14
|
+
|
15
|
+
it 'stubs as strings' do
|
16
|
+
expect(stub).to be_a String
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context '#stub_float' do
|
21
|
+
let(:stub) { stub_tester.stub_float }
|
22
|
+
|
23
|
+
it 'stubs as strings' do
|
24
|
+
expect(stub).to be_a String
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'represents a floating point number' do
|
28
|
+
expect(Float(stub) % 1).not_to eq 0.0
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context '#stub_decimal' do
|
33
|
+
let(:stub) { stub_tester.stub_decimal(precision: 8, scale: 2) }
|
34
|
+
|
35
|
+
it 'stubs as strings' do
|
36
|
+
expect(stub).to be_a String
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'represents a floating point number' do
|
40
|
+
expect(Float(stub) % 1).not_to eq 0.0
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'comes with the specified precision' do
|
44
|
+
expect(Float(stub).to_s.split('.').first.size).to eq 8
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'comes with the specified scale' do
|
48
|
+
expect(Float(stub).to_s.split('.').last.size).to eq 2
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
context '#stub_integer' do
|
53
|
+
let(:stub) { stub_tester.stub_integer }
|
54
|
+
|
55
|
+
it 'stubs as strings' do
|
56
|
+
expect(stub).to be_a String
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'represents an integer' do
|
60
|
+
expect(Float(stub) % 1).to eq 0.0
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'converts to an integer' do
|
64
|
+
expect { Integer(stub) }.not_to raise_error
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
context '#stub_date' do
|
70
|
+
context 'without an in_format' do
|
71
|
+
let(:stub) { stub_tester.stub_date }
|
72
|
+
|
73
|
+
it 'stubs as a date' do
|
74
|
+
expect(stub).to be_a Date
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
context 'with an in_format' do
|
79
|
+
let(:stub) { stub_tester.stub_date(in_format: '%m/%d/%Y') }
|
80
|
+
|
81
|
+
it 'stubs as strings' do
|
82
|
+
expect(stub).to be_a String
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'can parsed as a date using the specified in_format' do
|
86
|
+
expect { Date.strptime(stub, '%m/%d/%Y') }.not_to raise_error
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
context '#stub_datetime' do
|
92
|
+
context 'without an in_format' do
|
93
|
+
let(:stub) { stub_tester.stub_datetime }
|
94
|
+
|
95
|
+
it 'stubs as a time' do
|
96
|
+
expect(stub).to be_a Time
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
context 'with an in_format' do
|
101
|
+
let(:stub) { stub_tester.stub_datetime(in_format: '%m/%d/%Y %H:%M:%S') }
|
102
|
+
|
103
|
+
it 'stubs as strings' do
|
104
|
+
expect(stub).to be_a String
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'can parsed as a time using the specified in_format' do
|
108
|
+
expect { Time.strptime(stub, '%m/%d/%Y %H:%M:%S') }.not_to raise_error
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
context '#stub_boolean' do
|
114
|
+
let(:stub) { stub_tester.stub_boolean }
|
115
|
+
|
116
|
+
it 'stubs as strings' do
|
117
|
+
expect(stub).to be_a String
|
118
|
+
end
|
119
|
+
|
120
|
+
it 'is either T or F' do
|
121
|
+
expect(stub).to eq('T').or eq('F')
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
context '#stub_json' do
|
126
|
+
let(:stub) { stub_tester.stub_json }
|
127
|
+
|
128
|
+
it 'stubs as strings' do
|
129
|
+
expect(stub).to be_a String
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'can be parsed as JSON' do
|
133
|
+
expect { JSON.parse(stub) }.not_to raise_error
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
|
139
|
+
context 'stubbed dataframe data' do
|
140
|
+
let(:stub_tester) do
|
141
|
+
StubTester.new(fields: {
|
142
|
+
my_date: { type: :date, in_format: '%m/%d/%Y' },
|
143
|
+
my_str: {}
|
144
|
+
})
|
145
|
+
end
|
146
|
+
|
147
|
+
context '#empty_stub_df' do
|
148
|
+
before { stub_tester.empty_stub_df }
|
149
|
+
|
150
|
+
it 'creates a dataframe with no data' do
|
151
|
+
expect(stub_tester.df.size).to eq 0
|
152
|
+
end
|
153
|
+
|
154
|
+
it 'creates a dataframe with the right number of vectors' do
|
155
|
+
expect(stub_tester.df.vectors.size).to eq 2
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
context '#stub_df' do
|
160
|
+
before { stub_tester.stub_df }
|
161
|
+
|
162
|
+
it 'creates a row of data' do
|
163
|
+
expect(stub_tester.df.size).to eq 1
|
164
|
+
end
|
165
|
+
|
166
|
+
it 'creates data according to the supplied metadata' do
|
167
|
+
expect { Date.strptime(stub_tester.df[:my_date].first, '%m/%d/%Y') }.not_to raise_error
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
data/spec/transform_spec.rb
CHANGED
@@ -50,4 +50,79 @@ describe Transform do
|
|
50
50
|
expect(transform.call(1, 2)).to eq [1, 2]
|
51
51
|
end
|
52
52
|
end
|
53
|
+
|
54
|
+
describe Transform::ParseDate do
|
55
|
+
it 'parses a date using the specified in_format' do
|
56
|
+
parser = Transform::ParseDate.new(in_format: '%m/%d/%Y')
|
57
|
+
expect(parser.call('03/22/2011')).to eq Date.new(2011,3,22)
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'returns a date if it has already been parsed' do
|
61
|
+
parser = Transform::ParseDate.new(in_format: '%m/%d/%Y')
|
62
|
+
expect(parser.call(Date.new(2011,3,22))).to eq Date.new(2011,3,22)
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'uses ISO 8601 as the default date parser' do
|
66
|
+
parser = Transform::ParseDate.new
|
67
|
+
expect(parser.call('2011-03-22')).to eq Date.new(2011,3,22)
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'fails when an unparseable date is provided' do
|
71
|
+
parser = Transform::ParseDate.new
|
72
|
+
expect { parser.call('03/22/2011') }.to raise_error ArgumentError
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'parses datetimes when the type is specified' do
|
76
|
+
parser = Transform::ParseDate.new(type: :datetime, in_format: '%m/%d/%Y %H:%M:%S')
|
77
|
+
expect(parser.call('03/22/2011 04:22:00')).to eq Time.new(2011,3,22,4,22,0)
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'uses ISO 8601 as the default datetime parser' do
|
81
|
+
parser = Transform::ParseDate.new(type: :datetime)
|
82
|
+
expect(parser.call('2011-03-22 04:22:00')).to eq Time.new(2011,3,22,4,22,0)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
context Transform::FormatDate do
|
87
|
+
it 'formats a date using the specified out_format' do
|
88
|
+
formatter = Transform::FormatDate.new(out_format: '%m/%d/%Y')
|
89
|
+
expect(formatter.call(Date.new(2011,3,22))).to eq '03/22/2011'
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'formats a datetime using the specified out_format' do
|
93
|
+
formatter = Transform::FormatDate.new(type: :datetime, out_format: '%m/%d/%Y %H:%M:%S')
|
94
|
+
expect(formatter.call(Time.new(2011,3,22,4,22,0))).to eq '03/22/2011 04:22:00'
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'uses the in_format to parse strings when the source is not already a date' do
|
98
|
+
formatter = Transform::FormatDate.new(in_format: '%d/%m/%Y', out_format: '%m/%d/%Y')
|
99
|
+
expect(formatter.call('22/03/2011')).to eq '03/22/2011'
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'fails when an unparseable date is provided' do
|
103
|
+
formatter = Transform::FormatDate.new(in_format: '%d/%m/%Y', out_format: '%m/%d/%Y')
|
104
|
+
expect { formatter.call('22/22/2011') }.to raise_error ArgumentError
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'uses ISO 8601 as the default date parser' do
|
108
|
+
formatter = Transform::FormatDate.new(out_format: '%m/%d/%Y')
|
109
|
+
expect(formatter.call('2011-03-22')).to eq '03/22/2011'
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'uses ISO 8601 as the default date formatter' do
|
113
|
+
formatter = Transform::FormatDate.new(in_format: '%m/%d/%Y')
|
114
|
+
expect(formatter.call('03/22/2011')).to eq '2011-03-22'
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'uses ISO 8601 as the default datetime parser' do
|
118
|
+
formatter = Transform::FormatDate.new(type: :datetime, out_format: '%m/%d/%Y %H:%M:%S')
|
119
|
+
expect(formatter.call('2011-03-22 04:22:00')).to eq '03/22/2011 04:22:00'
|
120
|
+
end
|
121
|
+
|
122
|
+
it 'uses ISO 8601 as the default datetime formatter' do
|
123
|
+
formatter = Transform::FormatDate.new(type: :datetime, in_format: '%m/%d/%Y %H:%M:%S')
|
124
|
+
expect(formatter.call('03/22/2011 04:22:00')).to eq '2011-03-22 04:22:00'
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
53
128
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.39
|
4
|
+
version: 0.2.40
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sterling Paramore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bond
|
@@ -225,8 +225,6 @@ files:
|
|
225
225
|
- lib/remi.rb
|
226
226
|
- lib/remi/cli.rb
|
227
227
|
- lib/remi/cucumber.rb
|
228
|
-
- lib/remi/cucumber/business_rules.rb
|
229
|
-
- lib/remi/cucumber/data_source.rb
|
230
228
|
- lib/remi/data_frame.rb
|
231
229
|
- lib/remi/data_frame/daru.rb
|
232
230
|
- lib/remi/data_subject.rb
|
@@ -248,11 +246,14 @@ files:
|
|
248
246
|
- lib/remi/source_to_target_map.rb
|
249
247
|
- lib/remi/source_to_target_map/map.rb
|
250
248
|
- lib/remi/source_to_target_map/row.rb
|
249
|
+
- lib/remi/testing/business_rules.rb
|
250
|
+
- lib/remi/testing/data_stub.rb
|
251
251
|
- lib/remi/transform.rb
|
252
252
|
- lib/remi/version.rb
|
253
253
|
- remi.gemspec
|
254
254
|
- spec/data_subject/csv_file_spec.rb
|
255
255
|
- spec/data_subject/data_frame.rb
|
256
|
+
- spec/data_subject_spec.rb
|
256
257
|
- spec/extractor/file_system_spec.rb
|
257
258
|
- spec/extractor/local_file_spec.rb
|
258
259
|
- spec/extractor/s3_file_spec.rb
|
@@ -264,6 +265,7 @@ files:
|
|
264
265
|
- spec/metadata_spec.rb
|
265
266
|
- spec/remi_spec.rb
|
266
267
|
- spec/source_to_target_map_spec.rb
|
268
|
+
- spec/testing/data_stub_spec.rb
|
267
269
|
- spec/transform_spec.rb
|
268
270
|
- workbooks/sample_workbook.ipynb
|
269
271
|
- workbooks/workbook_helper.rb
|
@@ -315,6 +317,7 @@ test_files:
|
|
315
317
|
- features/transforms/truthy.feature
|
316
318
|
- spec/data_subject/csv_file_spec.rb
|
317
319
|
- spec/data_subject/data_frame.rb
|
320
|
+
- spec/data_subject_spec.rb
|
318
321
|
- spec/extractor/file_system_spec.rb
|
319
322
|
- spec/extractor/local_file_spec.rb
|
320
323
|
- spec/extractor/s3_file_spec.rb
|
@@ -326,4 +329,5 @@ test_files:
|
|
326
329
|
- spec/metadata_spec.rb
|
327
330
|
- spec/remi_spec.rb
|
328
331
|
- spec/source_to_target_map_spec.rb
|
332
|
+
- spec/testing/data_stub_spec.rb
|
329
333
|
- spec/transform_spec.rb
|
@@ -1,70 +0,0 @@
|
|
1
|
-
module Remi
|
2
|
-
module DataSource
|
3
|
-
module DataStub
|
4
|
-
def stub_row_array
|
5
|
-
@fields.values.map do |attrib|
|
6
|
-
stub_values[attrib[:type]].call
|
7
|
-
end
|
8
|
-
end
|
9
|
-
|
10
|
-
def empty_stub_df
|
11
|
-
self.df = Daru::DataFrame.new([], order: @fields.keys)
|
12
|
-
end
|
13
|
-
|
14
|
-
def stub_df
|
15
|
-
empty_stub_df
|
16
|
-
self.df.add_row(stub_row_array)
|
17
|
-
end
|
18
|
-
|
19
|
-
def stub_values
|
20
|
-
@stub_values ||= Hash.new(->() { Faker::Hipster.word }).merge({
|
21
|
-
string: ->() { Faker::Hipster.word },
|
22
|
-
number: ->() { Faker::Number.decimal(4,4) },
|
23
|
-
float: ->() { Faker::Number.decimal(2,2) },
|
24
|
-
integer: ->() { Faker::Number.number(4) },
|
25
|
-
date: ->() { Faker::Date.backward(3650) },
|
26
|
-
datetime: ->() { Faker::Time.backward(3650).to_datetime },
|
27
|
-
boolean: ->() { ['T','F'].shuffle.first }
|
28
|
-
})
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
|
33
|
-
class CsvFile
|
34
|
-
include DataStub
|
35
|
-
def stub_tmp_file
|
36
|
-
@stub_tmp_file ||= Tempfile.new('stub_tmp_file.csv').path
|
37
|
-
end
|
38
|
-
|
39
|
-
def write_stub_tmp_file
|
40
|
-
File.open(stub_tmp_file, "wb") do |file|
|
41
|
-
file.puts stub_header
|
42
|
-
file.puts stub_row_csv
|
43
|
-
end
|
44
|
-
|
45
|
-
stub_tmp_file
|
46
|
-
end
|
47
|
-
|
48
|
-
def stub_header
|
49
|
-
@fields.keys.join(@csv_options[:col_sep])
|
50
|
-
end
|
51
|
-
|
52
|
-
def stub_row_csv
|
53
|
-
stub_row_array.join(@csv_options[:col_sep])
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
# Hmmm.... this gets called first because I'm trying to split SF off as a "plugin"
|
58
|
-
class Salesforce < Remi::DataSubject
|
59
|
-
include DataStub
|
60
|
-
end
|
61
|
-
|
62
|
-
class DataFrame
|
63
|
-
include DataStub
|
64
|
-
end
|
65
|
-
|
66
|
-
class Postgres
|
67
|
-
include DataStub
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|