remi 0.2.37 → 0.2.38
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +78 -0
- data/features/json.feature +0 -1
- data/features/transforms/concatenate.feature +30 -0
- data/features/transforms/date_diff.feature +1 -2
- data/jobs/json_job.rb +2 -6
- data/jobs/parameters_job.rb +1 -1
- data/jobs/sample_job.rb +20 -11
- data/jobs/transforms/concatenate_job.rb +21 -0
- data/jobs/transforms/date_diff_job.rb +4 -1
- data/jobs/transforms/partitioner_job.rb +1 -1
- data/lib/remi/source_to_target_map/map.rb +209 -0
- data/lib/remi/source_to_target_map/row.rb +99 -0
- data/lib/remi/source_to_target_map.rb +55 -90
- data/lib/remi/transform.rb +35 -26
- data/lib/remi/version.rb +1 -1
- data/lib/remi.rb +2 -0
- data/spec/source_to_target_map_spec.rb +301 -0
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a59538393438e759c02554c7dac61c914841e468
|
4
|
+
data.tar.gz: 181df9c16e528b0d1315e992fb25a97cc711c678
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92520cd0b2dc002879bfef7cfaf78e8e6f4a3609b121d23c9a04de054fcd86e9dfe8fd1d06b0cef3870a486c948a29994ed58eac6c8caefdd0fbab4d7b06fc8a
|
7
|
+
data.tar.gz: f50d012217b786c3fdebd97caa1a183545a6e99689f82fce9e888a933a33cd96247b31f63ab0265a80cb443a9cd9258237884a464747ebb75180e481b64d49c4
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -48,6 +48,84 @@ Examples setting up a job class with
|
|
48
48
|
* parameters
|
49
49
|
* maps
|
50
50
|
|
51
|
+
|
52
|
+
### Transform cardinality
|
53
|
+
|
54
|
+
Within a source-to-target map block, there are a few different
|
55
|
+
possible transform cardinalities: one-to-one, many-to-one, one-to-many,
|
56
|
+
many-to-many, zero-to-one, and zero-to-many. The lambda functions that
|
57
|
+
are supplied to the `#transform` method must satisfy different conditions based
|
58
|
+
on cardinality.
|
59
|
+
|
60
|
+
For all of the following examples, we'll assume that a dataframe exists defined by
|
61
|
+
````ruby
|
62
|
+
df = Remi::DataFrame::Daru.new(
|
63
|
+
[
|
64
|
+
['a1','b1','c1', ['d',1]],
|
65
|
+
['a2','b2','c2', ['d',2]],
|
66
|
+
['a3','b3','c3', ['d',3]],
|
67
|
+
].transpose,
|
68
|
+
order: [:a, :b, :c, :d]
|
69
|
+
)
|
70
|
+
````
|
71
|
+
|
72
|
+
**one-to-one** - These maps expect a lambda that accepts the value of a
|
73
|
+
field as an argument and returns the result of some operation, which
|
74
|
+
is used to populate the target.
|
75
|
+
|
76
|
+
````ruby
|
77
|
+
Remi::SourceToTargetMap.apply(df) do
|
78
|
+
map source(:a) .target(:aprime)
|
79
|
+
.transform(->(v) { "#{v}prime" })
|
80
|
+
end
|
81
|
+
|
82
|
+
df[:aprime].to_a #=> ['a1prime', 'a2prime', 'a3prime']
|
83
|
+
````
|
84
|
+
|
85
|
+
**many-to-one** - These maps expect that the lambda accepts a row object as an argument
|
86
|
+
and returns the result of the operation, which is used to populate the target.
|
87
|
+
|
88
|
+
````ruby
|
89
|
+
Remi::SourceToTargetMap.apply(df) do
|
90
|
+
map source(:a, :b) .target(:ab)
|
91
|
+
.transform(->(row) { "#{row[:a]}#{row[:b]}" })
|
92
|
+
end
|
93
|
+
|
94
|
+
df[:ab].to_a #=> ['a1b1', 'a2b2', 'a3b3']
|
95
|
+
````
|
96
|
+
|
97
|
+
**zero-to-many/one-to-many/many-to-many** - These maps expect that the
|
98
|
+
lambda accepts a row object as an argument. The row object is then
|
99
|
+
modified in place, which is used to populate the targets. The return
|
100
|
+
value of the lambda is ignored.
|
101
|
+
|
102
|
+
````ruby
|
103
|
+
Remi::SourceToTargetMap.apply(df) do
|
104
|
+
map source(:a, :b) .target(:aprime, :ab)
|
105
|
+
.transform(->(row) {
|
106
|
+
row[:aprime] = "#{row[:a]}prime"
|
107
|
+
row[:ab] = "#{row[:a]}#{row[:b]}"
|
108
|
+
})
|
109
|
+
end
|
110
|
+
|
111
|
+
df[:aprime].to_a #=> ['a1prime', 'a2prime', 'a3prime']
|
112
|
+
df[:ab].to_a #=> ['a1b1', 'a2b2', 'a3b3']
|
113
|
+
````
|
114
|
+
|
115
|
+
**zero-to-one** - These maps expect that the lambda accepts no arguments and returns the
|
116
|
+
result of some operation, which is used to populate the target.
|
117
|
+
|
118
|
+
````ruby
|
119
|
+
Remi::SourceToTargetMap.apply(df) do
|
120
|
+
counter = 1.upto(3).to_a
|
121
|
+
map target(:counter)
|
122
|
+
.transform(->() { counter.shift })
|
123
|
+
end
|
124
|
+
|
125
|
+
df[:counter].to_a #=> [1, 2, 3]
|
126
|
+
````
|
127
|
+
|
128
|
+
|
51
129
|
## Business Rules
|
52
130
|
|
53
131
|
TODO: Description of writing Business Rules.
|
data/features/json.feature
CHANGED
@@ -0,0 +1,30 @@
|
|
1
|
+
Feature: Test the concatenate transformer.
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'Concatenate'
|
5
|
+
And the job source 'Source Data'
|
6
|
+
And the job target 'Target Data'
|
7
|
+
|
8
|
+
Scenario Outline: Performing a concatenation
|
9
|
+
Given the source 'Source Data'
|
10
|
+
And the target 'Target Data'
|
11
|
+
|
12
|
+
And the source field 'Field1' is set to the value "<Field1>"
|
13
|
+
And the source field 'Field2' is set to the value "<Field2>"
|
14
|
+
And the source field 'Field3' is set to the value "<Field3>"
|
15
|
+
And the job parameter 'delimiter' is "<Delimiter>"
|
16
|
+
Then the target field 'Result Field' is set to the value "<Expected>"
|
17
|
+
|
18
|
+
Examples:
|
19
|
+
| Field1 | Field2 | Field3 | Delimiter | Expected |
|
20
|
+
| A | B | C | , | A,B,C |
|
21
|
+
| | B | C | - | B-C |
|
22
|
+
| | | C | , | C |
|
23
|
+
| | | | , | |
|
24
|
+
|
25
|
+
|
26
|
+
Scenario: Testing a concatenation with the short form version
|
27
|
+
Given the source 'Source Data'
|
28
|
+
And the target 'Target Data'
|
29
|
+
|
30
|
+
Then the target field 'Result Field' is a concatenation of the source fields 'Field1', 'Field2', 'Field3', delimited by ","
|
@@ -8,8 +8,7 @@ Feature: Tests the date_diff transform
|
|
8
8
|
And the source 'Source Data'
|
9
9
|
And the target 'Target Data'
|
10
10
|
|
11
|
-
|
12
|
-
Scenario Outline: Calculating date difference in days2.
|
11
|
+
Scenario Outline: Calculating date difference in days.
|
13
12
|
Given the job parameter 'measure' is "days"
|
14
13
|
And the source field 'Date1' has the value "<Date1>"
|
15
14
|
And the source field 'Date2' has the value "<Date2>"
|
data/jobs/json_job.rb
CHANGED
@@ -18,13 +18,9 @@ class JsonJob
|
|
18
18
|
define_transform :main do
|
19
19
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields) do
|
20
20
|
map source(:json_array) .target(:second_element)
|
21
|
-
.transform(->(
|
22
|
-
# This is NOT the way I would like it to work, but we need to do some work on STTM first
|
21
|
+
.transform(->(values) { values[1] })
|
23
22
|
map source(:json_hash) .target(:name_field)
|
24
|
-
.transform(->(
|
25
|
-
# preferred
|
26
|
-
# map source(:json_hash) .target(:name_field)
|
27
|
-
# .transform(->(json_hash) { json_hash['name'] })
|
23
|
+
.transform(->(json_hash) { json_hash['name'] })
|
28
24
|
end
|
29
25
|
end
|
30
26
|
end
|
data/jobs/parameters_job.rb
CHANGED
@@ -13,7 +13,7 @@ class ParametersJob
|
|
13
13
|
|
14
14
|
define_transform :main do
|
15
15
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
16
|
-
map
|
16
|
+
map target(:myparam)
|
17
17
|
.transform(Remi::Transform::Constant.new(params[:myparam]))
|
18
18
|
map source(:parameter_name) .target(:parameter_name)
|
19
19
|
.transform(->(v) { params[v.to_sym] })
|
data/jobs/sample_job.rb
CHANGED
@@ -92,9 +92,12 @@ class SampleJob
|
|
92
92
|
Remi::SourceToTargetMap.apply(all_contacts.df) do
|
93
93
|
|
94
94
|
# Prefixes source id record and then looks up existing salesforce Id
|
95
|
+
prefixer = Remi::Transform::Prefix.new('SAMP')
|
95
96
|
map source(:student_id) .target(:External_ID__c, :Id)
|
96
|
-
.transform(
|
97
|
-
|
97
|
+
.transform(->(row) {
|
98
|
+
row[:External_ID__c] = prefixer.call(row[:student_id])
|
99
|
+
row[:Id] = student_id_to_sf_id[row[:External_ID__c]]
|
100
|
+
})
|
98
101
|
end
|
99
102
|
end
|
100
103
|
|
@@ -102,9 +105,11 @@ class SampleJob
|
|
102
105
|
define_transform :map_creates, sources: :all_contacts, targets: :contact_creates do
|
103
106
|
|
104
107
|
work_contact_creates = all_contacts.df.where(all_contacts.df[:Id].eq(nil))
|
108
|
+
|
105
109
|
Remi::SourceToTargetMap.apply(work_contact_creates) do
|
106
110
|
|
107
111
|
map source(:school_id) .target(:School_ID__c)
|
112
|
+
|
108
113
|
map source(:school_name) .target(:School_Name__c)
|
109
114
|
map source(:first_name) .target(:FirstName)
|
110
115
|
.transform(Remi::Transform::IfBlank.new('Not Provided'))
|
@@ -122,16 +127,20 @@ class SampleJob
|
|
122
127
|
.transform(Remi::Transform::FormatDate.new(in_format: sample_file.fields[:applied_date][:in_format]))
|
123
128
|
|
124
129
|
map source(:mailing_address_line_1, :mailing_address_line_2) .target(:MailingStreet)
|
125
|
-
.transform(->(
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
+
.transform(->(row) {
|
131
|
+
if row[:mailing_address_line_1].blank?
|
132
|
+
''
|
133
|
+
else
|
134
|
+
[row[:mailing_address_line_1], row[:mailing_address_line_2]].join(', ')
|
135
|
+
end
|
136
|
+
})
|
137
|
+
|
138
|
+
if_blank_unknown = Remi::Transform::IfBlank.new("Unknown")
|
130
139
|
map source(:school_id, :school_name) .target(:School__c)
|
131
|
-
.transform(->(
|
132
|
-
|
133
|
-
|
134
|
-
|
140
|
+
.transform(->(row) {
|
141
|
+
row[:school_id] = if_blank_unknown.call(row[:school_id])
|
142
|
+
row[:school_name] = if_blank_unknown.call(row[:school_name])
|
143
|
+
})
|
135
144
|
.transform(Remi::Transform::Concatenate.new('-'))
|
136
145
|
|
137
146
|
map source(:current_email) .target(:Email)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require_relative '../all_jobs_shared'
|
2
|
+
|
3
|
+
class ConcatenateJob
|
4
|
+
include AllJobsShared
|
5
|
+
|
6
|
+
define_param :delimiter, ','
|
7
|
+
define_source :source_data, Remi::DataSource::DataFrame,
|
8
|
+
fields: {
|
9
|
+
:field1 => {},
|
10
|
+
:field2 => {},
|
11
|
+
:field3 => {}
|
12
|
+
}
|
13
|
+
define_target :target_data, Remi::DataTarget::DataFrame
|
14
|
+
|
15
|
+
define_transform :main, sources: :source_data, targets: :target_data do
|
16
|
+
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
17
|
+
map source(:field1, :field2, :field3) .target(:result_field)
|
18
|
+
.transform(Remi::Transform::Concatenate.new(params[:delimiter]))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -14,7 +14,10 @@ class DateDiffJob
|
|
14
14
|
define_transform :main, sources: :source_data, targets: :target_data do
|
15
15
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
16
16
|
map source(:date1, :date2) .target(:difference)
|
17
|
-
.transform(->(
|
17
|
+
.transform(->(row) {
|
18
|
+
row[:date1] = Date.strptime(row[:date1])
|
19
|
+
row[:date2] = Date.strptime(row[:date2])
|
20
|
+
})
|
18
21
|
.transform(Remi::Transform::DateDiff.new(params[:measure]))
|
19
22
|
end
|
20
23
|
end
|
@@ -28,7 +28,7 @@ class PartitionerJob
|
|
28
28
|
current_population_hash = current_population.df.map(:row) { |row| [row[:group], row[:count].to_i] }.to_h
|
29
29
|
|
30
30
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
31
|
-
map
|
31
|
+
map target(:group)
|
32
32
|
.transform(Remi::Transform::Partitioner.new(buckets: distribution_hash, initial_population: current_population_hash))
|
33
33
|
end
|
34
34
|
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
module Remi
|
2
|
+
class SourceToTargetMap
|
3
|
+
|
4
|
+
# Public: Class used to perform source to target mappings.
|
5
|
+
#
|
6
|
+
# Examples
|
7
|
+
#
|
8
|
+
# # One-to-one map
|
9
|
+
# map = Map.new(source_df, target_df)
|
10
|
+
# map.source(:a).target(:aprime)
|
11
|
+
# .transform(->(v) { "#{v}prime" })
|
12
|
+
# # see tests for more
|
13
|
+
class Map
|
14
|
+
|
15
|
+
# Public: Initializes a map
|
16
|
+
#
|
17
|
+
# source_df - The source dataframe.
|
18
|
+
# target_df - The target dataframe.
|
19
|
+
# source_metadata - Metadata (Remi::Fields) for the source fields.
|
20
|
+
# target_metadata - Metadata (Remi::Fields) for the target fields.
|
21
|
+
def initialize(source_df, target_df, source_metadata: Remi::Fields.new, target_metadata: Remi::Fields.new)
|
22
|
+
@source_df = source_df
|
23
|
+
@target_df = target_df
|
24
|
+
|
25
|
+
@source_metadata = source_metadata
|
26
|
+
@target_metadata = target_metadata
|
27
|
+
|
28
|
+
@source_vectors = []
|
29
|
+
@target_vectors = []
|
30
|
+
@transforms = []
|
31
|
+
@transform_procs = []
|
32
|
+
end
|
33
|
+
|
34
|
+
# Public: Returns the map's source dataframe
|
35
|
+
attr_reader :source_df
|
36
|
+
|
37
|
+
# Public: Returns the map's target dataframe
|
38
|
+
attr_reader :target_df
|
39
|
+
|
40
|
+
# Public: Returns all of the map's source vectors
|
41
|
+
attr_reader :source_vectors
|
42
|
+
|
43
|
+
# Public: Returns all of the map's target vectors
|
44
|
+
attr_reader :target_vectors
|
45
|
+
|
46
|
+
# Public: Returns all of the map's defined transforms
|
47
|
+
attr_reader :transforms
|
48
|
+
|
49
|
+
|
50
|
+
# Public: Adds a list of source vectors to a map
|
51
|
+
#
|
52
|
+
# source_vectors - A list of source vectors.
|
53
|
+
#
|
54
|
+
# Returns self
|
55
|
+
def source(*source_vectors)
|
56
|
+
@source_vectors += Array(source_vectors)
|
57
|
+
self
|
58
|
+
end
|
59
|
+
|
60
|
+
# Public: Adds a list of target vectors to a map
|
61
|
+
#
|
62
|
+
# target_vectors - A list of target vectors.
|
63
|
+
#
|
64
|
+
# Returns self
|
65
|
+
def target(*target_vectors)
|
66
|
+
@target_vectors += Array(target_vectors)
|
67
|
+
self
|
68
|
+
end
|
69
|
+
|
70
|
+
# Public: Adds a transform to the map
|
71
|
+
# A transform is an object that behaves like a proc and responds
|
72
|
+
# to #call and #to_proc. This method returns self, so transforms
|
73
|
+
# may be chained. They will be executed in the order that they are
|
74
|
+
# applied to the map.
|
75
|
+
#
|
76
|
+
# tform - The transform to add
|
77
|
+
#
|
78
|
+
# Returns self
|
79
|
+
def transform(tform)
|
80
|
+
@transforms << tform
|
81
|
+
@transform_procs << tform.to_proc
|
82
|
+
self
|
83
|
+
end
|
84
|
+
|
85
|
+
# Public: Executes the map defined by the source vectors, target vectors, and transforms.
|
86
|
+
#
|
87
|
+
# Returns the target dataframe.
|
88
|
+
def execute
|
89
|
+
inject_transforms_with_metadata
|
90
|
+
set_default_transform
|
91
|
+
map_to_target_df
|
92
|
+
end
|
93
|
+
|
94
|
+
# Public: Returns the number of source vectors defined
|
95
|
+
def source_cardinality
|
96
|
+
@source_vectors.size
|
97
|
+
end
|
98
|
+
|
99
|
+
# Public: Returns the number of target vectors defined
|
100
|
+
def target_cardinality
|
101
|
+
@target_vectors.size
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
def inject_transforms_with_metadata
|
110
|
+
@transforms.each do |tform|
|
111
|
+
if tform.respond_to? :source_metadata
|
112
|
+
meta = @source_vectors.map { |v| @source_metadata[v] || {} }
|
113
|
+
tform.source_metadata = meta.size > 1 ? meta : meta.first
|
114
|
+
end
|
115
|
+
if tform.respond_to? :target_metadata
|
116
|
+
meta = @target_vectors.map { |v| @target_metadata[v] || {} }
|
117
|
+
tform.target_metadata = meta.size > 1 ? meta : meta.first
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
# Private: If no transforms are defined, assume it's a simple copy
|
123
|
+
def set_default_transform
|
124
|
+
if @transforms.size == 0
|
125
|
+
transform(->(v) { v })
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# Private: Converts the transformed data into vectors in the target dataframe.
|
130
|
+
def map_to_target_df
|
131
|
+
result_hash_of_arrays.each do |vector, values|
|
132
|
+
@target_df[vector] = Daru::Vector.new(values, index: @source_df.index)
|
133
|
+
end
|
134
|
+
|
135
|
+
@target_df
|
136
|
+
end
|
137
|
+
|
138
|
+
# Private: Splits the transformed rows into separate arrays, indexed by vector name
|
139
|
+
def result_hash_of_arrays
|
140
|
+
result = @target_vectors.each_with_object({}) { |v,h| h[v] = [] }
|
141
|
+
|
142
|
+
transformed_rows.each do |result_row|
|
143
|
+
result.keys.each do |vector|
|
144
|
+
result[vector] << result_row[vector]
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
result
|
149
|
+
end
|
150
|
+
|
151
|
+
# Private: Applies all of the transforms to each row.
|
152
|
+
def transformed_rows
|
153
|
+
work_rows.map do |row|
|
154
|
+
@transform_procs.each do |tform|
|
155
|
+
result = call_transform(tform, row)
|
156
|
+
row[*@target_vectors] = result if target_cardinality == 1
|
157
|
+
row[*@source_vectors] = result if source_cardinality == 1 && target_cardinality == 1
|
158
|
+
end
|
159
|
+
|
160
|
+
row
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
# Private: Applies the given transform to the given row.
|
165
|
+
#
|
166
|
+
# tform - The transform (proc).
|
167
|
+
# row - The row.
|
168
|
+
#
|
169
|
+
# Returns the return value of the transform.
|
170
|
+
def call_transform(tform, row)
|
171
|
+
if source_cardinality == 0 && target_cardinality == 1
|
172
|
+
tform.call
|
173
|
+
elsif source_cardinality == 1 && target_cardinality == 1
|
174
|
+
tform.call(row[*@source_vectors])
|
175
|
+
else
|
176
|
+
tform.call(row)
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
# Private: Returns a unique list of all vectors (source and target) involved in the map.
|
181
|
+
def all_vectors
|
182
|
+
@all_vectors ||= (@source_vectors + @target_vectors).uniq
|
183
|
+
end
|
184
|
+
|
185
|
+
# Private: Returns a hash that maps vector names to an index
|
186
|
+
# The index is the position of the vector value for a row in #work_rows
|
187
|
+
def rows_index
|
188
|
+
@rows_index ||= all_vectors.each_with_index.to_h
|
189
|
+
end
|
190
|
+
|
191
|
+
# Private: Converts all of vectors involved in the map into an array of row objects.
|
192
|
+
def work_rows
|
193
|
+
all_vectors.map do |vector|
|
194
|
+
is_source_vector = @source_vectors.include? vector
|
195
|
+
|
196
|
+
if is_source_vector && @source_df.vectors.include?(vector)
|
197
|
+
@source_df[vector].to_a
|
198
|
+
elsif is_source_vector && @target_df.vectors.include?(vector)
|
199
|
+
@target_df[vector].to_a
|
200
|
+
else
|
201
|
+
Array.new(@source_df.size)
|
202
|
+
end
|
203
|
+
end.transpose.map do |row_as_array|
|
204
|
+
Row.new(rows_index, row_as_array, source_keys: @source_vectors)
|
205
|
+
end
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module Remi
|
2
|
+
class SourceToTargetMap
|
3
|
+
|
4
|
+
# Public: A row is composed of an array and an index hash.
|
5
|
+
# The index hash converts a key into a number representing the position in the array.
|
6
|
+
# Functionally, it's very similar to how a hash works. However,
|
7
|
+
# we need to create a lot of Row objects that all have the same
|
8
|
+
# index hash. All of those row objects can reference the same
|
9
|
+
# index hash object and thus dramatically reduce the amount of memory
|
10
|
+
# needed to store a lot of rows.
|
11
|
+
#
|
12
|
+
# Examples
|
13
|
+
#
|
14
|
+
# row = Row.new({ a: 0, b: 1 }, ['alpha', 'beta'])
|
15
|
+
# row[:a] #=> 'alpha'
|
16
|
+
# row[:b] #=> 'beta'
|
17
|
+
class Row
|
18
|
+
|
19
|
+
# Public: Converts hash-like objects into rows, array-like objects into rows,
|
20
|
+
# or just returns a row if one is provided.
|
21
|
+
#
|
22
|
+
# arg - A Row, array-like object, or hash-like object.
|
23
|
+
#
|
24
|
+
# Examples:
|
25
|
+
#
|
26
|
+
# Row[{ a: 'one', b: 'two' }] #=> #<Row @index={:a=>0, :b=>1} @values=["one", "two"]>
|
27
|
+
# Returns a Row
|
28
|
+
def self.[](arg)
  # An existing Row is passed through untouched (same object).
  return arg if arg.is_a? Row

  if arg.respond_to? :keys
    # Hash-like: keys become the index, in insertion order.
    Row.new(arg.keys.each_with_index.to_h, arg.values)
  else
    # Array-like: each position is its own key. The range is exclusive so
    # that exactly arg.size keys are produced; an inclusive 0.upto(arg.size)
    # would add a phantom trailing key with no backing value.
    Row.new((0...arg.size).each_with_index.to_h, arg)
  end
end
|
37
|
+
|
38
|
+
|
39
|
+
# Public: Initializes a row object.
|
40
|
+
#
|
41
|
+
# index - A hash containing keys that are usually symbols and values that
|
42
|
+
# represent a position in the values array.
|
43
|
+
# values - An array of values.
|
44
|
+
# source_keys - Array of keys that should be treated as data
|
45
|
+
# sources for a row transformation
|
46
|
+
def initialize(index, values, source_keys: nil)
|
47
|
+
@index = index
|
48
|
+
@inverted_index = index.invert
|
49
|
+
@values = values
|
50
|
+
@source_keys = source_keys || index.keys
|
51
|
+
end
|
52
|
+
|
53
|
+
# Public: Returns the value of the row array for the given key
|
54
|
+
def [](key)
|
55
|
+
@values[@index[key]]
|
56
|
+
end
|
57
|
+
|
58
|
+
# Public: Sets the value of the row array for the given key
|
59
|
+
def []=(key, value)
|
60
|
+
@values[@index[key]] = value
|
61
|
+
end
|
62
|
+
|
63
|
+
# Public: Makes Row enumerable, and acts like a hash.
|
64
|
+
def each &block
|
65
|
+
@values.each_with_index { |value, idx| block.call([@inverted_index[idx], value]) }
|
66
|
+
end
|
67
|
+
|
68
|
+
def each_source &block
|
69
|
+
Enumerator.new do |y|
|
70
|
+
source_keys.each { |key| y << [key, self[key]] }
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def each_target &block
|
75
|
+
Enumerator.new do |y|
|
76
|
+
target_keys.each { |key| y << [key, self[key]] }
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
# Public: Returns the values stored in the row.
|
81
|
+
def to_a
|
82
|
+
@values
|
83
|
+
end
|
84
|
+
|
85
|
+
# Public: Returns the keys of the index.
|
86
|
+
def keys
|
87
|
+
@index.keys
|
88
|
+
end
|
89
|
+
|
90
|
+
def source_keys
|
91
|
+
@source_keys
|
92
|
+
end
|
93
|
+
|
94
|
+
def target_keys
|
95
|
+
@target_keys ||= keys - source_keys
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -1,117 +1,82 @@
|
|
1
1
|
module Remi
|
2
|
+
|
3
|
+
# Public: Class used to define a DSL for source to target maps.
|
4
|
+
#
|
5
|
+
# Examples
|
6
|
+
#
|
7
|
+
# SourceToTargetMap.apply(df) do
|
8
|
+
# map source(:a) .target(:aprime)
|
9
|
+
# .transform(->(v) { "#{v}prime" })
|
10
|
+
# map source(:a) .target(:aup)
|
11
|
+
# .transform(->(v) { "#{v.upcase}" })
|
12
|
+
# end
|
13
|
+
# #=> <Daru::DataFrame:70291322684920 @name = 8c546a52-c1a7-495a-996a-7f352b0087b7 @size = 3>
|
14
|
+
# a aprime aup
|
15
|
+
# 0 a1 a1prime A1
|
16
|
+
# 1 a2 a2prime A2
|
17
|
+
# 2 a3 a3prime A3
|
2
18
|
class SourceToTargetMap
|
19
|
+
|
20
|
+
# Public: Initializes the SourceToTargetMap DSL
|
21
|
+
#
|
22
|
+
# source_df - The source dataframe.
|
23
|
+
# target_df - The target dataframe (default: source_df).
|
24
|
+
# source_metadata - Metadata (Remi::Fields) for the source fields.
|
25
|
+
# target_metadata - Metadata (Remi::Fields) for the target fields.
|
3
26
|
def initialize(source_df, target_df=nil, source_metadata: Remi::Fields.new, target_metadata: Remi::Fields.new)
|
4
27
|
@source_df = source_df
|
5
28
|
@source_metadata = source_metadata
|
6
29
|
|
7
|
-
|
8
|
-
|
9
|
-
@target_metadata = target_metadata
|
10
|
-
else
|
11
|
-
@target_df = @source_df
|
12
|
-
@target_metadata = @source_metadata
|
13
|
-
end
|
14
|
-
|
15
|
-
reset_map
|
30
|
+
@target_df = target_df || source_df
|
31
|
+
@target_metadata = target_metadata || source_metadata
|
16
32
|
end
|
17
33
|
|
34
|
+
attr_reader :source_df, :target_df
|
35
|
+
|
36
|
+
# Public: Expects a block in which the DSL will be applied.
|
37
|
+
#
|
38
|
+
# Same arguments as the constructor.
|
39
|
+
#
|
40
|
+
# Returns the target dataframe.
|
18
41
|
def self.apply(source_df, target_df=nil, source_metadata: Remi::Fields.new, target_metadata: Remi::Fields.new, &block)
|
19
42
|
sttm = SourceToTargetMap.new(source_df, target_df, source_metadata: source_metadata, target_metadata: target_metadata)
|
20
43
|
Docile.dsl_eval(sttm, &block)
|
44
|
+
target_df || source_df
|
21
45
|
end
|
22
46
|
|
47
|
+
# Public: Adds a list of source vectors to a new mapping.
|
48
|
+
#
|
49
|
+
# source_vectors - A list of vector names.
|
50
|
+
#
|
51
|
+
# Returns a SourceToTargetMap::Map with the defined source vectors.
|
23
52
|
def source(*source_vectors)
|
24
|
-
|
25
|
-
self
|
26
|
-
end
|
27
|
-
|
28
|
-
def transform(*transforms)
|
29
|
-
@transforms += Array(transforms)
|
30
|
-
@transform_procs += Array(transforms).map { |t| t.to_proc }
|
31
|
-
self
|
53
|
+
new_map.source(*source_vectors)
|
32
54
|
end
|
33
55
|
|
56
|
+
# Public: Adds a list of target vectors to a new mapping.
|
57
|
+
#
|
58
|
+
# target_vectors - A list of target names.
|
59
|
+
#
|
60
|
+
# Returns a SourceToTargetMap::Map with the defined target vectors.
|
34
61
|
def target(*target_vectors)
|
35
|
-
|
36
|
-
self
|
37
|
-
end
|
38
|
-
|
39
|
-
def reset_map
|
40
|
-
@source_vectors = []
|
41
|
-
@target_vectors = []
|
42
|
-
@transforms = []
|
43
|
-
@transform_procs = []
|
62
|
+
new_map.target(*target_vectors)
|
44
63
|
end
|
45
64
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
do_map_direct_copy
|
54
|
-
when @source_vectors.size == 1 && @target_vectors.size == 1
|
55
|
-
do_map_single_source_and_target_vector
|
56
|
-
else
|
57
|
-
do_map_generic
|
58
|
-
end
|
59
|
-
reset_map
|
65
|
+
# Public: Executes a mapping.
|
66
|
+
#
|
67
|
+
# defined_map - The SourceToTargetMap::Map object to execute
|
68
|
+
#
|
69
|
+
# Returns the target dataframe.
|
70
|
+
def map(defined_map)
|
71
|
+
defined_map.execute
|
60
72
|
end
|
61
73
|
|
62
74
|
|
63
|
-
|
64
75
|
private
|
65
76
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
meta = @source_vectors.map { |v| @source_metadata[v] || {} }
|
70
|
-
tform.source_metadata = meta.size > 1 ? meta : meta.first
|
71
|
-
end
|
72
|
-
if tform.respond_to? :target_metadata
|
73
|
-
meta = @target_vectors.map { |v| @target_metadata[v] || {} }
|
74
|
-
tform.target_metadata = meta.size > 1 ? meta : meta.first
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def do_map_direct_copy
|
80
|
-
@target_vectors.each do |target_vector|
|
81
|
-
@target_df[target_vector] = @source_df[@source_vectors.first].dup
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
def do_map_single_source_and_target_vector
|
86
|
-
@target_df[@target_vectors.first] = @source_df[@source_vectors.first].recode do |vector_value|
|
87
|
-
@transform_procs.reduce(vector_value) { |value, tform| tform.call(*(value.nil? ? [nil] : value)) }
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
def do_map_generic
|
92
|
-
work_vector = if @source_vectors.size == 1 && @source_vectors.first != nil
|
93
|
-
@source_df[@source_vectors.first].dup
|
94
|
-
elsif @source_vectors.size > 1
|
95
|
-
# It's faster to zip together several vectors and recode those than it is to
|
96
|
-
# recode a dataframe row by row!
|
97
|
-
Daru::Vector.new(@source_df[@source_vectors.first].zip(*@source_vectors[1..-1].map { |name| @source_df[name] }), index: @source_df.index)
|
98
|
-
else
|
99
|
-
Daru::Vector.new([], index: @source_df.index)
|
100
|
-
end
|
101
|
-
|
102
|
-
work_vector.recode! do |vector_value|
|
103
|
-
@transform_procs.reduce(vector_value) { |value, tform| tform.call(*(value.nil? ? [nil] : value)) }
|
104
|
-
end
|
105
|
-
|
106
|
-
@target_vectors.each_with_index do |target_vector, vector_idx|
|
107
|
-
@target_df[target_vector] = work_vector.recode do |vector_value|
|
108
|
-
if vector_value.is_a?(Array) then
|
109
|
-
vector_value[vector_idx]
|
110
|
-
else
|
111
|
-
vector_value
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
77
|
+
# Private: Returns a new SourceToTargetMap::Map
|
78
|
+
def new_map
|
79
|
+
Map.new(@source_df, @target_df, source_metadata: @source_metadata, target_metadata: @target_metadata)
|
115
80
|
end
|
116
81
|
end
|
117
82
|
end
|
data/lib/remi/transform.rb
CHANGED
@@ -35,11 +35,11 @@ module Remi
|
|
35
35
|
# values - The values to be transformed.
|
36
36
|
#
|
37
37
|
# Returns the transformed value.
|
38
|
-
def call(*
|
39
|
-
if
|
40
|
-
to_proc.call
|
38
|
+
def call(*args)
|
39
|
+
if to_proc.arity == 0
|
40
|
+
to_proc.call
|
41
41
|
else
|
42
|
-
to_proc.call(
|
42
|
+
to_proc.call(*args)
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
@@ -135,8 +135,9 @@ module Remi
|
|
135
135
|
@delimiter = delimiter
|
136
136
|
end
|
137
137
|
|
138
|
-
def transform(
|
139
|
-
|
138
|
+
def transform(row)
|
139
|
+
row = SourceToTargetMap::Row[row]
|
140
|
+
row.each_source.map { |key, value| value.blank? ? nil : value }.compact.join(@delimiter)
|
140
141
|
end
|
141
142
|
end
|
142
143
|
|
@@ -188,8 +189,9 @@ module Remi
|
|
188
189
|
@default = default
|
189
190
|
end
|
190
191
|
|
191
|
-
def transform(
|
192
|
-
|
192
|
+
def transform(row)
|
193
|
+
row = SourceToTargetMap::Row[row]
|
194
|
+
row.each_source.find(->() { [nil, @default] }) { |key, value| !value.blank? }[1]
|
193
195
|
end
|
194
196
|
end
|
195
197
|
|
@@ -338,7 +340,10 @@ module Remi
|
|
338
340
|
@measure = measure
|
339
341
|
end
|
340
342
|
|
341
|
-
def transform(
|
343
|
+
def transform(row)
|
344
|
+
row = SourceToTargetMap::Row[row]
|
345
|
+
from_date = row[row.keys[0]]
|
346
|
+
to_date = row[row.keys[1]]
|
342
347
|
|
343
348
|
case @measure.to_sym
|
344
349
|
when :days
|
@@ -366,7 +371,7 @@ module Remi
|
|
366
371
|
@constant = constant
|
367
372
|
end
|
368
373
|
|
369
|
-
def transform
|
374
|
+
def transform
|
370
375
|
@constant
|
371
376
|
end
|
372
377
|
end
|
@@ -563,9 +568,10 @@ module Remi
|
|
563
568
|
# wildcards and match anything. The first row that matches wins
|
564
569
|
# and the sieve progression stops.
|
565
570
|
#
|
566
|
-
# sieve_df - The sieve, defined as a dataframe. The
|
567
|
-
#
|
568
|
-
#
|
571
|
+
# sieve_df - The sieve, defined as a dataframe. The names of the
|
572
|
+
# sieve vectors must correspond to the names of the
|
573
|
+
# vectors in the dataframe source to target map. The
|
574
|
+
# last vector in the sieve_df is used as the result of the sieve.
|
569
575
|
#
|
570
576
|
#
|
571
577
|
# Examples:
|
@@ -612,23 +618,26 @@ module Remi
|
|
612
618
|
class DataFrameSieve < Transform
|
613
619
|
def initialize(sieve_df, *args, **kargs, &block)
|
614
620
|
super
|
615
|
-
@
|
621
|
+
@sieve_table = sieve_df.transpose.to_h.values
|
616
622
|
end
|
617
623
|
|
618
|
-
|
619
|
-
|
624
|
+
|
625
|
+
def transform(row)
|
626
|
+
sieve_keys = @sieve_table.first.index.to_a
|
620
627
|
sieve_result_key = sieve_keys.pop
|
621
628
|
|
622
|
-
|
629
|
+
raise ArgumentError, "#{sieve_keys - row.source_keys} not found in row" unless (sieve_keys - row.source_keys).size == 0
|
630
|
+
|
631
|
+
@sieve_table.each.find do |sieve_row|
|
623
632
|
match_row = true
|
624
|
-
sieve_keys.
|
625
|
-
match_value = if sieve_row[
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
match_row &&= sieve_row[
|
633
|
+
sieve_keys.each do |sieve_key|
|
634
|
+
match_value = if sieve_row[sieve_key].is_a?(Regexp)
|
635
|
+
!!sieve_row[sieve_key].match(row[sieve_key])
|
636
|
+
else
|
637
|
+
sieve_row[sieve_key] == row[sieve_key]
|
638
|
+
end
|
639
|
+
|
640
|
+
match_row &&= sieve_row[sieve_key].nil? || match_value
|
632
641
|
end
|
633
642
|
match_row
|
634
643
|
end[sieve_result_key]
|
@@ -661,7 +670,7 @@ module Remi
|
|
661
670
|
attr_reader :buckets
|
662
671
|
attr_reader :current_population
|
663
672
|
|
664
|
-
def transform
|
673
|
+
def transform
|
665
674
|
get_next_value
|
666
675
|
end
|
667
676
|
|
data/lib/remi/version.rb
CHANGED
data/lib/remi.rb
CHANGED
@@ -38,6 +38,8 @@ require 'remi/version.rb'
|
|
38
38
|
require 'remi/settings'
|
39
39
|
require 'remi/job'
|
40
40
|
require 'remi/source_to_target_map'
|
41
|
+
require 'remi/source_to_target_map/map'
|
42
|
+
require 'remi/source_to_target_map/row'
|
41
43
|
require 'remi/field_symbolizers'
|
42
44
|
|
43
45
|
require 'remi/refinements/symbolizer'
|
@@ -0,0 +1,301 @@
|
|
1
|
+
require_relative 'remi_spec'
|
2
|
+
|
3
|
+
describe SourceToTargetMap do
|
4
|
+
let(:df) do
|
5
|
+
Remi::DataFrame::Daru.new(
|
6
|
+
[
|
7
|
+
['a1','b1','c1', ['d',1]],
|
8
|
+
['a2','b2','c2', ['d',2]],
|
9
|
+
['a3','b3','c3', ['d',3]],
|
10
|
+
].transpose,
|
11
|
+
order: [:a, :b, :c, :d]
|
12
|
+
)
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
let(:map) { SourceToTargetMap::Map.new(df, df) }
|
17
|
+
|
18
|
+
describe 'one-to-one maps' do
|
19
|
+
shared_examples_for 'one-to-one map' do
|
20
|
+
it 'provides a value to the transform, and expects a return value' do
|
21
|
+
expect(result).to eq ['a1prime', 'a2prime', 'a3prime']
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
25
|
+
map.transform(->(v) { "#{v}-prime" })
|
26
|
+
expect(result).to eq ['a1prime-prime', 'a2prime-prime', 'a3prime-prime']
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
context 'standard use' do
|
31
|
+
before { map.source(:a) .target(:aprime) .transform(->(v) { "#{v}prime" }) }
|
32
|
+
|
33
|
+
let(:result) do
|
34
|
+
map.execute
|
35
|
+
df[:aprime].to_a
|
36
|
+
end
|
37
|
+
|
38
|
+
it_behaves_like 'one-to-one map'
|
39
|
+
end
|
40
|
+
|
41
|
+
context 'the source and target have the same name' do
|
42
|
+
before { map.source(:a) .target(:a) .transform(->(v) { "#{v}prime" }) }
|
43
|
+
|
44
|
+
let(:result) do
|
45
|
+
map.execute
|
46
|
+
df[:a].to_a
|
47
|
+
end
|
48
|
+
|
49
|
+
it_behaves_like 'one-to-one map'
|
50
|
+
end
|
51
|
+
|
52
|
+
context 'without any transforms', wip: true do
|
53
|
+
before { map.source(:a) .target(:aprime) }
|
54
|
+
|
55
|
+
let(:result) do
|
56
|
+
map.execute
|
57
|
+
df[:aprime].to_a
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'copies data from source to target' do
|
61
|
+
expect(result).to eq ['a1', 'a2', 'a3']
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'source and target dataframe are different' do
|
67
|
+
let(:map) { SourceToTargetMap::Map.new(df, df_target) }
|
68
|
+
|
69
|
+
context 'vectors referenced in the source only exist on the target' do
|
70
|
+
let(:df_target) do
|
71
|
+
Remi::DataFrame::Daru.new({ a_in_target: [ 'a1target', 'a2target', 'a3target' ] }, index: df.index)
|
72
|
+
end
|
73
|
+
|
74
|
+
before { map.source(:a_in_target) .target(:aprime) .transform(->(v) { "#{v}prime" }) }
|
75
|
+
|
76
|
+
let(:result) do
|
77
|
+
map.execute
|
78
|
+
df_target[:aprime].to_a
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'uses the target values' do
|
82
|
+
expect(result).to eq ['a1targetprime', 'a2targetprime', 'a3targetprime']
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
context 'vectors referenced in the source exist on both source and target' do
|
87
|
+
let(:df_target) do
|
88
|
+
Remi::DataFrame::Daru.new({ a: [ 'a1target', 'a2target', 'a3target' ] }, index: df.index)
|
89
|
+
end
|
90
|
+
|
91
|
+
before { map.source(:a) .target(:aprime) .transform(->(v) { "#{v}prime" }) }
|
92
|
+
|
93
|
+
let(:result) do
|
94
|
+
map.execute
|
95
|
+
df_target[:aprime].to_a
|
96
|
+
end
|
97
|
+
|
98
|
+
it 'uses the source values' do
|
99
|
+
expect(result).to eq ['a1prime', 'a2prime', 'a3prime']
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
|
106
|
+
describe 'one-to-one maps where the source and target have the same name' do
|
107
|
+
before { map.source(:a) .target(:a) .transform(->(v) { "#{v}prime" }) }
|
108
|
+
|
109
|
+
let(:result) do
|
110
|
+
map.execute
|
111
|
+
df[:a].to_a
|
112
|
+
end
|
113
|
+
|
114
|
+
it 'provides a value to the transform, and expects a return value' do
|
115
|
+
expect(result).to eq ['a1prime', 'a2prime', 'a3prime']
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
119
|
+
map.transform(->(v) { "#{v}-prime" })
|
120
|
+
expect(result).to eq ['a1prime-prime', 'a2prime-prime', 'a3prime-prime']
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
describe 'many-to-one maps' do
|
125
|
+
before { map.source(:a,:b) .target(:ab) .transform(->(row) { row[:a] + row[:b] }) }
|
126
|
+
|
127
|
+
let(:result) do
|
128
|
+
map.execute
|
129
|
+
df[:ab].to_a
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'provides a row to the transform, and expects a return value' do
|
133
|
+
expect(result).to eq ['a1b1', 'a2b2', 'a3b3']
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
137
|
+
map.transform(->(row) { "-#{row[:ab]}-" })
|
138
|
+
expect(result).to eq ['-a1b1-', '-a2b2-', '-a3b3-']
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
describe 'one-to-many maps' do
|
143
|
+
before do
|
144
|
+
map.source(:a) .target(:a_col, :a_row)
|
145
|
+
.transform(->(row) {
|
146
|
+
row[:a_col] = row[:a][0]
|
147
|
+
row[:a_row] = row[:a][1]
|
148
|
+
})
|
149
|
+
end
|
150
|
+
|
151
|
+
let(:result) do
|
152
|
+
map.execute
|
153
|
+
df[:a_col, :a_row].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
154
|
+
end
|
155
|
+
|
156
|
+
it 'provides a row to the transform and expects the row to be populated' do
|
157
|
+
expect(result).to eq({ :a_col => ['a', 'a', 'a'], :a_row => ['1', '2', '3'] })
|
158
|
+
end
|
159
|
+
|
160
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
161
|
+
map.transform(->(row) {
|
162
|
+
row[:a_col] = "COL#{row[:a_col]}"
|
163
|
+
row[:a_row] = "ROW#{row[:a_row]}"
|
164
|
+
})
|
165
|
+
|
166
|
+
expect(result).to eq({ :a_col => ['COLa', 'COLa', 'COLa'], :a_row => ['ROW1', 'ROW2', 'ROW3'] })
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
describe 'many-to-many maps' do
|
171
|
+
before do
|
172
|
+
map.source(:b, :c) .target(:b_is_c, :c_is_b)
|
173
|
+
.transform(->(row) {
|
174
|
+
row[:b], row[:c] = row[:c], row[:b]
|
175
|
+
row[:b_is_c] = row[:b]
|
176
|
+
row[:c_is_b] = row[:c]
|
177
|
+
})
|
178
|
+
end
|
179
|
+
|
180
|
+
let(:result) do
|
181
|
+
map.execute
|
182
|
+
df[:b_is_c, :c_is_b].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
183
|
+
end
|
184
|
+
|
185
|
+
it 'provides a row to the transform and expects the row to be populated' do
|
186
|
+
expect(result).to eq({ :b_is_c => ['c1', 'c2', 'c3'], :c_is_b => ['b1', 'b2', 'b3'] })
|
187
|
+
end
|
188
|
+
|
189
|
+
it 'does not modify source vectors' do
|
190
|
+
map.execute
|
191
|
+
source_vectors = df[:b, :c].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
192
|
+
expect(source_vectors).to eq({ :b => ['b1', 'b2', 'b3'], :c => ['c1', 'c2', 'c3'] })
|
193
|
+
end
|
194
|
+
|
195
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
196
|
+
map.transform(->(row) {
|
197
|
+
row[:b_is_c] = row[:b_is_c].reverse
|
198
|
+
row[:c_is_b] = row[:c_is_b].reverse
|
199
|
+
})
|
200
|
+
|
201
|
+
expect(result).to eq({ :b_is_c => ['1c', '2c', '3c'], :c_is_b => ['1b', '2b', '3b'] })
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
describe 'zero-to-one maps' do
|
206
|
+
before do
|
207
|
+
values = ['x1', 'x2', 'x3']
|
208
|
+
map.target(:x) .transform(->() { values.shift })
|
209
|
+
end
|
210
|
+
|
211
|
+
let(:result) do
|
212
|
+
map.execute
|
213
|
+
df[:x].to_a
|
214
|
+
end
|
215
|
+
|
216
|
+
it 'expects no argument and expects a return value' do
|
217
|
+
expect(result).to eq ['x1', 'x2', 'x3']
|
218
|
+
end
|
219
|
+
|
220
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
221
|
+
map.transform(->() { 'useless' })
|
222
|
+
expect(result).to eq ['useless']*3
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
describe 'zero-to-many maps' do
|
227
|
+
before do
|
228
|
+
values = ['x1', 'x2', 'x3']
|
229
|
+
map.target(:x_col, :x_row)
|
230
|
+
.transform(->(row) {
|
231
|
+
x = values.shift
|
232
|
+
row[:x_col] = x[0]
|
233
|
+
row[:x_row] = x[1]
|
234
|
+
})
|
235
|
+
end
|
236
|
+
|
237
|
+
let(:result) do
|
238
|
+
map.execute
|
239
|
+
df[:x_col, :x_row].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
240
|
+
end
|
241
|
+
|
242
|
+
it 'provides a row to the transform and expects the row to be populated' do
|
243
|
+
expect(result).to eq({ :x_col => ['x', 'x', 'x'], :x_row => ['1', '2', '3'] })
|
244
|
+
end
|
245
|
+
|
246
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
247
|
+
map.transform(->(row) { row[:x_row] = "ROW#{row[:x_row]}" })
|
248
|
+
expect(result).to eq({ :x_col => ['x', 'x', 'x'], :x_row => ['ROW1', 'ROW2', 'ROW3'] })
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
describe 'vectors containing arrays' do
|
253
|
+
it 'provides the array as a value the transform with one-to-one maps' do
|
254
|
+
map.source(:d) .target(:dprime)
|
255
|
+
.transform(->(v) { v.join('-') })
|
256
|
+
map.execute
|
257
|
+
|
258
|
+
expect(df[:dprime].to_a).to eq ['d-1', 'd-2', 'd-3']
|
259
|
+
end
|
260
|
+
|
261
|
+
it 'provides the array in the row with one-to-many maps' do
|
262
|
+
map.source(:d) .target(:d_col, :d_row)
|
263
|
+
.transform(->(row) {
|
264
|
+
row[:d_col] = row[:d].first
|
265
|
+
row[:d_row] = row[:d].last
|
266
|
+
})
|
267
|
+
map.execute
|
268
|
+
|
269
|
+
result = df[:d_col, :d_row].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
270
|
+
expect(result).to eq({ :d_col => ['d', 'd', 'd'], :d_row => [1, 2, 3] })
|
271
|
+
end
|
272
|
+
end
|
273
|
+
|
274
|
+
describe 'using the DSL' do
|
275
|
+
let(:sttm) do
|
276
|
+
SourceToTargetMap.apply(df) do
|
277
|
+
map source(:a) .target(:aprime)
|
278
|
+
.transform(->(v) { "#{v}prime" })
|
279
|
+
map source(:a) .target(:aprimeprime)
|
280
|
+
.transform(->(v) { "#{v}prime" })
|
281
|
+
.transform(->(v) { "#{v}-prime" })
|
282
|
+
map source(:a, :d) .target(:ad)
|
283
|
+
.transform(->(row) { "#{row[:a][0]}-#{row[:d].first}-#{row[:d].last}" })
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
it 'allows one to specify multiple source-to-target maps in one block' do
|
288
|
+
sttm
|
289
|
+
result = df[:aprime, :aprimeprime, :ad].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
290
|
+
expect(result).to eq({
|
291
|
+
:aprime => ['a1prime', 'a2prime', 'a3prime'],
|
292
|
+
:aprimeprime => ['a1prime-prime', 'a2prime-prime', 'a3prime-prime'],
|
293
|
+
:ad => ['a-d-1', 'a-d-2', 'a-d-3']
|
294
|
+
})
|
295
|
+
end
|
296
|
+
|
297
|
+
it 'returns a dataframe' do
|
298
|
+
expect(sttm).to be_a(Remi::DataFrame::Daru)
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.38
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sterling Paramore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bond
|
@@ -194,6 +194,7 @@ files:
|
|
194
194
|
- features/step_definitions/remi_step.rb
|
195
195
|
- features/support/env.rb
|
196
196
|
- features/support/env_app.rb
|
197
|
+
- features/transforms/concatenate.feature
|
197
198
|
- features/transforms/data_frame_sieve.feature
|
198
199
|
- features/transforms/date_diff.feature
|
199
200
|
- features/transforms/nvl.feature
|
@@ -211,6 +212,7 @@ files:
|
|
211
212
|
- jobs/parameters_job.rb
|
212
213
|
- jobs/sample_job.rb
|
213
214
|
- jobs/sftp_file_target_job.rb
|
215
|
+
- jobs/transforms/concatenate_job.rb
|
214
216
|
- jobs/transforms/data_frame_sieve_job.rb
|
215
217
|
- jobs/transforms/date_diff_job.rb
|
216
218
|
- jobs/transforms/nvl_job.rb
|
@@ -244,6 +246,8 @@ files:
|
|
244
246
|
- lib/remi/settings.rb
|
245
247
|
- lib/remi/sf_bulk_helper.rb
|
246
248
|
- lib/remi/source_to_target_map.rb
|
249
|
+
- lib/remi/source_to_target_map/map.rb
|
250
|
+
- lib/remi/source_to_target_map/row.rb
|
247
251
|
- lib/remi/transform.rb
|
248
252
|
- lib/remi/version.rb
|
249
253
|
- remi.gemspec
|
@@ -259,6 +263,7 @@ files:
|
|
259
263
|
- spec/fixtures/unsupported_escape.csv
|
260
264
|
- spec/metadata_spec.rb
|
261
265
|
- spec/remi_spec.rb
|
266
|
+
- spec/source_to_target_map_spec.rb
|
262
267
|
- spec/transform_spec.rb
|
263
268
|
- workbooks/sample_workbook.ipynb
|
264
269
|
- workbooks/workbook_helper.rb
|
@@ -299,6 +304,7 @@ test_files:
|
|
299
304
|
- features/step_definitions/remi_step.rb
|
300
305
|
- features/support/env.rb
|
301
306
|
- features/support/env_app.rb
|
307
|
+
- features/transforms/concatenate.feature
|
302
308
|
- features/transforms/data_frame_sieve.feature
|
303
309
|
- features/transforms/date_diff.feature
|
304
310
|
- features/transforms/nvl.feature
|
@@ -319,4 +325,5 @@ test_files:
|
|
319
325
|
- spec/fixtures/unsupported_escape.csv
|
320
326
|
- spec/metadata_spec.rb
|
321
327
|
- spec/remi_spec.rb
|
328
|
+
- spec/source_to_target_map_spec.rb
|
322
329
|
- spec/transform_spec.rb
|