remi 0.2.37 → 0.2.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +78 -0
- data/features/json.feature +0 -1
- data/features/transforms/concatenate.feature +30 -0
- data/features/transforms/date_diff.feature +1 -2
- data/jobs/json_job.rb +2 -6
- data/jobs/parameters_job.rb +1 -1
- data/jobs/sample_job.rb +20 -11
- data/jobs/transforms/concatenate_job.rb +21 -0
- data/jobs/transforms/date_diff_job.rb +4 -1
- data/jobs/transforms/partitioner_job.rb +1 -1
- data/lib/remi/source_to_target_map/map.rb +209 -0
- data/lib/remi/source_to_target_map/row.rb +99 -0
- data/lib/remi/source_to_target_map.rb +55 -90
- data/lib/remi/transform.rb +35 -26
- data/lib/remi/version.rb +1 -1
- data/lib/remi.rb +2 -0
- data/spec/source_to_target_map_spec.rb +301 -0
- metadata +9 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a59538393438e759c02554c7dac61c914841e468
|
|
4
|
+
data.tar.gz: 181df9c16e528b0d1315e992fb25a97cc711c678
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 92520cd0b2dc002879bfef7cfaf78e8e6f4a3609b121d23c9a04de054fcd86e9dfe8fd1d06b0cef3870a486c948a29994ed58eac6c8caefdd0fbab4d7b06fc8a
|
|
7
|
+
data.tar.gz: f50d012217b786c3fdebd97caa1a183545a6e99689f82fce9e888a933a33cd96247b31f63ab0265a80cb443a9cd9258237884a464747ebb75180e481b64d49c4
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -48,6 +48,84 @@ Examples setting up a job class with
|
|
|
48
48
|
* parameters
|
|
49
49
|
* maps
|
|
50
50
|
|
|
51
|
+
|
|
52
|
+
### Transform cardinality
|
|
53
|
+
|
|
54
|
+
Within a source-to-target map block, there are a few different
|
|
55
|
+
possible transform cardinalities: one-to-one, many-to-one, one-to-many,
|
|
56
|
+
many-to-many, zero-to-one, and zero-to-many. The lambda functions that
|
|
57
|
+
are supplied to the `#transform` method must satisfy different conditions based
|
|
58
|
+
on cardinality.
|
|
59
|
+
|
|
60
|
+
For all of the following examples, we'll assume that a dataframe exists defined by
|
|
61
|
+
````ruby
|
|
62
|
+
df = Remi::DataFrame::Daru.new(
|
|
63
|
+
[
|
|
64
|
+
['a1','b1','c1', ['d',1]],
|
|
65
|
+
['a2','b2','c2', ['d',2]],
|
|
66
|
+
['a3','b3','c3', ['d',3]],
|
|
67
|
+
].transpose,
|
|
68
|
+
order: [:a, :b, :c, :d]
|
|
69
|
+
)
|
|
70
|
+
````
|
|
71
|
+
|
|
72
|
+
**one-to-one** - These maps expect a lambda that accepts the value of a
|
|
73
|
+
field as an argument and returns the result of some operation, which
|
|
74
|
+
is used to populate the target.
|
|
75
|
+
|
|
76
|
+
````ruby
|
|
77
|
+
Remi::SourceToTargetMap.apply(df) do
|
|
78
|
+
map source(:a) .target(:aprime)
|
|
79
|
+
.transform(->(v) { "#{v}prime" })
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
df[:aprime].to_a #=> ['a1prime', 'a2prime', 'a3prime']
|
|
83
|
+
````
|
|
84
|
+
|
|
85
|
+
**many-to-one** - These maps expect that the lambda accepts a row object as an argument
|
|
86
|
+
and returns the result of the operation, which is used to populate the target.
|
|
87
|
+
|
|
88
|
+
````ruby
|
|
89
|
+
Remi::SourceToTargetMap.apply(df) do
|
|
90
|
+
map source(:a, :b) .target(:ab)
|
|
91
|
+
.transform(->(row) { "#{row[:a]}#{row[:b]}" })
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
df[:ab].to_a #=> ['a1b1', 'a2b2', 'a3b3']
|
|
95
|
+
````
|
|
96
|
+
|
|
97
|
+
**zero-to-many/one-to-many/many-to-many** - These maps expect that the
|
|
98
|
+
lambda accepts a row object as an argument. The row object is then
|
|
99
|
+
modified in place, which is used to populate the targets. The return
|
|
100
|
+
value of the lambda is ignored.
|
|
101
|
+
|
|
102
|
+
````ruby
|
|
103
|
+
Remi::SourceToTargetMap.apply(df) do
|
|
104
|
+
map source(:a, :b) .target(:aprime, :ab)
|
|
105
|
+
.transform(->(row) {
|
|
106
|
+
row[:aprime] = "#{row[:a]}prime"
|
|
107
|
+
row[:ab] = "#{row[:a]}#{row[:b]}"
|
|
108
|
+
})
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
df[:aprime].to_a #=> ['a1prime', 'a2prime', 'a3prime']
|
|
112
|
+
df[:ab].to_a #=> ['a1b1', 'a2b2', 'a3b3']
|
|
113
|
+
````
|
|
114
|
+
|
|
115
|
+
**zero-to-one** - These maps expect that the lambda accepts no arguments and returns the
|
|
116
|
+
result of some operation, which is used to populate the target.
|
|
117
|
+
|
|
118
|
+
````ruby
|
|
119
|
+
Remi::SourceToTargetMap.apply(df) do
|
|
120
|
+
counter = 1.upto(3).to_a
|
|
121
|
+
map target(:counter)
|
|
122
|
+
.transform(->() { counter.pop })
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
df[:counter].to_a #=> [3, 2, 1]
|
|
126
|
+
````
|
|
127
|
+
|
|
128
|
+
|
|
51
129
|
## Business Rules
|
|
52
130
|
|
|
53
131
|
TODO: Description of writing Business Rules.
|
data/features/json.feature
CHANGED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
Feature: Test the concatenate transformer.
|
|
2
|
+
|
|
3
|
+
Background:
|
|
4
|
+
Given the job is 'Concatenate'
|
|
5
|
+
And the job source 'Source Data'
|
|
6
|
+
And the job target 'Target Data'
|
|
7
|
+
|
|
8
|
+
Scenario Outline: Performing a concatenation
|
|
9
|
+
Given the source 'Source Data'
|
|
10
|
+
And the target 'Target Data'
|
|
11
|
+
|
|
12
|
+
And the source field 'Field1' is set to the value "<Field1>"
|
|
13
|
+
And the source field 'Field2' is set to the value "<Field2>"
|
|
14
|
+
And the source field 'Field3' is set to the value "<Field3>"
|
|
15
|
+
And the job parameter 'delimiter' is "<Delimiter>"
|
|
16
|
+
Then the target field 'Result Field' is set to the value "<Expected>"
|
|
17
|
+
|
|
18
|
+
Examples:
|
|
19
|
+
| Field1 | Field2 | Field3 | Delimiter | Expected |
|
|
20
|
+
| A | B | C | , | A,B,C |
|
|
21
|
+
| | B | C | - | B-C |
|
|
22
|
+
| | | C | , | C |
|
|
23
|
+
| | | | , | |
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
Scenario: Testing a concatenation with the short form version
|
|
27
|
+
Given the source 'Source Data'
|
|
28
|
+
And the target 'Target Data'
|
|
29
|
+
|
|
30
|
+
Then the target field 'Result Field' is a concatenation of the source fields 'Field1', 'Field2', 'Field3', delimited by ","
|
|
@@ -8,8 +8,7 @@ Feature: Tests the date_diff transform
|
|
|
8
8
|
And the source 'Source Data'
|
|
9
9
|
And the target 'Target Data'
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
Scenario Outline: Calculating date difference in days2.
|
|
11
|
+
Scenario Outline: Calculating date difference in days.
|
|
13
12
|
Given the job parameter 'measure' is "days"
|
|
14
13
|
And the source field 'Date1' has the value "<Date1>"
|
|
15
14
|
And the source field 'Date2' has the value "<Date2>"
|
data/jobs/json_job.rb
CHANGED
|
@@ -18,13 +18,9 @@ class JsonJob
|
|
|
18
18
|
define_transform :main do
|
|
19
19
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df, source_metadata: source_data.fields) do
|
|
20
20
|
map source(:json_array) .target(:second_element)
|
|
21
|
-
.transform(->(
|
|
22
|
-
# This is NOT the way I would like it to work, but we need to do some work on STTM first
|
|
21
|
+
.transform(->(values) { values[1] })
|
|
23
22
|
map source(:json_hash) .target(:name_field)
|
|
24
|
-
.transform(->(
|
|
25
|
-
# preferred
|
|
26
|
-
# map source(:json_hash) .target(:name_field)
|
|
27
|
-
# .transform(->(json_hash) { json_hash['name'] })
|
|
23
|
+
.transform(->(json_hash) { json_hash['name'] })
|
|
28
24
|
end
|
|
29
25
|
end
|
|
30
26
|
end
|
data/jobs/parameters_job.rb
CHANGED
|
@@ -13,7 +13,7 @@ class ParametersJob
|
|
|
13
13
|
|
|
14
14
|
define_transform :main do
|
|
15
15
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
|
16
|
-
map
|
|
16
|
+
map target(:myparam)
|
|
17
17
|
.transform(Remi::Transform::Constant.new(params[:myparam]))
|
|
18
18
|
map source(:parameter_name) .target(:parameter_name)
|
|
19
19
|
.transform(->(v) { params[v.to_sym] })
|
data/jobs/sample_job.rb
CHANGED
|
@@ -92,9 +92,12 @@ class SampleJob
|
|
|
92
92
|
Remi::SourceToTargetMap.apply(all_contacts.df) do
|
|
93
93
|
|
|
94
94
|
# Prefixes source id record and then looks up existing salesforce Id
|
|
95
|
+
prefixer = Remi::Transform::Prefix.new('SAMP')
|
|
95
96
|
map source(:student_id) .target(:External_ID__c, :Id)
|
|
96
|
-
.transform(
|
|
97
|
-
|
|
97
|
+
.transform(->(row) {
|
|
98
|
+
row[:External_ID__c] = prefixer.call(row[:student_id])
|
|
99
|
+
row[:Id] = student_id_to_sf_id[row[:External_ID__c]]
|
|
100
|
+
})
|
|
98
101
|
end
|
|
99
102
|
end
|
|
100
103
|
|
|
@@ -102,9 +105,11 @@ class SampleJob
|
|
|
102
105
|
define_transform :map_creates, sources: :all_contacts, targets: :contact_creates do
|
|
103
106
|
|
|
104
107
|
work_contact_creates = all_contacts.df.where(all_contacts.df[:Id].eq(nil))
|
|
108
|
+
|
|
105
109
|
Remi::SourceToTargetMap.apply(work_contact_creates) do
|
|
106
110
|
|
|
107
111
|
map source(:school_id) .target(:School_ID__c)
|
|
112
|
+
|
|
108
113
|
map source(:school_name) .target(:School_Name__c)
|
|
109
114
|
map source(:first_name) .target(:FirstName)
|
|
110
115
|
.transform(Remi::Transform::IfBlank.new('Not Provided'))
|
|
@@ -122,16 +127,20 @@ class SampleJob
|
|
|
122
127
|
.transform(Remi::Transform::FormatDate.new(in_format: sample_file.fields[:applied_date][:in_format]))
|
|
123
128
|
|
|
124
129
|
map source(:mailing_address_line_1, :mailing_address_line_2) .target(:MailingStreet)
|
|
125
|
-
.transform(->(
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
+
.transform(->(row) {
|
|
131
|
+
if row[:mailing_address_line_1].blank?
|
|
132
|
+
''
|
|
133
|
+
else
|
|
134
|
+
[row[:mailing_address_line_1], row[:mailing_address_line_2]].join(', ')
|
|
135
|
+
end
|
|
136
|
+
})
|
|
137
|
+
|
|
138
|
+
if_blank_unknown = Remi::Transform::IfBlank.new("Unknown")
|
|
130
139
|
map source(:school_id, :school_name) .target(:School__c)
|
|
131
|
-
.transform(->(
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
140
|
+
.transform(->(row) {
|
|
141
|
+
row[:school_id] = if_blank_unknown.call(row[:school_id])
|
|
142
|
+
row[:school_name] = if_blank_unknown.call(row[:school_name])
|
|
143
|
+
})
|
|
135
144
|
.transform(Remi::Transform::Concatenate.new('-'))
|
|
136
145
|
|
|
137
146
|
map source(:current_email) .target(:Email)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
require_relative '../all_jobs_shared'
|
|
2
|
+
|
|
3
|
+
class ConcatenateJob
|
|
4
|
+
include AllJobsShared
|
|
5
|
+
|
|
6
|
+
define_param :delimiter, ','
|
|
7
|
+
define_source :source_data, Remi::DataSource::DataFrame,
|
|
8
|
+
fields: {
|
|
9
|
+
:field1 => {},
|
|
10
|
+
:field2 => {},
|
|
11
|
+
:field3 => {}
|
|
12
|
+
}
|
|
13
|
+
define_target :target_data, Remi::DataTarget::DataFrame
|
|
14
|
+
|
|
15
|
+
define_transform :main, sources: :source_data, targets: :target_data do
|
|
16
|
+
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
|
17
|
+
map source(:field1, :field2, :field3) .target(:result_field)
|
|
18
|
+
.transform(Remi::Transform::Concatenate.new(params[:delimiter]))
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -14,7 +14,10 @@ class DateDiffJob
|
|
|
14
14
|
define_transform :main, sources: :source_data, targets: :target_data do
|
|
15
15
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
|
16
16
|
map source(:date1, :date2) .target(:difference)
|
|
17
|
-
.transform(->(
|
|
17
|
+
.transform(->(row) {
|
|
18
|
+
row[:date1] = Date.strptime(row[:date1])
|
|
19
|
+
row[:date2] = Date.strptime(row[:date2])
|
|
20
|
+
})
|
|
18
21
|
.transform(Remi::Transform::DateDiff.new(params[:measure]))
|
|
19
22
|
end
|
|
20
23
|
end
|
|
@@ -28,7 +28,7 @@ class PartitionerJob
|
|
|
28
28
|
current_population_hash = current_population.df.map(:row) { |row| [row[:group], row[:count].to_i] }.to_h
|
|
29
29
|
|
|
30
30
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
|
31
|
-
map
|
|
31
|
+
map target(:group)
|
|
32
32
|
.transform(Remi::Transform::Partitioner.new(buckets: distribution_hash, initial_population: current_population_hash))
|
|
33
33
|
end
|
|
34
34
|
end
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
module Remi
|
|
2
|
+
class SourceToTargetMap
|
|
3
|
+
|
|
4
|
+
# Public: Class used to perform source to target mappings.
|
|
5
|
+
#
|
|
6
|
+
# Examples
|
|
7
|
+
#
|
|
8
|
+
# # One-to-one map
|
|
9
|
+
# map = Map.new(source_df, target_df)
|
|
10
|
+
# map.source(:a).target(:aprime)
|
|
11
|
+
# .transform(->(v) { "#{v}prime" })
|
|
12
|
+
# # see tests for more
|
|
13
|
+
class Map
|
|
14
|
+
|
|
15
|
+
# Public: Initializes a map
|
|
16
|
+
#
|
|
17
|
+
# source_df - The source dataframe.
|
|
18
|
+
# target_df - The target dataframe.
|
|
19
|
+
# source_metadata - Metadata (Remi::Fields) for the source fields.
|
|
20
|
+
# target_metadata - Metadata (Remi::Fields) for the target fields.
|
|
21
|
+
def initialize(source_df, target_df, source_metadata: Remi::Fields.new, target_metadata: Remi::Fields.new)
|
|
22
|
+
@source_df = source_df
|
|
23
|
+
@target_df = target_df
|
|
24
|
+
|
|
25
|
+
@source_metadata = source_metadata
|
|
26
|
+
@target_metadata = target_metadata
|
|
27
|
+
|
|
28
|
+
@source_vectors = []
|
|
29
|
+
@target_vectors = []
|
|
30
|
+
@transforms = []
|
|
31
|
+
@transform_procs = []
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Public: Returns the map's source dataframe
|
|
35
|
+
attr_reader :source_df
|
|
36
|
+
|
|
37
|
+
# Public: Returns the map's target dataframe
|
|
38
|
+
attr_reader :target_df
|
|
39
|
+
|
|
40
|
+
# Public: Returns all of the map's source vectors
|
|
41
|
+
attr_reader :source_vectors
|
|
42
|
+
|
|
43
|
+
# Public: Returns all of the map's target vectors
|
|
44
|
+
attr_reader :target_vectors
|
|
45
|
+
|
|
46
|
+
# Public: Returns all of the map's defined transforms
|
|
47
|
+
attr_reader :transforms
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Public: Adds a list of source vectors to a map
|
|
51
|
+
#
|
|
52
|
+
# source_vectors - A list of source vectors.
|
|
53
|
+
#
|
|
54
|
+
# Returns self
|
|
55
|
+
def source(*source_vectors)
|
|
56
|
+
@source_vectors += Array(source_vectors)
|
|
57
|
+
self
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Public: Adds a list of target vectors to a map
|
|
61
|
+
#
|
|
62
|
+
# target_vectors - A list of target vectors.
|
|
63
|
+
#
|
|
64
|
+
# Returns self
|
|
65
|
+
def target(*target_vectors)
|
|
66
|
+
@target_vectors += Array(target_vectors)
|
|
67
|
+
self
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Public: Adds a transform to the map
|
|
71
|
+
# A transform is an object that behaves like a proc and responds
|
|
72
|
+
# to #call and #to_proc. This method returns self, so transforms
|
|
73
|
+
# may be chained. They will be executed in the order that they are
|
|
74
|
+
# applied to the map.
|
|
75
|
+
#
|
|
76
|
+
# tform - The transform to add
|
|
77
|
+
#
|
|
78
|
+
# Returns self
|
|
79
|
+
def transform(tform)
|
|
80
|
+
@transforms << tform
|
|
81
|
+
@transform_procs << tform.to_proc
|
|
82
|
+
self
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Public: Executes the map defined by the source vectors, target vectors, and transforms.
|
|
86
|
+
#
|
|
87
|
+
# Returns the target dataframe.
|
|
88
|
+
def execute
|
|
89
|
+
inject_transforms_with_metadata
|
|
90
|
+
set_default_transform
|
|
91
|
+
map_to_target_df
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Public: Returns the number of source vectors defined
|
|
95
|
+
def source_cardinality
|
|
96
|
+
@source_vectors.size
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Public: Returns the number of target vectors defined
|
|
100
|
+
def target_cardinality
|
|
101
|
+
@target_vectors.size
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
private
|
|
108
|
+
|
|
109
|
+
def inject_transforms_with_metadata
|
|
110
|
+
@transforms.each do |tform|
|
|
111
|
+
if tform.respond_to? :source_metadata
|
|
112
|
+
meta = @source_vectors.map { |v| @source_metadata[v] || {} }
|
|
113
|
+
tform.source_metadata = meta.size > 1 ? meta : meta.first
|
|
114
|
+
end
|
|
115
|
+
if tform.respond_to? :target_metadata
|
|
116
|
+
meta = @target_vectors.map { |v| @target_metadata[v] || {} }
|
|
117
|
+
tform.target_metadata = meta.size > 1 ? meta : meta.first
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Private: If no transforms are defined, assume it's a simple copy
|
|
123
|
+
def set_default_transform
|
|
124
|
+
if @transforms.size == 0
|
|
125
|
+
transform(->(v) { v })
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Private: Converts the transformed data into vectors in the target dataframe.
|
|
130
|
+
def map_to_target_df
|
|
131
|
+
result_hash_of_arrays.each do |vector, values|
|
|
132
|
+
@target_df[vector] = Daru::Vector.new(values, index: @source_df.index)
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
@target_df
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# Private: Splits the transformed rows into separate arrays, indexed by vector name
|
|
139
|
+
def result_hash_of_arrays
|
|
140
|
+
result = @target_vectors.each_with_object({}) { |v,h| h[v] = [] }
|
|
141
|
+
|
|
142
|
+
transformed_rows.each do |result_row|
|
|
143
|
+
result.keys.each do |vector|
|
|
144
|
+
result[vector] << result_row[vector]
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
result
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Private: Applies all of the transforms to each row.
|
|
152
|
+
def transformed_rows
|
|
153
|
+
work_rows.map do |row|
|
|
154
|
+
@transform_procs.each do |tform|
|
|
155
|
+
result = call_transform(tform, row)
|
|
156
|
+
row[*@target_vectors] = result if target_cardinality == 1
|
|
157
|
+
row[*@source_vectors] = result if source_cardinality == 1 && target_cardinality == 1
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
row
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
# Private: Applies the given transform to the given row.
|
|
165
|
+
#
|
|
166
|
+
# tform - The transform (proc).
|
|
167
|
+
# row - The row.
|
|
168
|
+
#
|
|
169
|
+
# Returns the return value of the transform.
|
|
170
|
+
def call_transform(tform, row)
|
|
171
|
+
if source_cardinality == 0 && target_cardinality == 1
|
|
172
|
+
tform.call
|
|
173
|
+
elsif source_cardinality == 1 && target_cardinality == 1
|
|
174
|
+
tform.call(row[*@source_vectors])
|
|
175
|
+
else
|
|
176
|
+
tform.call(row)
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
# Private: Returns a unique list of all vectors (source and target) involved in the map.
|
|
181
|
+
def all_vectors
|
|
182
|
+
@all_vectors ||= (@source_vectors + @target_vectors).uniq
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# Private: Returns a hash that maps vector names to an index
|
|
186
|
+
# The index is the position of the vector value for a row in #work_rows
|
|
187
|
+
def rows_index
|
|
188
|
+
@rows_index ||= all_vectors.each_with_index.to_h
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# Private: Converts all of the vectors involved in the map into an array of row objects.
|
|
192
|
+
def work_rows
|
|
193
|
+
all_vectors.map do |vector|
|
|
194
|
+
is_source_vector = @source_vectors.include? vector
|
|
195
|
+
|
|
196
|
+
if is_source_vector && @source_df.vectors.include?(vector)
|
|
197
|
+
@source_df[vector].to_a
|
|
198
|
+
elsif is_source_vector && @target_df.vectors.include?(vector)
|
|
199
|
+
@target_df[vector].to_a
|
|
200
|
+
else
|
|
201
|
+
Array.new(@source_df.size)
|
|
202
|
+
end
|
|
203
|
+
end.transpose.map do |row_as_array|
|
|
204
|
+
Row.new(rows_index, row_as_array, source_keys: @source_vectors)
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
module Remi
|
|
2
|
+
class SourceToTargetMap
|
|
3
|
+
|
|
4
|
+
# Public: A row is composed of an array and an index hash.
|
|
5
|
+
# The index hash converts a key into a number representing the position in the array.
|
|
6
|
+
# Functionally, it's very similar to how a hash works. However,
|
|
7
|
+
# we need to create a lot of Row objects that all have the same
|
|
8
|
+
# index hash. All of those row objects can reference the same
|
|
9
|
+
# index hash object and thus dramatically reduce the amount of memory
|
|
10
|
+
# needed to store a lot of rows.
|
|
11
|
+
#
|
|
12
|
+
# Examples
|
|
13
|
+
#
|
|
14
|
+
# row = Row.new({ a: 0, b: 1 }, ['alpha', 'beta'])
|
|
15
|
+
# row[:a] #=> 'alpha'
|
|
16
|
+
# row[:b] #=> 'beta'
|
|
17
|
+
class Row
|
|
18
|
+
|
|
19
|
+
# Public: Converts hash-like objects into rows, array-like objects into rows,
|
|
20
|
+
# or just returns a row if one is provided.
|
|
21
|
+
#
|
|
22
|
+
# arg - A Row, array-like object, or hash-like object.
|
|
23
|
+
#
|
|
24
|
+
# Examples:
|
|
25
|
+
#
|
|
26
|
+
# Row[{ a: 'one', b: 'two' }] #=> #<Row @index={:a=>0, :b=>1} @values=["one", "two"]>
|
|
27
|
+
# Returns a Row
|
|
28
|
+
def self.[](arg)
|
|
29
|
+
return arg if arg.is_a? Row
|
|
30
|
+
|
|
31
|
+
if arg.respond_to? :keys
|
|
32
|
+
Row.new(arg.keys.each_with_index.to_h, arg.values)
|
|
33
|
+
else
|
|
34
|
+
Row.new(0.upto(arg.size).each_with_index.to_h, arg)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Public: Initializes a row object.
|
|
40
|
+
#
|
|
41
|
+
# index - A hash containing keys that are usually symbols and values that
|
|
42
|
+
# represent a position in the values array.
|
|
43
|
+
# values - An array of values.
|
|
44
|
+
# source_keys - Array of keys that should be treated as data
|
|
45
|
+
# sources for a row transformation
|
|
46
|
+
def initialize(index, values, source_keys: nil)
|
|
47
|
+
@index = index
|
|
48
|
+
@inverted_index = index.invert
|
|
49
|
+
@values = values
|
|
50
|
+
@source_keys = source_keys || index.keys
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Public: Returns the value of the row array for the given key
|
|
54
|
+
def [](key)
|
|
55
|
+
@values[@index[key]]
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Public: Sets the value of the row array for the given key
|
|
59
|
+
def []=(key, value)
|
|
60
|
+
@values[@index[key]] = value
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Public: Makes Row enumerable, and acts like a hash.
|
|
64
|
+
def each &block
|
|
65
|
+
@values.each_with_index { |value, idx| block.call([@inverted_index[idx], value]) }
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def each_source &block
|
|
69
|
+
Enumerator.new do |y|
|
|
70
|
+
source_keys.each { |key| y << [key, self[key]] }
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def each_target &block
|
|
75
|
+
Enumerator.new do |y|
|
|
76
|
+
target_keys.each { |key| y << [key, self[key]] }
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Public: Returns the values stored in the row.
|
|
81
|
+
def to_a
|
|
82
|
+
@values
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Public: Returns the keys of the index.
|
|
86
|
+
def keys
|
|
87
|
+
@index.keys
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def source_keys
|
|
91
|
+
@source_keys
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def target_keys
|
|
95
|
+
@target_keys ||= keys - source_keys
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
@@ -1,117 +1,82 @@
|
|
|
1
1
|
module Remi
|
|
2
|
+
|
|
3
|
+
# Public: Class used to define a DSL for source to target maps.
|
|
4
|
+
#
|
|
5
|
+
# Examples
|
|
6
|
+
#
|
|
7
|
+
# SourceToTargetMap.apply(df) do
|
|
8
|
+
# map source(:a) .target(:aprime)
|
|
9
|
+
# .transform(->(v) { "#{v}prime" })
|
|
10
|
+
# map source(:a) .target(:aup)
|
|
11
|
+
# .transform(->(v) { "#{v.upcase}" })
|
|
12
|
+
# end
|
|
13
|
+
# #=> <Daru::DataFrame:70291322684920 @name = 8c546a52-c1a7-495a-996a-7f352b0087b7 @size = 3>
|
|
14
|
+
# a aprime aup
|
|
15
|
+
# 0 a1 a1prime A1
|
|
16
|
+
# 1 a2 a2prime A2
|
|
17
|
+
# 2 a3 a3prime A3
|
|
2
18
|
class SourceToTargetMap
|
|
19
|
+
|
|
20
|
+
# Public: Initializes the SourceToTargetMap DSL
|
|
21
|
+
#
|
|
22
|
+
# source_df - The source dataframe.
|
|
23
|
+
# target_df - The target dataframe (default: source_df).
|
|
24
|
+
# source_metadata - Metadata (Remi::Fields) for the source fields.
|
|
25
|
+
# target_metadata - Metadata (Remi::Fields) for the target fields.
|
|
3
26
|
def initialize(source_df, target_df=nil, source_metadata: Remi::Fields.new, target_metadata: Remi::Fields.new)
|
|
4
27
|
@source_df = source_df
|
|
5
28
|
@source_metadata = source_metadata
|
|
6
29
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
@target_metadata = target_metadata
|
|
10
|
-
else
|
|
11
|
-
@target_df = @source_df
|
|
12
|
-
@target_metadata = @source_metadata
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
reset_map
|
|
30
|
+
@target_df = target_df || source_df
|
|
31
|
+
@target_metadata = target_metadata || source_metadata
|
|
16
32
|
end
|
|
17
33
|
|
|
34
|
+
attr_reader :source_df, :target_df
|
|
35
|
+
|
|
36
|
+
# Public: Expects a block in which the DSL will be applied.
|
|
37
|
+
#
|
|
38
|
+
# Same arguments as the constructor.
|
|
39
|
+
#
|
|
40
|
+
# Returns the target dataframe.
|
|
18
41
|
def self.apply(source_df, target_df=nil, source_metadata: Remi::Fields.new, target_metadata: Remi::Fields.new, &block)
|
|
19
42
|
sttm = SourceToTargetMap.new(source_df, target_df, source_metadata: source_metadata, target_metadata: target_metadata)
|
|
20
43
|
Docile.dsl_eval(sttm, &block)
|
|
44
|
+
target_df || source_df
|
|
21
45
|
end
|
|
22
46
|
|
|
47
|
+
# Public: Adds a list of source vectors to a new mapping.
|
|
48
|
+
#
|
|
49
|
+
# source_vectors - A list of vector names.
|
|
50
|
+
#
|
|
51
|
+
# Returns a SourceToTargetMap::Map with the defined source vectors.
|
|
23
52
|
def source(*source_vectors)
|
|
24
|
-
|
|
25
|
-
self
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
def transform(*transforms)
|
|
29
|
-
@transforms += Array(transforms)
|
|
30
|
-
@transform_procs += Array(transforms).map { |t| t.to_proc }
|
|
31
|
-
self
|
|
53
|
+
new_map.source(*source_vectors)
|
|
32
54
|
end
|
|
33
55
|
|
|
56
|
+
# Public: Adds a list of target vectors to a new mapping.
|
|
57
|
+
#
|
|
58
|
+
# target_vectors - A list of target names.
|
|
59
|
+
#
|
|
60
|
+
# Returns a SourceToTargetMap::Map with the defined target vectors.
|
|
34
61
|
def target(*target_vectors)
|
|
35
|
-
|
|
36
|
-
self
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def reset_map
|
|
40
|
-
@source_vectors = []
|
|
41
|
-
@target_vectors = []
|
|
42
|
-
@transforms = []
|
|
43
|
-
@transform_procs = []
|
|
62
|
+
new_map.target(*target_vectors)
|
|
44
63
|
end
|
|
45
64
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
do_map_direct_copy
|
|
54
|
-
when @source_vectors.size == 1 && @target_vectors.size == 1
|
|
55
|
-
do_map_single_source_and_target_vector
|
|
56
|
-
else
|
|
57
|
-
do_map_generic
|
|
58
|
-
end
|
|
59
|
-
reset_map
|
|
65
|
+
# Public: Executes a mapping.
|
|
66
|
+
#
|
|
67
|
+
# defined_map - The SourceToTargetMap::Map object to execute
|
|
68
|
+
#
|
|
69
|
+
# Returns the target dataframe.
|
|
70
|
+
def map(defined_map)
|
|
71
|
+
defined_map.execute
|
|
60
72
|
end
|
|
61
73
|
|
|
62
74
|
|
|
63
|
-
|
|
64
75
|
private
|
|
65
76
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
meta = @source_vectors.map { |v| @source_metadata[v] || {} }
|
|
70
|
-
tform.source_metadata = meta.size > 1 ? meta : meta.first
|
|
71
|
-
end
|
|
72
|
-
if tform.respond_to? :target_metadata
|
|
73
|
-
meta = @target_vectors.map { |v| @target_metadata[v] || {} }
|
|
74
|
-
tform.target_metadata = meta.size > 1 ? meta : meta.first
|
|
75
|
-
end
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
def do_map_direct_copy
|
|
80
|
-
@target_vectors.each do |target_vector|
|
|
81
|
-
@target_df[target_vector] = @source_df[@source_vectors.first].dup
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
def do_map_single_source_and_target_vector
|
|
86
|
-
@target_df[@target_vectors.first] = @source_df[@source_vectors.first].recode do |vector_value|
|
|
87
|
-
@transform_procs.reduce(vector_value) { |value, tform| tform.call(*(value.nil? ? [nil] : value)) }
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
def do_map_generic
|
|
92
|
-
work_vector = if @source_vectors.size == 1 && @source_vectors.first != nil
|
|
93
|
-
@source_df[@source_vectors.first].dup
|
|
94
|
-
elsif @source_vectors.size > 1
|
|
95
|
-
# It's faster to zip together several vectors and recode those than it is to
|
|
96
|
-
# recode a dataframe row by row!
|
|
97
|
-
Daru::Vector.new(@source_df[@source_vectors.first].zip(*@source_vectors[1..-1].map { |name| @source_df[name] }), index: @source_df.index)
|
|
98
|
-
else
|
|
99
|
-
Daru::Vector.new([], index: @source_df.index)
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
work_vector.recode! do |vector_value|
|
|
103
|
-
@transform_procs.reduce(vector_value) { |value, tform| tform.call(*(value.nil? ? [nil] : value)) }
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
@target_vectors.each_with_index do |target_vector, vector_idx|
|
|
107
|
-
@target_df[target_vector] = work_vector.recode do |vector_value|
|
|
108
|
-
if vector_value.is_a?(Array) then
|
|
109
|
-
vector_value[vector_idx]
|
|
110
|
-
else
|
|
111
|
-
vector_value
|
|
112
|
-
end
|
|
113
|
-
end
|
|
114
|
-
end
|
|
77
|
+
# Private: Returns a new SourceToTargetMap::Map
|
|
78
|
+
def new_map
|
|
79
|
+
Map.new(@source_df, @target_df, source_metadata: @source_metadata, target_metadata: @target_metadata)
|
|
115
80
|
end
|
|
116
81
|
end
|
|
117
82
|
end
|
data/lib/remi/transform.rb
CHANGED
|
@@ -35,11 +35,11 @@ module Remi
|
|
|
35
35
|
# values - The values to be transformed.
|
|
36
36
|
#
|
|
37
37
|
# Returns the transformed value.
|
|
38
|
-
def call(*
|
|
39
|
-
if
|
|
40
|
-
to_proc.call
|
|
38
|
+
def call(*args)
|
|
39
|
+
if to_proc.arity == 0
|
|
40
|
+
to_proc.call
|
|
41
41
|
else
|
|
42
|
-
to_proc.call(
|
|
42
|
+
to_proc.call(*args)
|
|
43
43
|
end
|
|
44
44
|
end
|
|
45
45
|
|
|
@@ -135,8 +135,9 @@ module Remi
|
|
|
135
135
|
@delimiter = delimiter
|
|
136
136
|
end
|
|
137
137
|
|
|
138
|
-
def transform(
|
|
139
|
-
|
|
138
|
+
def transform(row)
|
|
139
|
+
row = SourceToTargetMap::Row[row]
|
|
140
|
+
row.each_source.map { |key, value| value.blank? ? nil : value }.compact.join(@delimiter)
|
|
140
141
|
end
|
|
141
142
|
end
|
|
142
143
|
|
|
@@ -188,8 +189,9 @@ module Remi
|
|
|
188
189
|
@default = default
|
|
189
190
|
end
|
|
190
191
|
|
|
191
|
-
def transform(
|
|
192
|
-
|
|
192
|
+
def transform(row)
|
|
193
|
+
row = SourceToTargetMap::Row[row]
|
|
194
|
+
row.each_source.find(->() { [nil, @default] }) { |key, value| !value.blank? }[1]
|
|
193
195
|
end
|
|
194
196
|
end
|
|
195
197
|
|
|
@@ -338,7 +340,10 @@ module Remi
|
|
|
338
340
|
@measure = measure
|
|
339
341
|
end
|
|
340
342
|
|
|
341
|
-
def transform(
|
|
343
|
+
def transform(row)
|
|
344
|
+
row = SourceToTargetMap::Row[row]
|
|
345
|
+
from_date = row[row.keys[0]]
|
|
346
|
+
to_date = row[row.keys[1]]
|
|
342
347
|
|
|
343
348
|
case @measure.to_sym
|
|
344
349
|
when :days
|
|
@@ -366,7 +371,7 @@ module Remi
|
|
|
366
371
|
@constant = constant
|
|
367
372
|
end
|
|
368
373
|
|
|
369
|
-
def transform
|
|
374
|
+
def transform
|
|
370
375
|
@constant
|
|
371
376
|
end
|
|
372
377
|
end
|
|
@@ -563,9 +568,10 @@ module Remi
|
|
|
563
568
|
# wildcards and match anything. The first row that matches wins
|
|
564
569
|
# and the sieve progression stops.
|
|
565
570
|
#
|
|
566
|
-
# sieve_df - The sieve, defined as a dataframe. The
|
|
567
|
-
#
|
|
568
|
-
#
|
|
571
|
+
# sieve_df - The sieve, defined as a dataframe. The names of the
|
|
572
|
+
# sieve vectors must correspond to the names of the
|
|
573
|
+
# vectors in the dataframe source to target map. The
|
|
574
|
+
# last vector in the sieve_df is used as the result of the sieve.
|
|
569
575
|
#
|
|
570
576
|
#
|
|
571
577
|
# Examples:
|
|
@@ -612,23 +618,26 @@ module Remi
|
|
|
612
618
|
class DataFrameSieve < Transform
|
|
613
619
|
def initialize(sieve_df, *args, **kargs, &block)
|
|
614
620
|
super
|
|
615
|
-
@
|
|
621
|
+
@sieve_table = sieve_df.transpose.to_h.values
|
|
616
622
|
end
|
|
617
623
|
|
|
618
|
-
|
|
619
|
-
|
|
624
|
+
|
|
625
|
+
def transform(row)
|
|
626
|
+
sieve_keys = @sieve_table.first.index.to_a
|
|
620
627
|
sieve_result_key = sieve_keys.pop
|
|
621
628
|
|
|
622
|
-
|
|
629
|
+
raise ArgumentError, "#{sieve_keys - row.source_keys} not found in row" unless (sieve_keys - row.source_keys).size == 0
|
|
630
|
+
|
|
631
|
+
@sieve_table.each.find do |sieve_row|
|
|
623
632
|
match_row = true
|
|
624
|
-
sieve_keys.
|
|
625
|
-
match_value = if sieve_row[
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
match_row &&= sieve_row[
|
|
633
|
+
sieve_keys.each do |sieve_key|
|
|
634
|
+
match_value = if sieve_row[sieve_key].is_a?(Regexp)
|
|
635
|
+
!!sieve_row[sieve_key].match(row[sieve_key])
|
|
636
|
+
else
|
|
637
|
+
sieve_row[sieve_key] == row[sieve_key]
|
|
638
|
+
end
|
|
639
|
+
|
|
640
|
+
match_row &&= sieve_row[sieve_key].nil? || match_value
|
|
632
641
|
end
|
|
633
642
|
match_row
|
|
634
643
|
end[sieve_result_key]
|
|
@@ -661,7 +670,7 @@ module Remi
|
|
|
661
670
|
attr_reader :buckets
|
|
662
671
|
attr_reader :current_population
|
|
663
672
|
|
|
664
|
-
def transform
|
|
673
|
+
def transform
|
|
665
674
|
get_next_value
|
|
666
675
|
end
|
|
667
676
|
|
data/lib/remi/version.rb
CHANGED
data/lib/remi.rb
CHANGED
|
@@ -38,6 +38,8 @@ require 'remi/version.rb'
|
|
|
38
38
|
require 'remi/settings'
|
|
39
39
|
require 'remi/job'
|
|
40
40
|
require 'remi/source_to_target_map'
|
|
41
|
+
require 'remi/source_to_target_map/map'
|
|
42
|
+
require 'remi/source_to_target_map/row'
|
|
41
43
|
require 'remi/field_symbolizers'
|
|
42
44
|
|
|
43
45
|
require 'remi/refinements/symbolizer'
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
require_relative 'remi_spec'
|
|
2
|
+
|
|
3
|
+
describe SourceToTargetMap do
|
|
4
|
+
let(:df) do
|
|
5
|
+
Remi::DataFrame::Daru.new(
|
|
6
|
+
[
|
|
7
|
+
['a1','b1','c1', ['d',1]],
|
|
8
|
+
['a2','b2','c2', ['d',2]],
|
|
9
|
+
['a3','b3','c3', ['d',3]],
|
|
10
|
+
].transpose,
|
|
11
|
+
order: [:a, :b, :c, :d]
|
|
12
|
+
)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
let(:map) { SourceToTargetMap::Map.new(df, df) }
|
|
17
|
+
|
|
18
|
+
describe 'one-to-one maps' do
|
|
19
|
+
shared_examples_for 'one-to-one map' do
|
|
20
|
+
it 'provides a value to the transform, and expects a return value' do
|
|
21
|
+
expect(result).to eq ['a1prime', 'a2prime', 'a3prime']
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
|
25
|
+
map.transform(->(v) { "#{v}-prime" })
|
|
26
|
+
expect(result).to eq ['a1prime-prime', 'a2prime-prime', 'a3prime-prime']
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
context 'standard use' do
|
|
31
|
+
before { map.source(:a) .target(:aprime) .transform(->(v) { "#{v}prime" }) }
|
|
32
|
+
|
|
33
|
+
let(:result) do
|
|
34
|
+
map.execute
|
|
35
|
+
df[:aprime].to_a
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
it_behaves_like 'one-to-one map'
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
context 'the source and target have the same name' do
|
|
42
|
+
before { map.source(:a) .target(:a) .transform(->(v) { "#{v}prime" }) }
|
|
43
|
+
|
|
44
|
+
let(:result) do
|
|
45
|
+
map.execute
|
|
46
|
+
df[:a].to_a
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it_behaves_like 'one-to-one map'
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
context 'without any transforms', wip: true do
|
|
53
|
+
before { map.source(:a) .target(:aprime) }
|
|
54
|
+
|
|
55
|
+
let(:result) do
|
|
56
|
+
map.execute
|
|
57
|
+
df[:aprime].to_a
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it 'copies data from source to target' do
|
|
61
|
+
expect(result).to eq ['a1', 'a2', 'a3']
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
context 'source and target dataframe are different' do
|
|
67
|
+
let(:map) { SourceToTargetMap::Map.new(df, df_target) }
|
|
68
|
+
|
|
69
|
+
context 'vectors referenced in the source only exist on the target' do
|
|
70
|
+
let(:df_target) do
|
|
71
|
+
Remi::DataFrame::Daru.new({ a_in_target: [ 'a1target', 'a2target', 'a3target' ] }, index: df.index)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
before { map.source(:a_in_target) .target(:aprime) .transform(->(v) { "#{v}prime" }) }
|
|
75
|
+
|
|
76
|
+
let(:result) do
|
|
77
|
+
map.execute
|
|
78
|
+
df_target[:aprime].to_a
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
it 'uses the target values' do
|
|
82
|
+
expect(result).to eq ['a1targetprime', 'a2targetprime', 'a3targetprime']
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
context 'vectors referenced in the source exist on both source and target' do
|
|
87
|
+
let(:df_target) do
|
|
88
|
+
Remi::DataFrame::Daru.new({ a: [ 'a1target', 'a2target', 'a3target' ] }, index: df.index)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
before { map.source(:a) .target(:aprime) .transform(->(v) { "#{v}prime" }) }
|
|
92
|
+
|
|
93
|
+
let(:result) do
|
|
94
|
+
map.execute
|
|
95
|
+
df_target[:aprime].to_a
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
it 'uses the source values' do
|
|
99
|
+
expect(result).to eq ['a1prime', 'a2prime', 'a3prime']
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
describe 'one-to-one maps where the source and target have the same name' do
|
|
107
|
+
before { map.source(:a) .target(:a) .transform(->(v) { "#{v}prime" }) }
|
|
108
|
+
|
|
109
|
+
let(:result) do
|
|
110
|
+
map.execute
|
|
111
|
+
df[:a].to_a
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
it 'provides a value to the transform, and expects a return value' do
|
|
115
|
+
expect(result).to eq ['a1prime', 'a2prime', 'a3prime']
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
|
119
|
+
map.transform(->(v) { "#{v}-prime" })
|
|
120
|
+
expect(result).to eq ['a1prime-prime', 'a2prime-prime', 'a3prime-prime']
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
describe 'many-to-one maps' do
|
|
125
|
+
before { map.source(:a,:b) .target(:ab) .transform(->(row) { row[:a] + row[:b] }) }
|
|
126
|
+
|
|
127
|
+
let(:result) do
|
|
128
|
+
map.execute
|
|
129
|
+
df[:ab].to_a
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
it 'provides a row to the transform, and expects a return value' do
|
|
133
|
+
expect(result).to eq ['a1b1', 'a2b2', 'a3b3']
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
|
137
|
+
map.transform(->(row) { "-#{row[:ab]}-" })
|
|
138
|
+
expect(result).to eq ['-a1b1-', '-a2b2-', '-a3b3-']
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
describe 'one-to-many maps' do
|
|
143
|
+
before do
|
|
144
|
+
map.source(:a) .target(:a_col, :a_row)
|
|
145
|
+
.transform(->(row) {
|
|
146
|
+
row[:a_col] = row[:a][0]
|
|
147
|
+
row[:a_row] = row[:a][1]
|
|
148
|
+
})
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
let(:result) do
|
|
152
|
+
map.execute
|
|
153
|
+
df[:a_col, :a_row].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
it 'provides a row to the transform and expects the row to be populated' do
|
|
157
|
+
expect(result).to eq({ :a_col => ['a', 'a', 'a'], :a_row => ['1', '2', '3'] })
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
|
161
|
+
map.transform(->(row) {
|
|
162
|
+
row[:a_col] = "COL#{row[:a_col]}"
|
|
163
|
+
row[:a_row] = "ROW#{row[:a_row]}"
|
|
164
|
+
})
|
|
165
|
+
|
|
166
|
+
expect(result).to eq({ :a_col => ['COLa', 'COLa', 'COLa'], :a_row => ['ROW1', 'ROW2', 'ROW3'] })
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
describe 'many-to-many maps' do
|
|
171
|
+
before do
|
|
172
|
+
map.source(:b, :c) .target(:b_is_c, :c_is_b)
|
|
173
|
+
.transform(->(row) {
|
|
174
|
+
row[:b], row[:c] = row[:c], row[:b]
|
|
175
|
+
row[:b_is_c] = row[:b]
|
|
176
|
+
row[:c_is_b] = row[:c]
|
|
177
|
+
})
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
let(:result) do
|
|
181
|
+
map.execute
|
|
182
|
+
df[:b_is_c, :c_is_b].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
it 'provides a row to the transform and expects the row to be populated' do
|
|
186
|
+
expect(result).to eq({ :b_is_c => ['c1', 'c2', 'c3'], :c_is_b => ['b1', 'b2', 'b3'] })
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
it 'does not modify source vectors' do
|
|
190
|
+
map.execute
|
|
191
|
+
source_vectors = df[:b, :c].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
|
192
|
+
expect(source_vectors).to eq({ :b => ['b1', 'b2', 'b3'], :c => ['c1', 'c2', 'c3'] })
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
|
196
|
+
map.transform(->(row) {
|
|
197
|
+
row[:b_is_c] = row[:b_is_c].reverse
|
|
198
|
+
row[:c_is_b] = row[:c_is_b].reverse
|
|
199
|
+
})
|
|
200
|
+
|
|
201
|
+
expect(result).to eq({ :b_is_c => ['1c', '2c', '3c'], :c_is_b => ['1b', '2b', '3b'] })
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
describe 'zero-to-one maps' do
|
|
206
|
+
before do
|
|
207
|
+
values = ['x1', 'x2', 'x3']
|
|
208
|
+
map.target(:x) .transform(->() { values.shift })
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
let(:result) do
|
|
212
|
+
map.execute
|
|
213
|
+
df[:x].to_a
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
it 'expects no argument and expects a return value' do
|
|
217
|
+
expect(result).to eq ['x1', 'x2', 'x3']
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
|
221
|
+
map.transform(->() { 'useless' })
|
|
222
|
+
expect(result).to eq ['useless']*3
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
describe 'zero-to-many maps' do
|
|
227
|
+
before do
|
|
228
|
+
values = ['x1', 'x2', 'x3']
|
|
229
|
+
map.target(:x_col, :x_row)
|
|
230
|
+
.transform(->(row) {
|
|
231
|
+
x = values.shift
|
|
232
|
+
row[:x_col] = x[0]
|
|
233
|
+
row[:x_row] = x[1]
|
|
234
|
+
})
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
let(:result) do
|
|
238
|
+
map.execute
|
|
239
|
+
df[:x_col, :x_row].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
it 'provides a row to the transform and expects the row to be populated' do
|
|
243
|
+
expect(result).to eq({ :x_col => ['x', 'x', 'x'], :x_row => ['1', '2', '3'] })
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
it 'accepts chained transformations with the same source/target cardinality' do
|
|
247
|
+
map.transform(->(row) { row[:x_row] = "ROW#{row[:x_row]}" })
|
|
248
|
+
expect(result).to eq({ :x_col => ['x', 'x', 'x'], :x_row => ['ROW1', 'ROW2', 'ROW3'] })
|
|
249
|
+
end
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
describe 'vectors containing arrays' do
|
|
253
|
+
it 'provides the array as a value the transform with one-to-one maps' do
|
|
254
|
+
map.source(:d) .target(:dprime)
|
|
255
|
+
.transform(->(v) { v.join('-') })
|
|
256
|
+
map.execute
|
|
257
|
+
|
|
258
|
+
expect(df[:dprime].to_a).to eq ['d-1', 'd-2', 'd-3']
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
it 'provides the array in the row with one-to-many maps' do
|
|
262
|
+
map.source(:d) .target(:d_col, :d_row)
|
|
263
|
+
.transform(->(row) {
|
|
264
|
+
row[:d_col] = row[:d].first
|
|
265
|
+
row[:d_row] = row[:d].last
|
|
266
|
+
})
|
|
267
|
+
map.execute
|
|
268
|
+
|
|
269
|
+
result = df[:d_col, :d_row].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
|
270
|
+
expect(result).to eq({ :d_col => ['d', 'd', 'd'], :d_row => [1, 2, 3] })
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
describe 'using the DSL' do
|
|
275
|
+
let(:sttm) do
|
|
276
|
+
SourceToTargetMap.apply(df) do
|
|
277
|
+
map source(:a) .target(:aprime)
|
|
278
|
+
.transform(->(v) { "#{v}prime" })
|
|
279
|
+
map source(:a) .target(:aprimeprime)
|
|
280
|
+
.transform(->(v) { "#{v}prime" })
|
|
281
|
+
.transform(->(v) { "#{v}-prime" })
|
|
282
|
+
map source(:a, :d) .target(:ad)
|
|
283
|
+
.transform(->(row) { "#{row[:a][0]}-#{row[:d].first}-#{row[:d].last}" })
|
|
284
|
+
end
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
it 'allows one to specify multiple source-to-target maps in one block' do
|
|
288
|
+
sttm
|
|
289
|
+
result = df[:aprime, :aprimeprime, :ad].to_h.each_with_object({}) { |(k,v), h| h[k] = v.to_a }
|
|
290
|
+
expect(result).to eq({
|
|
291
|
+
:aprime => ['a1prime', 'a2prime', 'a3prime'],
|
|
292
|
+
:aprimeprime => ['a1prime-prime', 'a2prime-prime', 'a3prime-prime'],
|
|
293
|
+
:ad => ['a-d-1', 'a-d-2', 'a-d-3']
|
|
294
|
+
})
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
it 'returns a dataframe' do
|
|
298
|
+
expect(sttm).to be_a(Remi::DataFrame::Daru)
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: remi
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.38
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sterling Paramore
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-06-
|
|
11
|
+
date: 2016-06-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bond
|
|
@@ -194,6 +194,7 @@ files:
|
|
|
194
194
|
- features/step_definitions/remi_step.rb
|
|
195
195
|
- features/support/env.rb
|
|
196
196
|
- features/support/env_app.rb
|
|
197
|
+
- features/transforms/concatenate.feature
|
|
197
198
|
- features/transforms/data_frame_sieve.feature
|
|
198
199
|
- features/transforms/date_diff.feature
|
|
199
200
|
- features/transforms/nvl.feature
|
|
@@ -211,6 +212,7 @@ files:
|
|
|
211
212
|
- jobs/parameters_job.rb
|
|
212
213
|
- jobs/sample_job.rb
|
|
213
214
|
- jobs/sftp_file_target_job.rb
|
|
215
|
+
- jobs/transforms/concatenate_job.rb
|
|
214
216
|
- jobs/transforms/data_frame_sieve_job.rb
|
|
215
217
|
- jobs/transforms/date_diff_job.rb
|
|
216
218
|
- jobs/transforms/nvl_job.rb
|
|
@@ -244,6 +246,8 @@ files:
|
|
|
244
246
|
- lib/remi/settings.rb
|
|
245
247
|
- lib/remi/sf_bulk_helper.rb
|
|
246
248
|
- lib/remi/source_to_target_map.rb
|
|
249
|
+
- lib/remi/source_to_target_map/map.rb
|
|
250
|
+
- lib/remi/source_to_target_map/row.rb
|
|
247
251
|
- lib/remi/transform.rb
|
|
248
252
|
- lib/remi/version.rb
|
|
249
253
|
- remi.gemspec
|
|
@@ -259,6 +263,7 @@ files:
|
|
|
259
263
|
- spec/fixtures/unsupported_escape.csv
|
|
260
264
|
- spec/metadata_spec.rb
|
|
261
265
|
- spec/remi_spec.rb
|
|
266
|
+
- spec/source_to_target_map_spec.rb
|
|
262
267
|
- spec/transform_spec.rb
|
|
263
268
|
- workbooks/sample_workbook.ipynb
|
|
264
269
|
- workbooks/workbook_helper.rb
|
|
@@ -299,6 +304,7 @@ test_files:
|
|
|
299
304
|
- features/step_definitions/remi_step.rb
|
|
300
305
|
- features/support/env.rb
|
|
301
306
|
- features/support/env_app.rb
|
|
307
|
+
- features/transforms/concatenate.feature
|
|
302
308
|
- features/transforms/data_frame_sieve.feature
|
|
303
309
|
- features/transforms/date_diff.feature
|
|
304
310
|
- features/transforms/nvl.feature
|
|
@@ -319,4 +325,5 @@ test_files:
|
|
|
319
325
|
- spec/fixtures/unsupported_escape.csv
|
|
320
326
|
- spec/metadata_spec.rb
|
|
321
327
|
- spec/remi_spec.rb
|
|
328
|
+
- spec/source_to_target_map_spec.rb
|
|
322
329
|
- spec/transform_spec.rb
|