remi 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/remi.rb +4 -3
- data/lib/remi/cli.rb +1 -1
- data/lib/remi/cucumber/business_rules.rb +21 -1
- data/lib/remi/project/features/aggregate.feature +23 -0
- data/lib/remi/project/features/step_definitions/remi_step.rb +16 -0
- data/lib/remi/project/features/support/env_app.rb +1 -0
- data/lib/remi/project/jobs/aggregate_job.rb +20 -0
- data/lib/remi/project/jobs/copy_source_job.rb +2 -1
- data/lib/remi/project/jobs/sample_job.rb +2 -1
- data/lib/remi/refinements/daru.rb +58 -0
- data/lib/remi/{core/refinements.rb → refinements/symbolizer.rb} +2 -2
- data/lib/remi/version.rb +1 -1
- metadata +6 -5
- data/lib/remi/core/daru.rb +0 -28
- data/lib/remi/core/string.rb +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 92b5bf2de20c5287b1879fb4602e39e71da6f6a7
|
4
|
+
data.tar.gz: 05071d5457f84bbc42cf14fc4ee566c658492962
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d41fdb4d52f15b3632538fffc3d987c35b204ddb3a68f06db71ab44073884250ec7be7495531fa2965365a4d41a9508bb346d31dd91c203c877b4ed5821c83d3
|
7
|
+
data.tar.gz: 92be48c0d23b22dbcfbaca25a43e94edda2c89947abf6505425a6439a665b99f39e317daaf6190bf4f0e3c7086ae73178c2dd688c400aeefefd8c736963c7264
|
data/Gemfile.lock
CHANGED
data/lib/remi.rb
CHANGED
@@ -18,6 +18,8 @@ require 'active_support/core_ext/object/blank'
|
|
18
18
|
require 'active_support/core_ext/object/try'
|
19
19
|
require 'active_support/core_ext/object/inclusion'
|
20
20
|
require 'active_support/core_ext/string/inflections'
|
21
|
+
require 'active_support/core_ext/string/strip'
|
22
|
+
require 'active_support/core_ext/string/filters'
|
21
23
|
require 'active_support/core_ext/numeric/time'
|
22
24
|
require 'active_support/core_ext/numeric/conversions'
|
23
25
|
require 'active_support/core_ext/date/calculations'
|
@@ -30,7 +32,6 @@ require 'salesforce_bulk_api'
|
|
30
32
|
|
31
33
|
# Remi
|
32
34
|
require 'remi/version.rb'
|
33
|
-
require 'remi/core/string.rb'
|
34
35
|
|
35
36
|
require 'remi/settings'
|
36
37
|
require 'remi/job'
|
@@ -39,8 +40,8 @@ require 'remi/field_symbolizers'
|
|
39
40
|
require 'remi/data_subject'
|
40
41
|
require 'remi/sf_bulk_helper' # separate into SF support package
|
41
42
|
|
42
|
-
require 'remi/
|
43
|
-
require 'remi/
|
43
|
+
require 'remi/refinements/symbolizer'
|
44
|
+
require 'remi/refinements/daru'
|
44
45
|
|
45
46
|
require 'remi/extractor/sftp_file'
|
46
47
|
|
data/lib/remi/cli.rb
CHANGED
data/lib/remi/cucumber/business_rules.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Remi::BusinessRules
|
2
|
-
using Remi::
|
2
|
+
using Remi::Refinements::Symbolizer
|
3
3
|
|
4
4
|
def self.parse_full_field(full_field_name)
|
5
5
|
full_field_name.split(':').map(&:strip)
|
@@ -230,6 +230,15 @@ module Remi::BusinessRules
|
|
230
230
|
@data_obj.df.size
|
231
231
|
end
|
232
232
|
|
233
|
+
# Public: Converts the data subject to a hash where the keys are the table
|
234
|
+
# columns and the values are an array for the value of column for each row.
|
235
|
+
def column_hash
|
236
|
+
@data_obj.df.to_hash.reduce({}) do |h, (k,v)|
|
237
|
+
h[k.symbolize] = v.to_a
|
238
|
+
h
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
233
242
|
# For debugging only
|
234
243
|
def _df
|
235
244
|
@data_obj.df
|
@@ -429,6 +438,17 @@ module Remi::BusinessRules
|
|
429
438
|
end
|
430
439
|
df
|
431
440
|
end
|
441
|
+
|
442
|
+
# Public: Converts a Cucumber::Ast::Table to a hash where the keys are the table
|
443
|
+
# columns and the values are an array for the value of column for each row.
|
444
|
+
def column_hash
|
445
|
+
@table.hashes.reduce({}) do |h, row|
|
446
|
+
row.each do |k,v|
|
447
|
+
(h[k.symbolize] ||= []) << v
|
448
|
+
end
|
449
|
+
h
|
450
|
+
end
|
451
|
+
end
|
432
452
|
end
|
433
453
|
|
434
454
|
|
data/lib/remi/project/features/aggregate.feature
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Feature: Tests the aggregate refinement to the Daru library
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'Aggregate'
|
5
|
+
And the job source 'Source Data'
|
6
|
+
And the job target 'Target Data'
|
7
|
+
|
8
|
+
And the source 'Source Data'
|
9
|
+
And the target 'Target Data'
|
10
|
+
|
11
|
+
Scenario: The aggregator should find the minimum year for each 'Alpha'
|
12
|
+
Given the following example record for 'Source Data':
|
13
|
+
| Alpha | Year | something |
|
14
|
+
| a | 2016 | 1 |
|
15
|
+
| a | 2018 | 1 |
|
16
|
+
| b | 2016 | 2 |
|
17
|
+
| b | 2010 | 3 |
|
18
|
+
| a | 2017 | 4 |
|
19
|
+
And the following example record called 'expected result':
|
20
|
+
| Alpha | Year |
|
21
|
+
| a | 2016 |
|
22
|
+
| b | 2010 |
|
23
|
+
Then the target should match the example 'expected result'
|
data/lib/remi/project/features/step_definitions/remi_step.rb
CHANGED
@@ -201,6 +201,22 @@ Then /^the target field '(.+)' is the date (.+)$/ do |target_field, date_referen
|
|
201
201
|
step "the target field '#{target_field}' is set to the value \"*#{date_reference}*\""
|
202
202
|
end
|
203
203
|
|
204
|
+
Then /^the target '(.+)' should match the example '([[:alnum:]\s]+)'$/ do |target_name, example_name|
|
205
|
+
@brt.run_transforms
|
206
|
+
|
207
|
+
target_hash = @brt.targets[target_name].column_hash
|
208
|
+
example_hash = @brt.examples[example_name].column_hash
|
209
|
+
common_keys = target_hash.keys & example_hash.keys
|
210
|
+
|
211
|
+
expect(target_hash.select { |k,v| common_keys.include? k })
|
212
|
+
.to eq example_hash.select { |k,v| common_keys.include? k }
|
213
|
+
end
|
214
|
+
|
215
|
+
Then /^the target should match the example '([[:alnum:]\s]+)'$/ do |example_name|
|
216
|
+
target_name = @brt.targets.keys.first
|
217
|
+
step "the target '#{target_name}' should match the example '#{example_name}'"
|
218
|
+
end
|
219
|
+
|
204
220
|
|
205
221
|
### Transforms
|
206
222
|
|
data/lib/remi/project/jobs/aggregate_job.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require_relative 'all_jobs_shared'
|
2
|
+
|
3
|
+
class AggregateJob
|
4
|
+
include AllJobsShared
|
5
|
+
using Remi::Refinements::Daru
|
6
|
+
|
7
|
+
define_source :source_data, Remi::DataSource::DataFrame
|
8
|
+
define_target :target_data, Remi::DataTarget::DataFrame
|
9
|
+
|
10
|
+
define_transform :main, sources: :source_data, targets: :target_data do
|
11
|
+
|
12
|
+
mymin = lambda do |field, df, indicies|
|
13
|
+
values = indicies.map { |idx| df.row[idx][field] }
|
14
|
+
values.min
|
15
|
+
end
|
16
|
+
|
17
|
+
target_data.df = source_data.df.aggregate(by: :alpha, func: mymin.curry.(:year)).detach_index
|
18
|
+
target_data.df.vectors = Daru::Index.new([:alpha, :year])
|
19
|
+
end
|
20
|
+
end
|
data/lib/remi/project/jobs/copy_source_job.rb
CHANGED
@@ -2,11 +2,12 @@ require_relative 'all_jobs_shared'
|
|
2
2
|
|
3
3
|
class CopySourceJob
|
4
4
|
include AllJobsShared
|
5
|
+
using Remi::Refinements::Daru
|
5
6
|
|
6
7
|
define_source :source_data, Remi::DataSource::DataFrame
|
7
8
|
define_source :target_data, Remi::DataSource::DataFrame
|
8
9
|
|
9
10
|
define_transform :main, sources: :source_data, targets: :target_data do
|
10
|
-
target_data.df = source_data.df.monkey_dup
|
11
|
+
target_data.df = source_data.df.dup
|
11
12
|
end
|
12
13
|
end
|
data/lib/remi/project/jobs/sample_job.rb
CHANGED
@@ -4,6 +4,7 @@ require_relative 'all_jobs_shared'
|
|
4
4
|
|
5
5
|
class SampleJob
|
6
6
|
include AllJobsShared
|
7
|
+
using Remi::Refinements::Daru
|
7
8
|
|
8
9
|
define_source :existing_contacts, Remi::DataSource::Salesforce,
|
9
10
|
object: :Contact,
|
@@ -77,7 +78,7 @@ class SampleJob
|
|
77
78
|
define_transform :map_common_fields, sources: [:sample_file, :existing_contacts], targets: :all_contacts do
|
78
79
|
|
79
80
|
# Exclude all source records with an invalid program name
|
80
|
-
all_contacts.df = sample_file.df.monkey_dup
|
81
|
+
all_contacts.df = sample_file.df.dup
|
81
82
|
Remi::SourceToTargetMap.apply(all_contacts.df) do
|
82
83
|
map source(:program) .target(:Major__c)
|
83
84
|
.transform(Remi::Transform[:lookup][program_name_lookup])
|
data/lib/remi/refinements/daru.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
module Remi
|
2
|
+
module Refinements
|
3
|
+
module Daru
|
4
|
+
refine ::Daru::DataFrame do
|
5
|
+
|
6
|
+
# Public: Fixes dup issues in the Daru library (vectors not being duped).
|
7
|
+
def dup
|
8
|
+
dupdf = ::Daru::DataFrame.new([], index: self.index)
|
9
|
+
self.vectors.each do |v|
|
10
|
+
dupdf[v] = self[v]
|
11
|
+
end
|
12
|
+
|
13
|
+
dupdf
|
14
|
+
end
|
15
|
+
|
16
|
+
# Public: Saves a Dataframe to a file.
|
17
|
+
def hash_dump(filename)
|
18
|
+
File.write(filename, Marshal.dump(self.to_hash))
|
19
|
+
end
|
20
|
+
|
21
|
+
# Public: Creates a DataFrame by reading the dumped version from a file.
|
22
|
+
def self.from_hash_dump(filename)
|
23
|
+
::Daru::DataFrame.new(Marshal.load(File.read(filename)))
|
24
|
+
end
|
25
|
+
|
26
|
+
# Public: Allows the user to define an arbitrary aggregation function.
|
27
|
+
#
|
28
|
+
# by - The name of the DataFrame vector to use to group records.
|
29
|
+
# func - A lambda function that accepts two arguments - the first argument
|
30
|
+
# is the DataFrame and the second is the index of the elements belonging
|
31
|
+
# to a group.
|
32
|
+
#
|
33
|
+
# Example:
|
34
|
+
# df = Daru::DataFrame.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
|
35
|
+
#
|
36
|
+
# mymin = lambda do |field, df, indicies|
|
37
|
+
# values = indicies.map { |idx| df.row[idx][field] }
|
38
|
+
# values.min
|
39
|
+
# end
|
40
|
+
#
|
41
|
+
# df.aggregate(by: :a, func: mymin.curry.(:year))
|
42
|
+
#
|
43
|
+
#
|
44
|
+
# Returns a Daru::Vector.
|
45
|
+
def aggregate(by:, func:)
|
46
|
+
grouped = self.group_by(by)
|
47
|
+
::Daru::Vector.new(
|
48
|
+
grouped.groups.reduce({}) do |h, (key, indicies)|
|
49
|
+
h[key.size == 1 ? key.first : key] = func.(self, indicies)
|
50
|
+
h
|
51
|
+
end
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
data/lib/remi/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sterling Paramore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daru
|
@@ -198,9 +198,6 @@ files:
|
|
198
198
|
- doc/install-rbenv-os_x.md
|
199
199
|
- lib/remi.rb
|
200
200
|
- lib/remi/cli.rb
|
201
|
-
- lib/remi/core/daru.rb
|
202
|
-
- lib/remi/core/refinements.rb
|
203
|
-
- lib/remi/core/string.rb
|
204
201
|
- lib/remi/cucumber.rb
|
205
202
|
- lib/remi/cucumber/business_rules.rb
|
206
203
|
- lib/remi/cucumber/data_source.rb
|
@@ -218,6 +215,7 @@ files:
|
|
218
215
|
- lib/remi/field_symbolizers.rb
|
219
216
|
- lib/remi/job.rb
|
220
217
|
- lib/remi/lookup/regex_sieve.rb
|
218
|
+
- lib/remi/project/features/aggregate.feature
|
221
219
|
- lib/remi/project/features/examples.feature
|
222
220
|
- lib/remi/project/features/formulas.feature
|
223
221
|
- lib/remi/project/features/sample_job.feature
|
@@ -227,6 +225,7 @@ files:
|
|
227
225
|
- lib/remi/project/features/transforms/date_diff.feature
|
228
226
|
- lib/remi/project/features/transforms/parse_date.feature
|
229
227
|
- lib/remi/project/features/transforms/prefix.feature
|
228
|
+
- lib/remi/project/jobs/aggregate_job.rb
|
230
229
|
- lib/remi/project/jobs/all_jobs_shared.rb
|
231
230
|
- lib/remi/project/jobs/copy_source_job.rb
|
232
231
|
- lib/remi/project/jobs/sample_job.rb
|
@@ -234,6 +233,8 @@ files:
|
|
234
233
|
- lib/remi/project/jobs/transforms/parse_date_job.rb
|
235
234
|
- lib/remi/project/jobs/transforms/prefix_job.rb
|
236
235
|
- lib/remi/project/jobs/transforms/transform_jobs.rb
|
236
|
+
- lib/remi/refinements/daru.rb
|
237
|
+
- lib/remi/refinements/symbolizer.rb
|
237
238
|
- lib/remi/settings.rb
|
238
239
|
- lib/remi/sf_bulk_helper.rb
|
239
240
|
- lib/remi/source_to_target_map.rb
|
data/lib/remi/core/daru.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
module Daru
|
2
|
-
class DataFrame
|
3
|
-
def monkey_dup
|
4
|
-
dupdf = Daru::DataFrame.new([], index: self.index)
|
5
|
-
self.vectors.each do |v|
|
6
|
-
dupdf[v] = self[v]
|
7
|
-
end
|
8
|
-
|
9
|
-
dupdf
|
10
|
-
end
|
11
|
-
|
12
|
-
def monkey_merge(other)
|
13
|
-
other.vectors.each do |v|
|
14
|
-
self[v] = other[v]
|
15
|
-
end
|
16
|
-
|
17
|
-
self
|
18
|
-
end
|
19
|
-
|
20
|
-
def hash_dump(filename)
|
21
|
-
File.write(filename, Marshal.dump(self.to_hash))
|
22
|
-
end
|
23
|
-
|
24
|
-
def self.from_hash_dump(filename)
|
25
|
-
Daru::DataFrame.new(Marshal.load(File.read(filename)))
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
data/lib/remi/core/string.rb
DELETED