remi 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/remi.rb +4 -3
- data/lib/remi/cli.rb +1 -1
- data/lib/remi/cucumber/business_rules.rb +21 -1
- data/lib/remi/project/features/aggregate.feature +23 -0
- data/lib/remi/project/features/step_definitions/remi_step.rb +16 -0
- data/lib/remi/project/features/support/env_app.rb +1 -0
- data/lib/remi/project/jobs/aggregate_job.rb +20 -0
- data/lib/remi/project/jobs/copy_source_job.rb +2 -1
- data/lib/remi/project/jobs/sample_job.rb +2 -1
- data/lib/remi/refinements/daru.rb +58 -0
- data/lib/remi/{core/refinements.rb → refinements/symbolizer.rb} +2 -2
- data/lib/remi/version.rb +1 -1
- metadata +6 -5
- data/lib/remi/core/daru.rb +0 -28
- data/lib/remi/core/string.rb +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 92b5bf2de20c5287b1879fb4602e39e71da6f6a7
|
4
|
+
data.tar.gz: 05071d5457f84bbc42cf14fc4ee566c658492962
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d41fdb4d52f15b3632538fffc3d987c35b204ddb3a68f06db71ab44073884250ec7be7495531fa2965365a4d41a9508bb346d31dd91c203c877b4ed5821c83d3
|
7
|
+
data.tar.gz: 92be48c0d23b22dbcfbaca25a43e94edda2c89947abf6505425a6439a665b99f39e317daaf6190bf4f0e3c7086ae73178c2dd688c400aeefefd8c736963c7264
|
data/Gemfile.lock
CHANGED
data/lib/remi.rb
CHANGED
@@ -18,6 +18,8 @@ require 'active_support/core_ext/object/blank'
|
|
18
18
|
require 'active_support/core_ext/object/try'
|
19
19
|
require 'active_support/core_ext/object/inclusion'
|
20
20
|
require 'active_support/core_ext/string/inflections'
|
21
|
+
require 'active_support/core_ext/string/strip'
|
22
|
+
require 'active_support/core_ext/string/filters'
|
21
23
|
require 'active_support/core_ext/numeric/time'
|
22
24
|
require 'active_support/core_ext/numeric/conversions'
|
23
25
|
require 'active_support/core_ext/date/calculations'
|
@@ -30,7 +32,6 @@ require 'salesforce_bulk_api'
|
|
30
32
|
|
31
33
|
# Remi
|
32
34
|
require 'remi/version.rb'
|
33
|
-
require 'remi/core/string.rb'
|
34
35
|
|
35
36
|
require 'remi/settings'
|
36
37
|
require 'remi/job'
|
@@ -39,8 +40,8 @@ require 'remi/field_symbolizers'
|
|
39
40
|
require 'remi/data_subject'
|
40
41
|
require 'remi/sf_bulk_helper' # separate into SF support package
|
41
42
|
|
42
|
-
require 'remi/
|
43
|
-
require 'remi/
|
43
|
+
require 'remi/refinements/symbolizer'
|
44
|
+
require 'remi/refinements/daru'
|
44
45
|
|
45
46
|
require 'remi/extractor/sftp_file'
|
46
47
|
|
data/lib/remi/cli.rb
CHANGED
data/lib/remi/cucumber/business_rules.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Remi::BusinessRules
|
2
|
-
using Remi::
|
2
|
+
using Remi::Refinements::Symbolizer
|
3
3
|
|
4
4
|
def self.parse_full_field(full_field_name)
|
5
5
|
full_field_name.split(':').map(&:strip)
|
@@ -230,6 +230,15 @@ module Remi::BusinessRules
|
|
230
230
|
@data_obj.df.size
|
231
231
|
end
|
232
232
|
|
233
|
+
# Public: Converts the data subject to a hash where the keys are the table
|
234
|
+
# columns and the values are an array for the value of column for each row.
|
235
|
+
def column_hash
|
236
|
+
@data_obj.df.to_hash.reduce({}) do |h, (k,v)|
|
237
|
+
h[k.symbolize] = v.to_a
|
238
|
+
h
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
233
242
|
# For debugging only
|
234
243
|
def _df
|
235
244
|
@data_obj.df
|
@@ -429,6 +438,17 @@ module Remi::BusinessRules
|
|
429
438
|
end
|
430
439
|
df
|
431
440
|
end
|
441
|
+
|
442
|
+
# Public: Converts a Cucumber::Ast::Table to a hash where the keys are the table
|
443
|
+
# columns and the values are an array for the value of column for each row.
|
444
|
+
def column_hash
|
445
|
+
@table.hashes.reduce({}) do |h, row|
|
446
|
+
row.each do |k,v|
|
447
|
+
(h[k.symbolize] ||= []) << v
|
448
|
+
end
|
449
|
+
h
|
450
|
+
end
|
451
|
+
end
|
432
452
|
end
|
433
453
|
|
434
454
|
|
data/lib/remi/project/features/aggregate.feature
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Feature: Tests the aggregate refinement to the Daru library
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given the job is 'Aggregate'
|
5
|
+
And the job source 'Source Data'
|
6
|
+
And the job target 'Target Data'
|
7
|
+
|
8
|
+
And the source 'Source Data'
|
9
|
+
And the target 'Target Data'
|
10
|
+
|
11
|
+
Scenario: The aggregator should find the minimum year for each 'Alpha'
|
12
|
+
Given the following example record for 'Source Data':
|
13
|
+
| Alpha | Year | something |
|
14
|
+
| a | 2016 | 1 |
|
15
|
+
| a | 2018 | 1 |
|
16
|
+
| b | 2016 | 2 |
|
17
|
+
| b | 2010 | 3 |
|
18
|
+
| a | 2017 | 4 |
|
19
|
+
And the following example record called 'expected result':
|
20
|
+
| Alpha | Year |
|
21
|
+
| a | 2016 |
|
22
|
+
| b | 2010 |
|
23
|
+
Then the target should match the example 'expected result'
|
data/lib/remi/project/features/step_definitions/remi_step.rb
CHANGED
@@ -201,6 +201,22 @@ Then /^the target field '(.+)' is the date (.+)$/ do |target_field, date_referen
|
|
201
201
|
step "the target field '#{target_field}' is set to the value \"*#{date_reference}*\""
|
202
202
|
end
|
203
203
|
|
204
|
+
Then /^the target '(.+)' should match the example '([[:alnum:]\s]+)'$/ do |target_name, example_name|
|
205
|
+
@brt.run_transforms
|
206
|
+
|
207
|
+
target_hash = @brt.targets[target_name].column_hash
|
208
|
+
example_hash = @brt.examples[example_name].column_hash
|
209
|
+
common_keys = target_hash.keys & example_hash.keys
|
210
|
+
|
211
|
+
expect(target_hash.select { |k,v| common_keys.include? k })
|
212
|
+
.to eq example_hash.select { |k,v| common_keys.include? k }
|
213
|
+
end
|
214
|
+
|
215
|
+
Then /^the target should match the example '([[:alnum:]\s]+)'$/ do |example_name|
|
216
|
+
target_name = @brt.targets.keys.first
|
217
|
+
step "the target '#{target_name}' should match the example '#{example_name}'"
|
218
|
+
end
|
219
|
+
|
204
220
|
|
205
221
|
### Transforms
|
206
222
|
|
data/lib/remi/project/jobs/aggregate_job.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require_relative 'all_jobs_shared'
|
2
|
+
|
3
|
+
class AggregateJob
|
4
|
+
include AllJobsShared
|
5
|
+
using Remi::Refinements::Daru
|
6
|
+
|
7
|
+
define_source :source_data, Remi::DataSource::DataFrame
|
8
|
+
define_target :target_data, Remi::DataTarget::DataFrame
|
9
|
+
|
10
|
+
define_transform :main, sources: :source_data, targets: :target_data do
|
11
|
+
|
12
|
+
mymin = lambda do |field, df, indicies|
|
13
|
+
values = indicies.map { |idx| df.row[idx][field] }
|
14
|
+
values.min
|
15
|
+
end
|
16
|
+
|
17
|
+
target_data.df = source_data.df.aggregate(by: :alpha, func: mymin.curry.(:year)).detach_index
|
18
|
+
target_data.df.vectors = Daru::Index.new([:alpha, :year])
|
19
|
+
end
|
20
|
+
end
|
data/lib/remi/project/jobs/copy_source_job.rb
CHANGED
@@ -2,11 +2,12 @@ require_relative 'all_jobs_shared'
|
|
2
2
|
|
3
3
|
class CopySourceJob
|
4
4
|
include AllJobsShared
|
5
|
+
using Remi::Refinements::Daru
|
5
6
|
|
6
7
|
define_source :source_data, Remi::DataSource::DataFrame
|
7
8
|
define_source :target_data, Remi::DataSource::DataFrame
|
8
9
|
|
9
10
|
define_transform :main, sources: :source_data, targets: :target_data do
|
10
|
-
target_data.df = source_data.df.monkey_dup
|
11
|
+
target_data.df = source_data.df.dup
|
11
12
|
end
|
12
13
|
end
|
data/lib/remi/project/jobs/sample_job.rb
CHANGED
@@ -4,6 +4,7 @@ require_relative 'all_jobs_shared'
|
|
4
4
|
|
5
5
|
class SampleJob
|
6
6
|
include AllJobsShared
|
7
|
+
using Remi::Refinements::Daru
|
7
8
|
|
8
9
|
define_source :existing_contacts, Remi::DataSource::Salesforce,
|
9
10
|
object: :Contact,
|
@@ -77,7 +78,7 @@ class SampleJob
|
|
77
78
|
define_transform :map_common_fields, sources: [:sample_file, :existing_contacts], targets: :all_contacts do
|
78
79
|
|
79
80
|
# Exclude all source records with an invalid program name
|
80
|
-
all_contacts.df = sample_file.df.monkey_dup
|
81
|
+
all_contacts.df = sample_file.df.dup
|
81
82
|
Remi::SourceToTargetMap.apply(all_contacts.df) do
|
82
83
|
map source(:program) .target(:Major__c)
|
83
84
|
.transform(Remi::Transform[:lookup][program_name_lookup])
|
data/lib/remi/refinements/daru.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
module Remi
|
2
|
+
module Refinements
|
3
|
+
module Daru
|
4
|
+
refine ::Daru::DataFrame do
|
5
|
+
|
6
|
+
# Public: Fixes dup issues in the Daru library (vectors not being duped).
|
7
|
+
def dup
|
8
|
+
dupdf = ::Daru::DataFrame.new([], index: self.index)
|
9
|
+
self.vectors.each do |v|
|
10
|
+
dupdf[v] = self[v]
|
11
|
+
end
|
12
|
+
|
13
|
+
dupdf
|
14
|
+
end
|
15
|
+
|
16
|
+
# Public: Saves a Dataframe to a file.
|
17
|
+
def hash_dump(filename)
|
18
|
+
File.write(filename, Marshal.dump(self.to_hash))
|
19
|
+
end
|
20
|
+
|
21
|
+
# Public: Creates a DataFrame by reading the dumped version from a file.
|
22
|
+
def self.from_hash_dump(filename)
|
23
|
+
::Daru::DataFrame.new(Marshal.load(File.read(filename)))
|
24
|
+
end
|
25
|
+
|
26
|
+
# Public: Allows the user to define an arbitrary aggregation function.
|
27
|
+
#
|
28
|
+
# by - The name of the DataFrame vector to use to group records.
|
29
|
+
# func - A lambda function that accepts two arguments - the first argument
|
30
|
+
# is the DataFrame and the second is the index of the elements belonging
|
31
|
+
# to a group.
|
32
|
+
#
|
33
|
+
# Example:
|
34
|
+
# df = Daru::DataFrame.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
|
35
|
+
#
|
36
|
+
# mymin = lambda do |field, df, indicies|
|
37
|
+
# values = indicies.map { |idx| df.row[idx][field] }
|
38
|
+
# values.min
|
39
|
+
# end
|
40
|
+
#
|
41
|
+
# df.aggregate(by: :a, func: mymin.curry.(:year))
|
42
|
+
#
|
43
|
+
#
|
44
|
+
# Returns a Daru::Vector.
|
45
|
+
def aggregate(by:, func:)
|
46
|
+
grouped = self.group_by(by)
|
47
|
+
::Daru::Vector.new(
|
48
|
+
grouped.groups.reduce({}) do |h, (key, indicies)|
|
49
|
+
h[key.size == 1 ? key.first : key] = func.(self, indicies)
|
50
|
+
h
|
51
|
+
end
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
data/lib/remi/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sterling Paramore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daru
|
@@ -198,9 +198,6 @@ files:
|
|
198
198
|
- doc/install-rbenv-os_x.md
|
199
199
|
- lib/remi.rb
|
200
200
|
- lib/remi/cli.rb
|
201
|
-
- lib/remi/core/daru.rb
|
202
|
-
- lib/remi/core/refinements.rb
|
203
|
-
- lib/remi/core/string.rb
|
204
201
|
- lib/remi/cucumber.rb
|
205
202
|
- lib/remi/cucumber/business_rules.rb
|
206
203
|
- lib/remi/cucumber/data_source.rb
|
@@ -218,6 +215,7 @@ files:
|
|
218
215
|
- lib/remi/field_symbolizers.rb
|
219
216
|
- lib/remi/job.rb
|
220
217
|
- lib/remi/lookup/regex_sieve.rb
|
218
|
+
- lib/remi/project/features/aggregate.feature
|
221
219
|
- lib/remi/project/features/examples.feature
|
222
220
|
- lib/remi/project/features/formulas.feature
|
223
221
|
- lib/remi/project/features/sample_job.feature
|
@@ -227,6 +225,7 @@ files:
|
|
227
225
|
- lib/remi/project/features/transforms/date_diff.feature
|
228
226
|
- lib/remi/project/features/transforms/parse_date.feature
|
229
227
|
- lib/remi/project/features/transforms/prefix.feature
|
228
|
+
- lib/remi/project/jobs/aggregate_job.rb
|
230
229
|
- lib/remi/project/jobs/all_jobs_shared.rb
|
231
230
|
- lib/remi/project/jobs/copy_source_job.rb
|
232
231
|
- lib/remi/project/jobs/sample_job.rb
|
@@ -234,6 +233,8 @@ files:
|
|
234
233
|
- lib/remi/project/jobs/transforms/parse_date_job.rb
|
235
234
|
- lib/remi/project/jobs/transforms/prefix_job.rb
|
236
235
|
- lib/remi/project/jobs/transforms/transform_jobs.rb
|
236
|
+
- lib/remi/refinements/daru.rb
|
237
|
+
- lib/remi/refinements/symbolizer.rb
|
237
238
|
- lib/remi/settings.rb
|
238
239
|
- lib/remi/sf_bulk_helper.rb
|
239
240
|
- lib/remi/source_to_target_map.rb
|
data/lib/remi/core/daru.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
module Daru
|
2
|
-
class DataFrame
|
3
|
-
def monkey_dup
|
4
|
-
dupdf = Daru::DataFrame.new([], index: self.index)
|
5
|
-
self.vectors.each do |v|
|
6
|
-
dupdf[v] = self[v]
|
7
|
-
end
|
8
|
-
|
9
|
-
dupdf
|
10
|
-
end
|
11
|
-
|
12
|
-
def monkey_merge(other)
|
13
|
-
other.vectors.each do |v|
|
14
|
-
self[v] = other[v]
|
15
|
-
end
|
16
|
-
|
17
|
-
self
|
18
|
-
end
|
19
|
-
|
20
|
-
def hash_dump(filename)
|
21
|
-
File.write(filename, Marshal.dump(self.to_hash))
|
22
|
-
end
|
23
|
-
|
24
|
-
def self.from_hash_dump(filename)
|
25
|
-
Daru::DataFrame.new(Marshal.load(File.read(filename)))
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
data/lib/remi/core/string.rb
DELETED