remi 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0e96cbd80b9948b85124da0bb1a7b5618c0692ec
4
- data.tar.gz: 8eee478cbfdb78fc3c670c5fb2978f076b9d4673
3
+ metadata.gz: 92b5bf2de20c5287b1879fb4602e39e71da6f6a7
4
+ data.tar.gz: 05071d5457f84bbc42cf14fc4ee566c658492962
5
5
  SHA512:
6
- metadata.gz: c355c76c0abbbd1bb9f37ff7d4b384b8c54772bbdfad3d1eff0a4531c3da063e9bb777aca0e69936cd5047c7f059b1f393da889b9ac047cbe0d77411d9e9fc18
7
- data.tar.gz: 87df4f46a2f4f1127ccd885baf89e2466829dd56db840ce2d227c2ae8a7a96ec145873aac2ab9220a15b78158b8ff0afde56f314a6ecfa3a8c5a5523df89a154
6
+ metadata.gz: d41fdb4d52f15b3632538fffc3d987c35b204ddb3a68f06db71ab44073884250ec7be7495531fa2965365a4d41a9508bb346d31dd91c203c877b4ed5821c83d3
7
+ data.tar.gz: 92be48c0d23b22dbcfbaca25a43e94edda2c89947abf6505425a6439a665b99f39e317daaf6190bf4f0e3c7086ae73178c2dd688c400aeefefd8c736963c7264
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- remi (0.2.1)
4
+ remi (0.2.3)
5
5
  activesupport (~> 4.2)
6
6
  bond (~> 0.5)
7
7
  cucumber (~> 2.1)
data/lib/remi.rb CHANGED
@@ -18,6 +18,8 @@ require 'active_support/core_ext/object/blank'
18
18
  require 'active_support/core_ext/object/try'
19
19
  require 'active_support/core_ext/object/inclusion'
20
20
  require 'active_support/core_ext/string/inflections'
21
+ require 'active_support/core_ext/string/strip'
22
+ require 'active_support/core_ext/string/filters'
21
23
  require 'active_support/core_ext/numeric/time'
22
24
  require 'active_support/core_ext/numeric/conversions'
23
25
  require 'active_support/core_ext/date/calculations'
@@ -30,7 +32,6 @@ require 'salesforce_bulk_api'
30
32
 
31
33
  # Remi
32
34
  require 'remi/version.rb'
33
- require 'remi/core/string.rb'
34
35
 
35
36
  require 'remi/settings'
36
37
  require 'remi/job'
@@ -39,8 +40,8 @@ require 'remi/field_symbolizers'
39
40
  require 'remi/data_subject'
40
41
  require 'remi/sf_bulk_helper' # separate into SF support package
41
42
 
42
- require 'remi/core/daru'
43
- require 'remi/core/refinements'
43
+ require 'remi/refinements/symbolizer'
44
+ require 'remi/refinements/daru'
44
45
 
45
46
  require 'remi/extractor/sftp_file'
46
47
 
data/lib/remi/cli.rb CHANGED
@@ -15,7 +15,7 @@ module Remi
15
15
  options = {}
16
16
 
17
17
  opt_parser = OptionParser.new do |opts|
18
- opts.banner = <<-EOT.unindent
18
+ opts.banner = <<-EOT.strip_heredoc
19
19
  Usage: Command line helpers for Remi.
20
20
  EOT
21
21
 
@@ -1,5 +1,5 @@
1
1
  module Remi::BusinessRules
2
- using Remi::Core::Refinements
2
+ using Remi::Refinements::Symbolizer
3
3
 
4
4
  def self.parse_full_field(full_field_name)
5
5
  full_field_name.split(':').map(&:strip)
@@ -230,6 +230,15 @@ module Remi::BusinessRules
230
230
  @data_obj.df.size
231
231
  end
232
232
 
233
+ # Public: Converts the data subject to a hash where the keys are the table
234
+ # columns and the values are an array for the value of column for each row.
235
+ def column_hash
236
+ @data_obj.df.to_hash.reduce({}) do |h, (k,v)|
237
+ h[k.symbolize] = v.to_a
238
+ h
239
+ end
240
+ end
241
+
233
242
  # For debugging only
234
243
  def _df
235
244
  @data_obj.df
@@ -429,6 +438,17 @@ module Remi::BusinessRules
429
438
  end
430
439
  df
431
440
  end
441
+
442
+ # Public: Converts a Cucumber::Ast::Table to a hash where the keys are the table
443
+ # columns and the values are an array for the value of column for each row.
444
+ def column_hash
445
+ @table.hashes.reduce({}) do |h, row|
446
+ row.each do |k,v|
447
+ (h[k.symbolize] ||= []) << v
448
+ end
449
+ h
450
+ end
451
+ end
432
452
  end
433
453
 
434
454
 
@@ -0,0 +1,23 @@
1
+ Feature: Tests the aggregate refinement to the Daru library
2
+
3
+ Background:
4
+ Given the job is 'Aggregate'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+
8
+ And the source 'Source Data'
9
+ And the target 'Target Data'
10
+
11
+ Scenario: The aggregator should find the minimum year for each 'Alpha'
12
+ Given the following example record for 'Source Data':
13
+ | Alpha | Year | something |
14
+ | a | 2016 | 1 |
15
+ | a | 2018 | 1 |
16
+ | b | 2016 | 2 |
17
+ | b | 2010 | 3 |
18
+ | a | 2017 | 4 |
19
+ And the following example record called 'expected result':
20
+ | Alpha | Year |
21
+ | a | 2016 |
22
+ | b | 2010 |
23
+ Then the target should match the example 'expected result'
@@ -201,6 +201,22 @@ Then /^the target field '(.+)' is the date (.+)$/ do |target_field, date_referen
201
201
  step "the target field '#{target_field}' is set to the value \"*#{date_reference}*\""
202
202
  end
203
203
 
204
+ Then /^the target '(.+)' should match the example '([[:alnum:]\s]+)'$/ do |target_name, example_name|
205
+ @brt.run_transforms
206
+
207
+ target_hash = @brt.targets[target_name].column_hash
208
+ example_hash = @brt.examples[example_name].column_hash
209
+ common_keys = target_hash.keys & example_hash.keys
210
+
211
+ expect(target_hash.select { |k,v| common_keys.include? k })
212
+ .to eq example_hash.select { |k,v| common_keys.include? k }
213
+ end
214
+
215
+ Then /^the target should match the example '([[:alnum:]\s]+)'$/ do |example_name|
216
+ target_name = @brt.targets.keys.first
217
+ step "the target '#{target_name}' should match the example '#{example_name}'"
218
+ end
219
+
204
220
 
205
221
  ### Transforms
206
222
 
@@ -1,3 +1,4 @@
1
1
  require_relative '../../jobs/sample_job'
2
2
  require_relative '../../jobs/copy_source_job'
3
3
  require_relative '../../jobs/transforms/transform_jobs'
4
+ require_relative '../../jobs/aggregate_job'
@@ -0,0 +1,20 @@
1
+ require_relative 'all_jobs_shared'
2
+
3
+ class AggregateJob
4
+ include AllJobsShared
5
+ using Remi::Refinements::Daru
6
+
7
+ define_source :source_data, Remi::DataSource::DataFrame
8
+ define_target :target_data, Remi::DataTarget::DataFrame
9
+
10
+ define_transform :main, sources: :source_data, targets: :target_data do
11
+
12
+ mymin = lambda do |field, df, indicies|
13
+ values = indicies.map { |idx| df.row[idx][field] }
14
+ values.min
15
+ end
16
+
17
+ target_data.df = source_data.df.aggregate(by: :alpha, func: mymin.curry.(:year)).detach_index
18
+ target_data.df.vectors = Daru::Index.new([:alpha, :year])
19
+ end
20
+ end
@@ -2,11 +2,12 @@ require_relative 'all_jobs_shared'
2
2
 
3
3
  class CopySourceJob
4
4
  include AllJobsShared
5
+ using Remi::Refinements::Daru
5
6
 
6
7
  define_source :source_data, Remi::DataSource::DataFrame
7
8
  define_source :target_data, Remi::DataSource::DataFrame
8
9
 
9
10
  define_transform :main, sources: :source_data, targets: :target_data do
10
- target_data.df = source_data.df.monkey_dup
11
+ target_data.df = source_data.df.dup
11
12
  end
12
13
  end
@@ -4,6 +4,7 @@ require_relative 'all_jobs_shared'
4
4
 
5
5
  class SampleJob
6
6
  include AllJobsShared
7
+ using Remi::Refinements::Daru
7
8
 
8
9
  define_source :existing_contacts, Remi::DataSource::Salesforce,
9
10
  object: :Contact,
@@ -77,7 +78,7 @@ class SampleJob
77
78
  define_transform :map_common_fields, sources: [:sample_file, :existing_contacts], targets: :all_contacts do
78
79
 
79
80
  # Exclude all source records with an invalid program name
80
- all_contacts.df = sample_file.df.monkey_dup
81
+ all_contacts.df = sample_file.df.dup
81
82
  Remi::SourceToTargetMap.apply(all_contacts.df) do
82
83
  map source(:program) .target(:Major__c)
83
84
  .transform(Remi::Transform[:lookup][program_name_lookup])
@@ -0,0 +1,58 @@
1
+ module Remi
2
+ module Refinements
3
+ module Daru
4
+ refine ::Daru::DataFrame do
5
+
6
+ # Public: Fixes dup issues in the Daru library (vectors not being duped).
7
+ def dup
8
+ dupdf = ::Daru::DataFrame.new([], index: self.index)
9
+ self.vectors.each do |v|
10
+ dupdf[v] = self[v]
11
+ end
12
+
13
+ dupdf
14
+ end
15
+
16
+ # Public: Saves a Dataframe to a file.
17
+ def hash_dump(filename)
18
+ File.write(filename, Marshal.dump(self.to_hash))
19
+ end
20
+
21
+ # Public: Creates a DataFrame by reading the dumped version from a file.
22
+ def self.from_hash_dump(filename)
23
+ ::Daru::DataFrame.new(Marshal.load(File.read(filename)))
24
+ end
25
+
26
+ # Public: Allows the user to define an arbitrary aggregation function.
27
+ #
28
+ # by - The name of the DataFrame vector to use to group records.
29
+ # func - A lambda function that accepts two arguments - the first argument
30
+ # is the DataFrame and the second is the index of the elements belonging
31
+ # to a group.
32
+ #
33
+ # Example:
34
+ # df = Daru::DataFrame.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
35
+ #
36
+ # mymin = lambda do |field, df, indicies|
37
+ # values = indicies.map { |idx| df.row[idx][field] }
38
+ # values.min
39
+ # end
40
+ #
41
+ # df.aggregate(by: :a, func: mymin.curry.(:year))
42
+ #
43
+ #
44
+ # Returns a Daru::Vector.
45
+ def aggregate(by:, func:)
46
+ grouped = self.group_by(by)
47
+ ::Daru::Vector.new(
48
+ grouped.groups.reduce({}) do |h, (key, indicies)|
49
+ h[key.size == 1 ? key.first : key] = func.(self, indicies)
50
+ h
51
+ end
52
+ )
53
+ end
54
+
55
+ end
56
+ end
57
+ end
58
+ end
@@ -1,6 +1,6 @@
1
1
  module Remi
2
- module Core
3
- module Refinements
2
+ module Refinements
3
+ module Symbolizer
4
4
  refine String do
5
5
  def symbolize(symbolizer=nil)
6
6
  if symbolizer
data/lib/remi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sterling Paramore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-21 00:00:00.000000000 Z
11
+ date: 2016-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daru
@@ -198,9 +198,6 @@ files:
198
198
  - doc/install-rbenv-os_x.md
199
199
  - lib/remi.rb
200
200
  - lib/remi/cli.rb
201
- - lib/remi/core/daru.rb
202
- - lib/remi/core/refinements.rb
203
- - lib/remi/core/string.rb
204
201
  - lib/remi/cucumber.rb
205
202
  - lib/remi/cucumber/business_rules.rb
206
203
  - lib/remi/cucumber/data_source.rb
@@ -218,6 +215,7 @@ files:
218
215
  - lib/remi/field_symbolizers.rb
219
216
  - lib/remi/job.rb
220
217
  - lib/remi/lookup/regex_sieve.rb
218
+ - lib/remi/project/features/aggregate.feature
221
219
  - lib/remi/project/features/examples.feature
222
220
  - lib/remi/project/features/formulas.feature
223
221
  - lib/remi/project/features/sample_job.feature
@@ -227,6 +225,7 @@ files:
227
225
  - lib/remi/project/features/transforms/date_diff.feature
228
226
  - lib/remi/project/features/transforms/parse_date.feature
229
227
  - lib/remi/project/features/transforms/prefix.feature
228
+ - lib/remi/project/jobs/aggregate_job.rb
230
229
  - lib/remi/project/jobs/all_jobs_shared.rb
231
230
  - lib/remi/project/jobs/copy_source_job.rb
232
231
  - lib/remi/project/jobs/sample_job.rb
@@ -234,6 +233,8 @@ files:
234
233
  - lib/remi/project/jobs/transforms/parse_date_job.rb
235
234
  - lib/remi/project/jobs/transforms/prefix_job.rb
236
235
  - lib/remi/project/jobs/transforms/transform_jobs.rb
236
+ - lib/remi/refinements/daru.rb
237
+ - lib/remi/refinements/symbolizer.rb
237
238
  - lib/remi/settings.rb
238
239
  - lib/remi/sf_bulk_helper.rb
239
240
  - lib/remi/source_to_target_map.rb
@@ -1,28 +0,0 @@
1
- module Daru
2
- class DataFrame
3
- def monkey_dup
4
- dupdf = Daru::DataFrame.new([], index: self.index)
5
- self.vectors.each do |v|
6
- dupdf[v] = self[v]
7
- end
8
-
9
- dupdf
10
- end
11
-
12
- def monkey_merge(other)
13
- other.vectors.each do |v|
14
- self[v] = other[v]
15
- end
16
-
17
- self
18
- end
19
-
20
- def hash_dump(filename)
21
- File.write(filename, Marshal.dump(self.to_hash))
22
- end
23
-
24
- def self.from_hash_dump(filename)
25
- Daru::DataFrame.new(Marshal.load(File.read(filename)))
26
- end
27
- end
28
- end
@@ -1,8 +0,0 @@
1
- class String
2
- # Strip leading whitespace from each line that is the same as the
3
- # amount of whitespace on the first line of the string.
4
- # Leaves _additional_ indentation on later lines intact.
5
- def unindent
6
- gsub /^#{self[/\A\s*/]}/, ''
7
- end
8
- end