remi 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0e96cbd80b9948b85124da0bb1a7b5618c0692ec
4
- data.tar.gz: 8eee478cbfdb78fc3c670c5fb2978f076b9d4673
3
+ metadata.gz: 92b5bf2de20c5287b1879fb4602e39e71da6f6a7
4
+ data.tar.gz: 05071d5457f84bbc42cf14fc4ee566c658492962
5
5
  SHA512:
6
- metadata.gz: c355c76c0abbbd1bb9f37ff7d4b384b8c54772bbdfad3d1eff0a4531c3da063e9bb777aca0e69936cd5047c7f059b1f393da889b9ac047cbe0d77411d9e9fc18
7
- data.tar.gz: 87df4f46a2f4f1127ccd885baf89e2466829dd56db840ce2d227c2ae8a7a96ec145873aac2ab9220a15b78158b8ff0afde56f314a6ecfa3a8c5a5523df89a154
6
+ metadata.gz: d41fdb4d52f15b3632538fffc3d987c35b204ddb3a68f06db71ab44073884250ec7be7495531fa2965365a4d41a9508bb346d31dd91c203c877b4ed5821c83d3
7
+ data.tar.gz: 92be48c0d23b22dbcfbaca25a43e94edda2c89947abf6505425a6439a665b99f39e317daaf6190bf4f0e3c7086ae73178c2dd688c400aeefefd8c736963c7264
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- remi (0.2.1)
4
+ remi (0.2.3)
5
5
  activesupport (~> 4.2)
6
6
  bond (~> 0.5)
7
7
  cucumber (~> 2.1)
data/lib/remi.rb CHANGED
@@ -18,6 +18,8 @@ require 'active_support/core_ext/object/blank'
18
18
  require 'active_support/core_ext/object/try'
19
19
  require 'active_support/core_ext/object/inclusion'
20
20
  require 'active_support/core_ext/string/inflections'
21
+ require 'active_support/core_ext/string/strip'
22
+ require 'active_support/core_ext/string/filters'
21
23
  require 'active_support/core_ext/numeric/time'
22
24
  require 'active_support/core_ext/numeric/conversions'
23
25
  require 'active_support/core_ext/date/calculations'
@@ -30,7 +32,6 @@ require 'salesforce_bulk_api'
30
32
 
31
33
  # Remi
32
34
  require 'remi/version.rb'
33
- require 'remi/core/string.rb'
34
35
 
35
36
  require 'remi/settings'
36
37
  require 'remi/job'
@@ -39,8 +40,8 @@ require 'remi/field_symbolizers'
39
40
  require 'remi/data_subject'
40
41
  require 'remi/sf_bulk_helper' # separate into SF support package
41
42
 
42
- require 'remi/core/daru'
43
- require 'remi/core/refinements'
43
+ require 'remi/refinements/symbolizer'
44
+ require 'remi/refinements/daru'
44
45
 
45
46
  require 'remi/extractor/sftp_file'
46
47
 
data/lib/remi/cli.rb CHANGED
@@ -15,7 +15,7 @@ module Remi
15
15
  options = {}
16
16
 
17
17
  opt_parser = OptionParser.new do |opts|
18
- opts.banner = <<-EOT.unindent
18
+ opts.banner = <<-EOT.strip_heredoc
19
19
  Usage: Command line helpers for Remi.
20
20
  EOT
21
21
 
@@ -1,5 +1,5 @@
1
1
  module Remi::BusinessRules
2
- using Remi::Core::Refinements
2
+ using Remi::Refinements::Symbolizer
3
3
 
4
4
  def self.parse_full_field(full_field_name)
5
5
  full_field_name.split(':').map(&:strip)
@@ -230,6 +230,15 @@ module Remi::BusinessRules
230
230
  @data_obj.df.size
231
231
  end
232
232
 
233
+ # Public: Converts the data subject to a hash where the keys are the table
234
+ # columns and each value is an array of that column's values, one per row.
235
+ def column_hash
236
+ @data_obj.df.to_hash.reduce({}) do |h, (k,v)|
237
+ h[k.symbolize] = v.to_a
238
+ h
239
+ end
240
+ end
241
+
233
242
  # For debugging only
234
243
  def _df
235
244
  @data_obj.df
@@ -429,6 +438,17 @@ module Remi::BusinessRules
429
438
  end
430
439
  df
431
440
  end
441
+
442
+ # Public: Converts a Cucumber::Ast::Table to a hash where the keys are the table
443
+ # columns and each value is an array of that column's values, one per row.
444
+ def column_hash
445
+ @table.hashes.reduce({}) do |h, row|
446
+ row.each do |k,v|
447
+ (h[k.symbolize] ||= []) << v
448
+ end
449
+ h
450
+ end
451
+ end
432
452
  end
433
453
 
434
454
 
@@ -0,0 +1,23 @@
1
+ Feature: Tests the aggregate refinement to the Daru library
2
+
3
+ Background:
4
+ Given the job is 'Aggregate'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+
8
+ And the source 'Source Data'
9
+ And the target 'Target Data'
10
+
11
+ Scenario: The aggregator should find the minimum year for each 'Alpha'
12
+ Given the following example record for 'Source Data':
13
+ | Alpha | Year | something |
14
+ | a | 2016 | 1 |
15
+ | a | 2018 | 1 |
16
+ | b | 2016 | 2 |
17
+ | b | 2010 | 3 |
18
+ | a | 2017 | 4 |
19
+ And the following example record called 'expected result':
20
+ | Alpha | Year |
21
+ | a | 2016 |
22
+ | b | 2010 |
23
+ Then the target should match the example 'expected result'
@@ -201,6 +201,22 @@ Then /^the target field '(.+)' is the date (.+)$/ do |target_field, date_referen
201
201
  step "the target field '#{target_field}' is set to the value \"*#{date_reference}*\""
202
202
  end
203
203
 
204
+ Then /^the target '(.+)' should match the example '([[:alnum:]\s]+)'$/ do |target_name, example_name|
205
+ @brt.run_transforms
206
+
207
+ target_hash = @brt.targets[target_name].column_hash
208
+ example_hash = @brt.examples[example_name].column_hash
209
+ common_keys = target_hash.keys & example_hash.keys
210
+
211
+ expect(target_hash.select { |k,v| common_keys.include? k })
212
+ .to eq example_hash.select { |k,v| common_keys.include? k }
213
+ end
214
+
215
+ Then /^the target should match the example '([[:alnum:]\s]+)'$/ do |example_name|
216
+ target_name = @brt.targets.keys.first
217
+ step "the target '#{target_name}' should match the example '#{example_name}'"
218
+ end
219
+
204
220
 
205
221
  ### Transforms
206
222
 
@@ -1,3 +1,4 @@
1
1
  require_relative '../../jobs/sample_job'
2
2
  require_relative '../../jobs/copy_source_job'
3
3
  require_relative '../../jobs/transforms/transform_jobs'
4
+ require_relative '../../jobs/aggregate_job'
@@ -0,0 +1,20 @@
1
+ require_relative 'all_jobs_shared'
2
+
3
+ class AggregateJob
4
+ include AllJobsShared
5
+ using Remi::Refinements::Daru
6
+
7
+ define_source :source_data, Remi::DataSource::DataFrame
8
+ define_target :target_data, Remi::DataTarget::DataFrame
9
+
10
+ define_transform :main, sources: :source_data, targets: :target_data do
11
+
12
+ mymin = lambda do |field, df, indicies|
13
+ values = indicies.map { |idx| df.row[idx][field] }
14
+ values.min
15
+ end
16
+
17
+ target_data.df = source_data.df.aggregate(by: :alpha, func: mymin.curry.(:year)).detach_index
18
+ target_data.df.vectors = Daru::Index.new([:alpha, :year])
19
+ end
20
+ end
@@ -2,11 +2,12 @@ require_relative 'all_jobs_shared'
2
2
 
3
3
  class CopySourceJob
4
4
  include AllJobsShared
5
+ using Remi::Refinements::Daru
5
6
 
6
7
  define_source :source_data, Remi::DataSource::DataFrame
7
8
  define_source :target_data, Remi::DataSource::DataFrame
8
9
 
9
10
  define_transform :main, sources: :source_data, targets: :target_data do
10
- target_data.df = source_data.df.monkey_dup
11
+ target_data.df = source_data.df.dup
11
12
  end
12
13
  end
@@ -4,6 +4,7 @@ require_relative 'all_jobs_shared'
4
4
 
5
5
  class SampleJob
6
6
  include AllJobsShared
7
+ using Remi::Refinements::Daru
7
8
 
8
9
  define_source :existing_contacts, Remi::DataSource::Salesforce,
9
10
  object: :Contact,
@@ -77,7 +78,7 @@ class SampleJob
77
78
  define_transform :map_common_fields, sources: [:sample_file, :existing_contacts], targets: :all_contacts do
78
79
 
79
80
  # Exclude all source records with an invalid program name
80
- all_contacts.df = sample_file.df.monkey_dup
81
+ all_contacts.df = sample_file.df.dup
81
82
  Remi::SourceToTargetMap.apply(all_contacts.df) do
82
83
  map source(:program) .target(:Major__c)
83
84
  .transform(Remi::Transform[:lookup][program_name_lookup])
@@ -0,0 +1,58 @@
1
+ module Remi
2
+ module Refinements
3
+ module Daru
4
+ refine ::Daru::DataFrame do
5
+
6
+ # Public: Fixes dup issues in the Daru library (vectors not being duped).
7
+ def dup
8
+ dupdf = ::Daru::DataFrame.new([], index: self.index)
9
+ self.vectors.each do |v|
10
+ dupdf[v] = self[v]
11
+ end
12
+
13
+ dupdf
14
+ end
15
+
16
+ # Public: Saves a Dataframe to a file.
17
+ def hash_dump(filename)
18
+ File.write(filename, Marshal.dump(self.to_hash))
19
+ end
20
+
21
+ # Public: Creates a DataFrame by reading the dumped version from a file.
22
+ def self.from_hash_dump(filename)
23
+ ::Daru::DataFrame.new(Marshal.load(File.read(filename)))
24
+ end
25
+
26
+ # Public: Allows the user to define an arbitrary aggregation function.
27
+ #
28
+ # by - The name of the DataFrame vector to use to group records.
29
+ # func - A lambda function that accepts two arguments - the first argument
30
+ # is the DataFrame and the second is the collection of row indices
31
+ # belonging to a group.
32
+ #
33
+ # Example:
34
+ # df = Daru::DataFrame.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
35
+ #
36
+ # mymin = lambda do |field, df, indicies|
37
+ # values = indicies.map { |idx| df.row[idx][field] }
38
+ # values.min
39
+ # end
40
+ #
41
+ # df.aggregate(by: :a, func: mymin.curry.(:year))
42
+ #
43
+ #
44
+ # Returns a Daru::Vector.
45
+ def aggregate(by:, func:)
46
+ grouped = self.group_by(by)
47
+ ::Daru::Vector.new(
48
+ grouped.groups.reduce({}) do |h, (key, indicies)|
49
+ h[key.size == 1 ? key.first : key] = func.(self, indicies)
50
+ h
51
+ end
52
+ )
53
+ end
54
+
55
+ end
56
+ end
57
+ end
58
+ end
@@ -1,6 +1,6 @@
1
1
  module Remi
2
- module Core
3
- module Refinements
2
+ module Refinements
3
+ module Symbolizer
4
4
  refine String do
5
5
  def symbolize(symbolizer=nil)
6
6
  if symbolizer
data/lib/remi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sterling Paramore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-21 00:00:00.000000000 Z
11
+ date: 2016-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daru
@@ -198,9 +198,6 @@ files:
198
198
  - doc/install-rbenv-os_x.md
199
199
  - lib/remi.rb
200
200
  - lib/remi/cli.rb
201
- - lib/remi/core/daru.rb
202
- - lib/remi/core/refinements.rb
203
- - lib/remi/core/string.rb
204
201
  - lib/remi/cucumber.rb
205
202
  - lib/remi/cucumber/business_rules.rb
206
203
  - lib/remi/cucumber/data_source.rb
@@ -218,6 +215,7 @@ files:
218
215
  - lib/remi/field_symbolizers.rb
219
216
  - lib/remi/job.rb
220
217
  - lib/remi/lookup/regex_sieve.rb
218
+ - lib/remi/project/features/aggregate.feature
221
219
  - lib/remi/project/features/examples.feature
222
220
  - lib/remi/project/features/formulas.feature
223
221
  - lib/remi/project/features/sample_job.feature
@@ -227,6 +225,7 @@ files:
227
225
  - lib/remi/project/features/transforms/date_diff.feature
228
226
  - lib/remi/project/features/transforms/parse_date.feature
229
227
  - lib/remi/project/features/transforms/prefix.feature
228
+ - lib/remi/project/jobs/aggregate_job.rb
230
229
  - lib/remi/project/jobs/all_jobs_shared.rb
231
230
  - lib/remi/project/jobs/copy_source_job.rb
232
231
  - lib/remi/project/jobs/sample_job.rb
@@ -234,6 +233,8 @@ files:
234
233
  - lib/remi/project/jobs/transforms/parse_date_job.rb
235
234
  - lib/remi/project/jobs/transforms/prefix_job.rb
236
235
  - lib/remi/project/jobs/transforms/transform_jobs.rb
236
+ - lib/remi/refinements/daru.rb
237
+ - lib/remi/refinements/symbolizer.rb
237
238
  - lib/remi/settings.rb
238
239
  - lib/remi/sf_bulk_helper.rb
239
240
  - lib/remi/source_to_target_map.rb
@@ -1,28 +0,0 @@
1
- module Daru
2
- class DataFrame
3
- def monkey_dup
4
- dupdf = Daru::DataFrame.new([], index: self.index)
5
- self.vectors.each do |v|
6
- dupdf[v] = self[v]
7
- end
8
-
9
- dupdf
10
- end
11
-
12
- def monkey_merge(other)
13
- other.vectors.each do |v|
14
- self[v] = other[v]
15
- end
16
-
17
- self
18
- end
19
-
20
- def hash_dump(filename)
21
- File.write(filename, Marshal.dump(self.to_hash))
22
- end
23
-
24
- def self.from_hash_dump(filename)
25
- Daru::DataFrame.new(Marshal.load(File.read(filename)))
26
- end
27
- end
28
- end
@@ -1,8 +0,0 @@
1
- class String
2
- # Strip leading whitespace from each line that is the same as the
3
- # amount of whitespace on the first line of the string.
4
- # Leaves _additional_ indentation on later lines intact.
5
- def unindent
6
- gsub /^#{self[/\A\s*/]}/, ''
7
- end
8
- end