RubyGems - remi - Versions diffs - 0.2.5 → 0.2.6 - Mend

remi 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/Gemfile.lock +1 -1
data/lib/remi/project/features/aggregate.feature +27 -8
data/lib/remi/project/jobs/aggregate_job.rb +13 -1
data/lib/remi/refinements/daru.rb +8 -4
data/lib/remi/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a68e65a6146a37cea022e1f7cbf18574e16905f7
-  data.tar.gz: c944f32038d24deff54396591ca98a64b69ba296
+  metadata.gz: 2c802335b8494b30ff89e4c31b1ce4df34b2fc8a
+  data.tar.gz: b405fbefb668bf07db5bcbb4a0cf6d8550658f13
 SHA512:
-  metadata.gz: c016ce1f58bb37da6214c29263ddee66c3b46c9557539210be25517c2676ffc0abd6a3c3fb5b6406290a350c280a277bce0a4aedb14856266d423169f650eeaf
-  data.tar.gz: 4dc14a6c9653c12dea6ffaaef74ab7d1acc116308ec99e181d6756ae63db8d5a740f6c72e3d8b258f52fce160f8c3c2892405f96c82ce3985a3f11753db690d5
+  metadata.gz: e2380bbb4cc87d67cfdb554763f8614cfcc791ceb3b70f711ed2fb975dcaf142438f2937453734479a43d3735ea61c8bcb4e093e3f380b5ae107e2dba5fe9522
+  data.tar.gz: 56f42c6b3608157959713478fbeee5cea4898238a6af176ab0b632cc0bb75ec5c60c66fe0ad60b45f6483aee09bb2943ddfabe586a88ef223ceaf9349eeded07

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    remi (0.2.4)
+    remi (0.2.6)
       activesupport (~> 4.2)
       bond (~> 0.5)
       cucumber (~> 2.1)

data/lib/remi/project/features/aggregate.feature CHANGED Viewed

@@ -4,20 +4,39 @@ Feature: Tests the aggregate refinement to the Daru library
     Given the job is 'Aggregate'
     And the job source 'Source Data'
     And the job target 'Target Data'
+    And the job target 'Multigroup Target Data'
     And the source 'Source Data'
-    And the target 'Target Data'
   Scenario: The aggregator should find the minimum year for each 'Alpha'
-    Given the following example record for 'Source Data':
-      | Alpha | Year | something |
-      | a     | 2016 | 1 |
-      | a     | 2018 | 1 |
-      | b     | 2016 | 2 |
-      | b     | 2010 | 3 |
-      | a     | 2017 | 4 |
+    Given the target 'Target Data'
+    And the following example record for 'Source Data':
+      | Alpha | Beta | Year |
+      | a     | aa   | 2016 |
+      | a     | aa   | 2018 |
+      | b     | bb   | 2016 |
+      | b     | bb   | 2010 |
+      | a     | ab   | 2017 |
     And the following example record called 'expected result':
       | Alpha | Year |
       | a     | Group a has a minimum value of 2016 |
       | b     | Group b has a minimum value of 2010 |
     Then the target should match the example 'expected result'
+  Scenario: The aggregator should find the minimum year for each combination of 'Alpha' and 'Beta'
+    Given the target 'Multigroup Target Data'
+    And the following example record for 'Source Data':
+      | Alpha | Beta | Year |
+      | a     | aa   | 2016 |
+      | a     | aa   | 2018 |
+      | b     | bb   | 2016 |
+      | b     | bb   | 2010 |
+      | a     | ab   | 2017 |
+    And the following example record called 'expected result':
+      | Alpha | Beta | Year |
+      | a     | aa   | Group ["a", "aa"] has a minimum value of 2016 |
+      | a     | ab   | Group ["a", "ab"] has a minimum value of 2017 |
+      | b     | bb   | Group ["b", "bb"] has a minimum value of 2010 |
+    Then the target should match the example 'expected result'

data/lib/remi/project/jobs/aggregate_job.rb CHANGED Viewed

@@ -6,15 +6,27 @@ class AggregateJob
   define_source :source_data, Remi::DataSource::DataFrame
   define_target :target_data, Remi::DataTarget::DataFrame
+  define_target :multigroup_target_data, Remi::DataTarget::DataFrame
   define_transform :main, sources: :source_data, targets: :target_data do
     mymin = lambda do |field, df, group_key, indicies|
       values = indicies.map { |idx| df.row[idx][field] }
       "Group #{group_key} has a minimum value of #{values.min}"
     end
+    # Daru groups don't use the index of the dataframe when returning groups (WTF?).
+    # Instead they return the position of the record in the dataframe.  Here, we
+    # re-index the source dataframe to start at 1, which causes a failure if this
+    # artifact is not handled properly in the aggregate function.
+    source_data.df.index = Daru::Index.new(1.upto(source_data.df.size).to_a)
     target_data.df = source_data.df.aggregate(by: :alpha, func: mymin.curry.(:year)).detach_index
     target_data.df.vectors = Daru::Index.new([:alpha, :year])
+    multigroup_target_data.df = source_data.df.aggregate(by: [:alpha,:beta], func: mymin.curry.(:year)).detach_index
+    multigroup_target_data.df.vectors = Daru::Index.new([:alpha_beta, :year])
   end
 end

data/lib/remi/refinements/daru.rb CHANGED Viewed

@@ -29,8 +29,8 @@ module Remi
         # Example:
         #   df = Daru::DataFrame.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
         #
-        #   mymin = lambda do |field, df, group_key, indicies|
-        #     values = indicies.map { |idx| df.row[idx][field] }
+        #   mymin = lambda do |field, df, group_key, indices|
+        #     values = indices.map { |idx| df.row[idx][field] }
         #     "Group #{group_key} has a minimum value of #{values.min}"
         #   end
         #
@@ -40,10 +40,14 @@ module Remi
         # Returns a Daru::Vector.
         def aggregate(by:, func:)
           grouped = self.group_by(by)
+          df_indices = self.index.to_a
           ::Daru::Vector.new(
-            grouped.groups.reduce({}) do |h, (key, indicies)|
+            grouped.groups.reduce({}) do |h, (key, indices)|
+              # Daru groups don't use the index of the dataframe when returning groups (WTF?).
+              # Instead they return the position of the record in the dataframe.  Here, we map those positions back to the dataframe's index values.
+              group_df_indices = indices.map { |v| df_indices[v] }
               group_key = key.size == 1 ? key.first : key
-              h[group_key] = func.(self, group_key, indicies)
+              h[group_key] = func.(self, group_key, group_df_indices)
               h
             end
           )

data/lib/remi/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Remi
-  VERSION = '0.2.5'
+  VERSION = '0.2.6'
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: remi
 version: !ruby/object:Gem::Version
-  version: 0.2.5
+  version: 0.2.6
 platform: ruby
 authors:
 - Sterling Paramore