remi 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a68e65a6146a37cea022e1f7cbf18574e16905f7
4
- data.tar.gz: c944f32038d24deff54396591ca98a64b69ba296
3
+ metadata.gz: 2c802335b8494b30ff89e4c31b1ce4df34b2fc8a
4
+ data.tar.gz: b405fbefb668bf07db5bcbb4a0cf6d8550658f13
5
5
  SHA512:
6
- metadata.gz: c016ce1f58bb37da6214c29263ddee66c3b46c9557539210be25517c2676ffc0abd6a3c3fb5b6406290a350c280a277bce0a4aedb14856266d423169f650eeaf
7
- data.tar.gz: 4dc14a6c9653c12dea6ffaaef74ab7d1acc116308ec99e181d6756ae63db8d5a740f6c72e3d8b258f52fce160f8c3c2892405f96c82ce3985a3f11753db690d5
6
+ metadata.gz: e2380bbb4cc87d67cfdb554763f8614cfcc791ceb3b70f711ed2fb975dcaf142438f2937453734479a43d3735ea61c8bcb4e093e3f380b5ae107e2dba5fe9522
7
+ data.tar.gz: 56f42c6b3608157959713478fbeee5cea4898238a6af176ab0b632cc0bb75ec5c60c66fe0ad60b45f6483aee09bb2943ddfabe586a88ef223ceaf9349eeded07
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- remi (0.2.4)
4
+ remi (0.2.6)
5
5
  activesupport (~> 4.2)
6
6
  bond (~> 0.5)
7
7
  cucumber (~> 2.1)
@@ -4,20 +4,39 @@ Feature: Tests the aggregate refinement to the Daru library
4
4
  Given the job is 'Aggregate'
5
5
  And the job source 'Source Data'
6
6
  And the job target 'Target Data'
7
+ And the job target 'Multigroup Target Data'
7
8
 
8
9
  And the source 'Source Data'
9
- And the target 'Target Data'
10
+
10
11
 
11
12
  Scenario: The aggregator should find the minimum year for each 'Alpha'
12
- Given the following example record for 'Source Data':
13
- | Alpha | Year | something |
14
- | a | 2016 | 1 |
15
- | a | 2018 | 1 |
16
- | b | 2016 | 2 |
17
- | b | 2010 | 3 |
18
- | a | 2017 | 4 |
13
+ Given the target 'Target Data'
14
+ And the following example record for 'Source Data':
15
+ | Alpha | Beta | Year |
16
+ | a | aa | 2016 |
17
+ | a | aa | 2018 |
18
+ | b | bb | 2016 |
19
+ | b | bb | 2010 |
20
+ | a | ab | 2017 |
19
21
  And the following example record called 'expected result':
20
22
  | Alpha | Year |
21
23
  | a | Group a has a minimum value of 2016 |
22
24
  | b | Group b has a minimum value of 2010 |
23
25
  Then the target should match the example 'expected result'
26
+
27
+
28
+ Scenario: The aggregator should find the minimum year for each combination of 'Alpha' and 'Beta'
29
+ Given the target 'Multigroup Target Data'
30
+ And the following example record for 'Source Data':
31
+ | Alpha | Beta | Year |
32
+ | a | aa | 2016 |
33
+ | a | aa | 2018 |
34
+ | b | bb | 2016 |
35
+ | b | bb | 2010 |
36
+ | a | ab | 2017 |
37
+ And the following example record called 'expected result':
38
+ | Alpha | Beta | Year |
39
+ | a | aa | Group ["a", "aa"] has a minimum value of 2016 |
40
+ | a | ab | Group ["a", "ab"] has a minimum value of 2017 |
41
+ | b | bb | Group ["b", "bb"] has a minimum value of 2010 |
42
+ Then the target should match the example 'expected result'
@@ -6,15 +6,27 @@ class AggregateJob
6
6
 
7
7
  define_source :source_data, Remi::DataSource::DataFrame
8
8
  define_target :target_data, Remi::DataTarget::DataFrame
9
+ define_target :multigroup_target_data, Remi::DataTarget::DataFrame
9
10
 
10
11
  define_transform :main, sources: :source_data, targets: :target_data do
11
-
12
12
  mymin = lambda do |field, df, group_key, indicies|
13
13
  values = indicies.map { |idx| df.row[idx][field] }
14
14
  "Group #{group_key} has a minimum value of #{values.min}"
15
15
  end
16
16
 
17
+ # Daru groups don't use the index of the dataframe when returning groups (WTF?).
18
+ # Instead they return the position of the record in the dataframe. Here, we
19
+ # shift the index to start at 1, which causes a failure if this artifact is not handled
20
+ # properly in the aggregate function
21
+ source_data.df.index = Daru::Index.new(1.upto(source_data.df.size).to_a)
22
+
17
23
  target_data.df = source_data.df.aggregate(by: :alpha, func: mymin.curry.(:year)).detach_index
18
24
  target_data.df.vectors = Daru::Index.new([:alpha, :year])
25
+
26
+ multigroup_target_data.df = source_data.df.aggregate(by: [:alpha,:beta], func: mymin.curry.(:year)).detach_index
27
+ multigroup_target_data.df.vectors = Daru::Index.new([:alpha_beta, :year])
28
+
29
+
30
+
19
31
  end
20
32
  end
@@ -29,8 +29,8 @@ module Remi
29
29
  # Example:
30
30
  # df = Daru::DataFrame.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
31
31
  #
32
- # mymin = lambda do |field, df, group_key, indicies|
33
- # values = indicies.map { |idx| df.row[idx][field] }
32
+ # mymin = lambda do |field, df, group_key, indices|
33
+ # values = indices.map { |idx| df.row[idx][field] }
34
34
  # "Group #{group_key} has a minimum value of #{values.min}"
35
35
  # end
36
36
  #
@@ -40,10 +40,14 @@ module Remi
40
40
  # Returns a Daru::Vector.
41
41
  def aggregate(by:, func:)
42
42
  grouped = self.group_by(by)
43
+ df_indices = self.index.to_a
43
44
  ::Daru::Vector.new(
44
- grouped.groups.reduce({}) do |h, (key, indicies)|
45
+ grouped.groups.reduce({}) do |h, (key, indices)|
46
+ # Daru groups don't use the index of the dataframe when returning groups (WTF?).
47
+ # Instead they return the position of the record in the dataframe. Here, we map those positions back to the dataframe's index values.
48
+ group_df_indices = indices.map { |v| df_indices[v] }
45
49
  group_key = key.size == 1 ? key.first : key
46
- h[group_key] = func.(self, group_key, indicies)
50
+ h[group_key] = func.(self, group_key, group_df_indices)
47
51
  h
48
52
  end
49
53
  )
data/lib/remi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.2.5'
2
+ VERSION = '0.2.6'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sterling Paramore