remi 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a68e65a6146a37cea022e1f7cbf18574e16905f7
4
- data.tar.gz: c944f32038d24deff54396591ca98a64b69ba296
3
+ metadata.gz: 2c802335b8494b30ff89e4c31b1ce4df34b2fc8a
4
+ data.tar.gz: b405fbefb668bf07db5bcbb4a0cf6d8550658f13
5
5
  SHA512:
6
- metadata.gz: c016ce1f58bb37da6214c29263ddee66c3b46c9557539210be25517c2676ffc0abd6a3c3fb5b6406290a350c280a277bce0a4aedb14856266d423169f650eeaf
7
- data.tar.gz: 4dc14a6c9653c12dea6ffaaef74ab7d1acc116308ec99e181d6756ae63db8d5a740f6c72e3d8b258f52fce160f8c3c2892405f96c82ce3985a3f11753db690d5
6
+ metadata.gz: e2380bbb4cc87d67cfdb554763f8614cfcc791ceb3b70f711ed2fb975dcaf142438f2937453734479a43d3735ea61c8bcb4e093e3f380b5ae107e2dba5fe9522
7
+ data.tar.gz: 56f42c6b3608157959713478fbeee5cea4898238a6af176ab0b632cc0bb75ec5c60c66fe0ad60b45f6483aee09bb2943ddfabe586a88ef223ceaf9349eeded07
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- remi (0.2.4)
4
+ remi (0.2.6)
5
5
  activesupport (~> 4.2)
6
6
  bond (~> 0.5)
7
7
  cucumber (~> 2.1)
@@ -4,20 +4,39 @@ Feature: Tests the aggregate refinement to the Daru library
4
4
  Given the job is 'Aggregate'
5
5
  And the job source 'Source Data'
6
6
  And the job target 'Target Data'
7
+ And the job target 'Multigroup Target Data'
7
8
 
8
9
  And the source 'Source Data'
9
- And the target 'Target Data'
10
+
10
11
 
11
12
  Scenario: The aggregator should find the minimum year for each 'Alpha'
12
- Given the following example record for 'Source Data':
13
- | Alpha | Year | something |
14
- | a | 2016 | 1 |
15
- | a | 2018 | 1 |
16
- | b | 2016 | 2 |
17
- | b | 2010 | 3 |
18
- | a | 2017 | 4 |
13
+ Given the target 'Target Data'
14
+ And the following example record for 'Source Data':
15
+ | Alpha | Beta | Year |
16
+ | a | aa | 2016 |
17
+ | a | aa | 2018 |
18
+ | b | bb | 2016 |
19
+ | b | bb | 2010 |
20
+ | a | ab | 2017 |
19
21
  And the following example record called 'expected result':
20
22
  | Alpha | Year |
21
23
  | a | Group a has a minimum value of 2016 |
22
24
  | b | Group b has a minimum value of 2010 |
23
25
  Then the target should match the example 'expected result'
26
+
27
+
28
+ Scenario: The aggregator should find the minimum year for each 'Alpha' and 'Beta' combination
29
+ Given the target 'Multigroup Target Data'
30
+ And the following example record for 'Source Data':
31
+ | Alpha | Beta | Year |
32
+ | a | aa | 2016 |
33
+ | a | aa | 2018 |
34
+ | b | bb | 2016 |
35
+ | b | bb | 2010 |
36
+ | a | ab | 2017 |
37
+ And the following example record called 'expected result':
38
+ | Alpha | Beta | Year |
39
+ | a | aa | Group ["a", "aa"] has a minimum value of 2016 |
40
+ | a | ab | Group ["a", "ab"] has a minimum value of 2017 |
41
+ | b | bb | Group ["b", "bb"] has a minimum value of 2010 |
42
+ Then the target should match the example 'expected result'
@@ -6,15 +6,27 @@ class AggregateJob
6
6
 
7
7
  define_source :source_data, Remi::DataSource::DataFrame
8
8
  define_target :target_data, Remi::DataTarget::DataFrame
9
+ define_target :multigroup_target_data, Remi::DataTarget::DataFrame
9
10
 
10
11
  define_transform :main, sources: :source_data, targets: :target_data do
11
-
12
12
  mymin = lambda do |field, df, group_key, indicies|
13
13
  values = indicies.map { |idx| df.row[idx][field] }
14
14
  "Group #{group_key} has a minimum value of #{values.min}"
15
15
  end
16
16
 
17
+ # Daru groups don't use the index of the dataframe when returning groups (WTF?).
18
+ # Instead they return the position of the record in the dataframe. Here, we
19
+ # shift the indexes which causes a failure if this artifact is not handled
20
+ # properly in the aggregate function
21
+ source_data.df.index = Daru::Index.new(1.upto(source_data.df.size).to_a)
22
+
17
23
  target_data.df = source_data.df.aggregate(by: :alpha, func: mymin.curry.(:year)).detach_index
18
24
  target_data.df.vectors = Daru::Index.new([:alpha, :year])
25
+
26
+ multigroup_target_data.df = source_data.df.aggregate(by: [:alpha,:beta], func: mymin.curry.(:year)).detach_index
27
+ multigroup_target_data.df.vectors = Daru::Index.new([:alpha_beta, :year])
28
+
29
+
30
+
19
31
  end
20
32
  end
@@ -29,8 +29,8 @@ module Remi
29
29
  # Example:
30
30
  # df = Daru::DataFrame.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
31
31
  #
32
- # mymin = lambda do |field, df, group_key, indicies|
33
- # values = indicies.map { |idx| df.row[idx][field] }
32
+ # mymin = lambda do |field, df, group_key, indices|
33
+ # values = indices.map { |idx| df.row[idx][field] }
34
34
  # "Group #{group_key} has a minimum value of #{values.min}"
35
35
  # end
36
36
  #
@@ -40,10 +40,14 @@ module Remi
40
40
  # Returns a Daru::Vector.
41
41
  def aggregate(by:, func:)
42
42
  grouped = self.group_by(by)
43
+ df_indices = self.index.to_a
43
44
  ::Daru::Vector.new(
44
- grouped.groups.reduce({}) do |h, (key, indicies)|
45
+ grouped.groups.reduce({}) do |h, (key, indices)|
46
+ # Daru groups don't use the index of the dataframe when returning groups (WTF?).
47
+ # Instead they return the position of the record in the dataframe, so map each position back to its index value.
48
+ group_df_indices = indices.map { |v| df_indices[v] }
45
49
  group_key = key.size == 1 ? key.first : key
46
- h[group_key] = func.(self, group_key, indicies)
50
+ h[group_key] = func.(self, group_key, group_df_indices)
47
51
  h
48
52
  end
49
53
  )
data/lib/remi/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Remi
2
- VERSION = '0.2.5'
2
+ VERSION = '0.2.6'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sterling Paramore