match_reduce 1.0.0.pre.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require_relative 'result'
11
+
12
module MatchReduce
  class Processor
    # Collects the records matched to one aggregate and folds each of them into a running
    # value by calling the aggregate's #reduce.
    #
    # When the aggregate answers true to #grouped?, only the first record seen for each
    # distinct group id is kept; later records carrying an already-seen group id are ignored.
    class ResultBuilder
      # @param aggregate [#name, #grouped?, #reduce] the aggregate this builder collects for
      # @param resolver  [Object] handed through, untouched, to aggregate#reduce
      # @raise [ArgumentError] if aggregate or resolver is nil/false
      def initialize(aggregate, resolver)
        raise ArgumentError, 'aggregate is required' unless aggregate
        raise ArgumentError, 'resolver is required' unless resolver

        @aggregate = aggregate
        @resolver  = resolver
        @records   = []
        @value     = nil
        @group_ids = Set.new
      end

      # Registers a record and folds it into the running value.
      #
      # @param record   [Object] the matched record
      # @param group_id [Object] identity of the record's group; only consulted when the
      #   aggregate is grouped
      # @return [self] so calls can be chained
      def add(record, group_id)
        # Set#add? returns nil when the id was already present, i.e. the group was
        # already counted and this record must be skipped.
        return self if aggregate.grouped? && !group_ids.add?(group_id)

        records << record
        @value = aggregate.reduce(value, record, resolver)

        self
      end

      # Builds a Result from the aggregate's name, the records kept so far and the value.
      #
      # The records are handed over as a copy: this builder keeps appending to its own
      # array, and a Result that was already returned should not change afterwards.
      #
      # @return [Result]
      def result
        Result.new(aggregate.name, records.dup, value)
      end

      private

      attr_reader :aggregate, :group_ids, :records, :resolver, :value
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require_relative 'result_builder'
11
+
12
module MatchReduce
  class Processor
    # Holds one ResultBuilder per aggregate, keyed by aggregate name, and routes each
    # matched record to the builder of the aggregate it matched.
    class ResultsBuilder
      attr_reader :resolver

      # @param aggregates [Enumerable<#name>] aggregates to build results for
      # @param resolver   [Object] passed to every ResultBuilder and exposed via #resolver
      # @raise [ArgumentError] if aggregates is nil/false
      def initialize(aggregates, resolver)
        raise ArgumentError, 'aggregates are required' unless aggregates

        @result_by_name = aggregates.each_with_object({}) do |aggregate, memo|
          memo[aggregate.name] = ResultBuilder.new(aggregate, resolver)
        end
        @resolver = resolver

        # Freezes this object's instance variables only; the builders inside the hash
        # remain mutable and keep accumulating records.
        freeze
      end

      # Forwards the record to the builder registered under the aggregate's name.
      #
      # @param aggregate [#name] aggregate the record matched
      # @param record    [Object] the matched record
      # @param group_id  [Object] group identity, passed through to the builder
      # @return [self]
      # @raise [ArgumentError] if no builder is registered under aggregate.name
      def add(aggregate, record, group_id)
        # fetch (rather than []) so an unregistered aggregate fails with a message that
        # names it, instead of a NoMethodError on nil.
        builder = result_by_name.fetch(aggregate.name) do |name|
          raise ArgumentError, "unknown aggregate: #{name.inspect}"
        end

        builder.add(record, group_id)

        self
      end

      # @return [Array] one result per registered aggregate, in registration order
      def results
        result_by_name.values.map(&:result)
      end

      private

      attr_reader :result_by_name
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require_relative 'processor/results_builder'
11
+
12
module MatchReduce
  # Drives the algorithm end to end: construct an instance with the aggregates, feed it
  # records through #add or #add_each, then call #results to collect the outcome.
  class Processor
    extend Forwardable

    def_delegators :results_builder, :results, :resolver
    def_delegators :index, :aggregates

    # @param aggregates [Object] handed to Index.new
    # @param resolver   [#get] used to read values out of records
    def initialize(aggregates, resolver)
      @index = Index.new(aggregates)
      @results_builder = ResultsBuilder.new(index.aggregates, resolver)

      freeze
    end

    # Adds every record in turn.
    #
    # @return [self]
    def add_each(records)
      records.each { |record| add(record) }

      self
    end

    # Looks the record up in the index under each of its candidate patterns and hands it to
    # every aggregate found. An aggregate receives the record at most once per call, even
    # if several patterns lead to it.
    #
    # @return [self]
    def add(record)
      seen = Set.new

      record_patterns(record).each do |pattern|
        # Every aggregate the index returns for a pattern has matched this record.
        index.find(pattern).each do |aggregate|
          # Set#add? yields nil for an aggregate that was already handled.
          next unless seen.add?(aggregate)

          results_builder.add(aggregate, record, make_group_id(aggregate, record))
        end
      end

      self
    end

    private

    attr_reader :index, :results_builder

    # The record's value for each of the aggregate's group keys, in key order.
    def make_group_id(aggregate, record)
      aggregate.group_keys.map { |key| resolver.get(record, key) }
    end

    # For every key known to the index, offers two candidate values: the record's own
    # value and ANY.
    def record_matrix(record)
      matrix = HashMath::Matrix.new

      index.keys.each do |key|
        matrix.add_each(key, [resolver.get(record, key), ANY])
      end

      matrix
    end

    # The empty pattern followed by everything the matrix expands to.
    # NOTE(review): the leading {} presumably covers an index that has no keys at all;
    # confirm against Index and HashMath::Matrix#to_a.
    def record_patterns(record)
      [{}] + record_matrix(record).to_a
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module MatchReduce
  # Version of the library; the gemspec reads this constant for the gem's version.
  VERSION = '1.0.0-alpha'
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
require 'forwardable'
require 'set'

require 'acts_as_hashable'
require 'hash_math'
require 'objectable'

require_relative 'match_reduce/any'
require_relative 'match_reduce/aggregate'
require_relative 'match_reduce/index'
require_relative 'match_reduce/processor'
18
+
19
# Top-level namespace
module MatchReduce
  # Shared flag object meaning "match on any value". It is a plain instance rather than a
  # true singleton; that is sufficient because Any provides equality through #hash, #eql?
  # and #==, so any other Any instance would behave the same way.
  ANY = Any.new

  # One-shot convenience entry point: builds a Processor for the aggregates, feeds it all
  # of the records and returns the results.
  #
  # @param aggregates [Object] passed to Processor.new
  # @param records    [#each] records to aggregate
  # @param resolver   [Object] value resolver; defaults to Objectable.resolver
  # @return whatever Processor#results returns
  def self.process(aggregates, records, resolver = Objectable.resolver)
    processor = Processor.new(aggregates, resolver)

    processor.add_each(records).results
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/match_reduce/version'
4
+
5
# Packaging metadata for the match_reduce gem. The file lists are taken from git, so the
# gem has to be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = 'match_reduce'
  s.version     = MatchReduce::VERSION
  s.summary     = 'Dataset aggregation and reducer algorithm'

  # Squiggly heredoc: strips the source indentation, so the published description does
  # not start with leading whitespace.
  s.description = <<~DESCRIPTION
    High speed data aggregator and reducer algorithm based on key-value exact matching and grouping.
  DESCRIPTION

  s.authors     = ['Matthew Ruggio']
  s.email       = ['mruggio@bluemarblepayroll.com']
  s.files       = `git ls-files`.split("\n")
  s.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.homepage    = 'https://github.com/bluemarblepayroll/match_reduce'
  s.license     = 'MIT'

  s.required_ruby_version = '>= 2.3.8'

  s.add_dependency('acts_as_hashable', '~>1', '>=1.1.0')
  s.add_dependency('hash_math', '>=1.0.0-alpha')
  s.add_dependency('objectable', '~>1')

  s.add_development_dependency('guard-rspec', '~>4.7')
  s.add_development_dependency('pry', '~>0')
  s.add_development_dependency('rake', '~> 12')
  # Bounded like the other development dependencies: an open-ended requirement lets a
  # future major release of rspec be picked up unannounced.
  s.add_development_dependency('rspec', '~> 3.8')
  s.add_development_dependency('rubocop', '~>0.74.0')
  s.add_development_dependency('simplecov', '~>0.17.0')
  s.add_development_dependency('simplecov-console', '~>0.5.0')
end
@@ -0,0 +1,68 @@
1
+ records:
2
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
3
+ - { a: a2, b: b2, c: c2, d: d2, e: e2, val: 2 }
4
+ - { a: a3, b: b3, c: c3, d: d3, e: e3, val: 3 }
5
+
6
+ aggregates:
7
+ - name: all (no patterns)
8
+ sum_reducer_key: val
9
+ patterns:
10
+ - name: all (match all patterns)
11
+ sum_reducer_key: val
12
+ patterns:
13
+ - a: a1
14
+ - b: b2
15
+ - c: c3
16
+ - name: match 1
17
+ sum_reducer_key: val
18
+ patterns:
19
+ - a: a1
20
+ d: d1
21
+ - z: z2
22
+ a: a2
23
+ - name: match 1 and 3
24
+ sum_reducer_key: val
25
+ patterns:
26
+ - a: a1
27
+ d: d1
28
+ - z: z2
29
+ a: a2
30
+ - a: a3
31
+ c: c3
32
+ - name: no matches
33
+ sum_reducer_key: val
34
+ patterns:
35
+ - a: a1
36
+ b: b2
37
+ - a: a8
38
+ b: a8
39
+ - z: z1
40
+ - A: a1
41
+ - a: A1
42
+ - a: 1
43
+
44
+ results:
45
+ - name: all (no patterns)
46
+ value: 6
47
+ records:
48
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
49
+ - { a: a2, b: b2, c: c2, d: d2, e: e2, val: 2 }
50
+ - { a: a3, b: b3, c: c3, d: d3, e: e3, val: 3 }
51
+ - name: all (match all patterns)
52
+ value: 6
53
+ records:
54
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
55
+ - { a: a2, b: b2, c: c2, d: d2, e: e2, val: 2 }
56
+ - { a: a3, b: b3, c: c3, d: d3, e: e3, val: 3 }
57
+ - name: match 1
58
+ value: 1
59
+ records:
60
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
61
+ - name: match 1 and 3
62
+ value: 4
63
+ records:
64
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
65
+ - { a: a3, b: b3, c: c3, d: d3, e: e3, val: 3 }
66
+ - name: no matches
67
+ value:
68
+ records: []
@@ -0,0 +1,173 @@
1
+ records:
2
+ - team: bulls
3
+ team_points: 1000
4
+ player: michael_jordan
5
+ player_points: 750
6
+
7
+ - team: bulls
8
+ team_points: 1000
9
+ player: scottie_pippen
10
+ player_points: 150
11
+
12
+ - team: wizards
13
+ team_points: 800
14
+ player: michael_jordan
15
+ player_points: 600
16
+
17
+ - team: wizards
18
+ team_points: 800
19
+ player: rip_hamilton
20
+ player_points: 200
21
+
22
+ - team: pistons
23
+ team_points: 1200
24
+ player: rip_hamilton
25
+ player_points: 300
26
+
27
+ - team: bulls
28
+ team_points: 1000
29
+ player: rip_hamilton
30
+ player_points: 100
31
+
32
+ aggregates:
33
+ - name: total_points_by_team_player
34
+ patterns:
35
+ sum_reducer_key: player_points
36
+
37
+ - name: total_points_by_team
38
+ patterns:
39
+ sum_reducer_key: team_points
40
+ group_keys: team
41
+
42
+ - name: bulls_points_by_team
43
+ patterns:
44
+ team: bulls
45
+ sum_reducer_key: team_points
46
+ group_keys: team
47
+
48
+ - name: wizards_points_by_team
49
+ patterns:
50
+ team: wizards
51
+ sum_reducer_key: team_points
52
+ group_keys: team
53
+
54
+ - name: jordan_points_for_bulls
55
+ patterns:
56
+ team: bulls
57
+ player: michael_jordan
58
+ sum_reducer_key: player_points
59
+
60
+ - name: all_jordan_points
61
+ patterns:
62
+ player: michael_jordan
63
+ sum_reducer_key: player_points
64
+
65
+ - name: rip_points_for_detroit_and_wizards
66
+ patterns:
67
+ - player: rip_hamilton
68
+ team: wizards
69
+ - player: rip_hamilton
70
+ team: pistons
71
+ sum_reducer_key: player_points
72
+
73
+ results:
74
+ - name: total_points_by_team_player
75
+ records:
76
+ - team: bulls
77
+ team_points: 1000
78
+ player: michael_jordan
79
+ player_points: 750
80
+
81
+ - team: bulls
82
+ team_points: 1000
83
+ player: scottie_pippen
84
+ player_points: 150
85
+
86
+ - team: wizards
87
+ team_points: 800
88
+ player: michael_jordan
89
+ player_points: 600
90
+
91
+ - team: wizards
92
+ team_points: 800
93
+ player: rip_hamilton
94
+ player_points: 200
95
+
96
+ - team: pistons
97
+ team_points: 1200
98
+ player: rip_hamilton
99
+ player_points: 300
100
+
101
+ - team: bulls
102
+ team_points: 1000
103
+ player: rip_hamilton
104
+ player_points: 100
105
+ value: 2100
106
+
107
+ - name: total_points_by_team
108
+ records:
109
+ - team: bulls
110
+ team_points: 1000
111
+ player: michael_jordan
112
+ player_points: 750
113
+
114
+ - team: wizards
115
+ team_points: 800
116
+ player: michael_jordan
117
+ player_points: 600
118
+
119
+ - team: pistons
120
+ team_points: 1200
121
+ player: rip_hamilton
122
+ player_points: 300
123
+ value: 3000
124
+
125
+ - name: bulls_points_by_team
126
+ records:
127
+ - team: bulls
128
+ team_points: 1000
129
+ player: michael_jordan
130
+ player_points: 750
131
+ value: 1000
132
+
133
+ - name: wizards_points_by_team
134
+ records:
135
+ - team: wizards
136
+ team_points: 800
137
+ player: michael_jordan
138
+ player_points: 600
139
+ value: 800
140
+
141
+ - name: jordan_points_for_bulls
142
+ records:
143
+ - team: bulls
144
+ team_points: 1000
145
+ player: michael_jordan
146
+ player_points: 750
147
+ value: 750
148
+
149
+ - name: all_jordan_points
150
+ records:
151
+ - team: bulls
152
+ team_points: 1000
153
+ player: michael_jordan
154
+ player_points: 750
155
+
156
+ - team: wizards
157
+ team_points: 800
158
+ player: michael_jordan
159
+ player_points: 600
160
+ value: 1350
161
+
162
+ - name: rip_points_for_detroit_and_wizards
163
+ records:
164
+ - team: wizards
165
+ team_points: 800
166
+ player: rip_hamilton
167
+ player_points: 200
168
+
169
+ - team: pistons
170
+ team_points: 1200
171
+ player: rip_hamilton
172
+ player_points: 300
173
+ value: 500
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'spec_helper'
11
+
12
# Covers how Aggregate#initialize normalizes the :patterns option.
describe MatchReduce::Aggregate do
  describe '#initialize' do
    it 'sets patterns to at least one hash' do
      aggregate = described_class.new(name: :sig1)

      expect(aggregate.patterns).to eq([{}])
    end

    it 'patterns can be a hash' do
      aggregate = described_class.new(name: :sig1, patterns: { a: :b })
      normalized = [{ 'a' => :b }]

      expect(aggregate.patterns).to eq(normalized)
    end

    it 'patterns can be an array of hashes' do
      aggregate = described_class.new(name: :sig1, patterns: [{ a: :b }])
      normalized = [{ 'a' => :b }]

      expect(aggregate.patterns).to eq(normalized)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'spec_helper'
11
+
12
# Covers the value semantics of Any: every instance hashes alike and compares equal to
# every other instance, but not to unrelated objects such as the symbol :any.
describe MatchReduce::Any do
  describe '#hash' do
    it 'should be based on an array of the class name and the symbol :any' do
      expect(described_class.new.hash).to eq(['MatchReduce::Any', :any].hash)
    end
  end

  describe 'equality' do
    specify '#== should always be equal if the classes and hash are the same' do
      expect(described_class.new).to eq(described_class.new)

      expect(described_class.new).not_to eq(:any)
    end

    specify '#eql? should always be equal if the classes and hash are the same' do
      expect(described_class.new).to eql(described_class.new)

      # Uses the eql matcher so the negative case exercises #eql? rather than #==.
      expect(described_class.new).not_to eql(:any)
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'spec_helper'
11
+
12
# Covers how Index collects aggregates and builds its pattern => aggregates lookup.
describe MatchReduce::Index do
  # Swaps the aggregate objects in a lookup for their names so that expectations can be
  # written with plain literals.
  def lookup_by_name(lookup)
    lookup.each_with_object({}) do |(pattern, aggregates), memo|
      memo[pattern] = aggregates.map(&:name)
    end
  end

  # Value the lookup is expected to hold for a key that an aggregate does not constrain.
  let(:base_value) { MatchReduce::ANY }

  describe '#initialize' do
    context 'constructing aggregates' do
      specify 'aggregates are empty when none are given' do
        subject = described_class.new

        expect(subject.aggregates).to eq([])
      end

      specify 'only each first unique aggregate name is kept' do
        # Nine entries, seven distinct names: 'sig2'/:sig2 and 'sig4'/:sig4 count as
        # different names, while the repeated :sig3 and :sig5 are dropped.
        aggregates = [
          { name: :sig3 },
          { name: :sig1 },
          { name: 'sig2' },
          { name: :sig2 },
          { name: :sig3 },
          { name: 'sig4' },
          { name: :sig4 },
          { name: :sig5 },
          { name: :sig5 }
        ]

        subject = described_class.new(aggregates)

        expect(subject.aggregates.length).to eq(7)
      end
    end

    context 'constructing lookup' do
      it 'creates an empty lookup when there are no aggregates' do
        subject = described_class.new

        expected = {}

        expect(subject.lookup).to eq(expected)
      end

      it 'creates lookup with aggregates that have no patterns' do
        aggregates = [
          { name: :sig3 },
          { name: :sig1 }
        ]

        subject = described_class.new(aggregates)

        expected = {
          {} => %i[sig3 sig1]
        }

        expect(lookup_by_name(subject.lookup)).to eq(expected)
      end

      it 'creates lookup with aggregates that have patterns and no patterns' do
        aggregates = [
          { name: :sig3 },
          { name: :sig1 },
          {
            name: :sig2,
            patterns: { 'a' => '1', 'b' => [nil], 'c' => :c }
          }
        ]

        subject = described_class.new(aggregates)

        expected = {
          { 'a' => base_value, 'b' => base_value, 'c' => base_value } => %i[sig3 sig1],
          { 'a' => '1', 'b' => [nil], 'c' => :c } => %i[sig2]
        }

        expect(lookup_by_name(subject.lookup)).to eq(expected)
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'spec_helper'
11
+
12
# Covers the value-object behaviour of Result: equality and hashing over its attributes.
describe MatchReduce::Processor::Result do
  let(:config) do
    {
      name: 'abc',
      records: [
        { something: :else }
      ],
      value: 123
    }
  end

  # Positional constructor arguments; relies on config listing name, records, value in
  # that order.
  let(:args) { config.values }

  describe 'equality' do
    let(:object_a) { described_class.new(*args) }

    let(:object_b) { described_class.new(*args) }

    specify '#== compares class type and attributes' do
      expect(object_a).to eq(object_b)
    end

    # Titled after the method it exercises, so the two examples no longer share one name.
    specify '#eql? compares class type and attributes' do
      expect(object_a).to eql(object_b)
    end
  end

  specify '#hash is a computed hash of attributes' do
    subject = described_class.new(*args)

    expected = [
      config[:name], config[:records], config[:value]
    ].hash

    expect(subject.hash).to eq(expected)
  end
end