match_reduce 1.0.0.pre.alpha

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require_relative 'result'
11
+
12
# Set is only autoloaded on newer Rubies; require it explicitly rather than relying on
# some other library having loaded it first.
require 'set'

module MatchReduce
  class Processor
    # Collects the records that hit a single aggregate and folds them into one value.
    # For a grouped aggregate only the first record seen for each group id is counted;
    # any later record carrying an already-seen group id is ignored.
    class ResultBuilder
      # aggregate: object responding to #name, #grouped? and #reduce(value, record, resolver).
      # resolver:  object handed through to aggregate#reduce untouched.
      # Raises ArgumentError when either argument is nil/false.
      def initialize(aggregate, resolver)
        raise ArgumentError, 'aggregate is required' unless aggregate
        raise ArgumentError, 'resolver is required' unless resolver

        @aggregate = aggregate
        @resolver  = resolver

        @records   = []
        @value     = nil
        @group_ids = Set.new
      end

      # Registers a record for this aggregate and updates the running value.
      # Returns self so calls can be chained.
      def add(record, group_id)
        # Set#add? returns nil when the id was already present, which lets us record the
        # group id and detect a duplicate in one step.
        return self if aggregate.grouped? && !group_ids.add?(group_id)

        records << record

        @value = aggregate.reduce(value, record, resolver)

        self
      end

      # Builds a Result from the aggregate name, the collected records and the current value.
      def result
        Result.new(aggregate.name, records, value)
      end

      private

      attr_reader :aggregate,
                  :group_ids,
                  :records,
                  :resolver,
                  :value
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require_relative 'result_builder'
11
+
12
module MatchReduce
  class Processor
    # Keeps one ResultBuilder per aggregate, keyed by aggregate name, and routes each
    # incoming record to the builder that belongs to the aggregate it matched.
    class ResultsBuilder
      attr_reader :resolver

      # aggregates: enumerable of objects responding to #name (one builder is made for each).
      # resolver:   passed to every ResultBuilder and exposed through #resolver.
      # Raises ArgumentError when aggregates is nil/false.
      def initialize(aggregates, resolver)
        raise ArgumentError, 'aggregates are required' unless aggregates

        @result_by_name = aggregates.each_with_object({}) do |aggregate, memo|
          memo[aggregate.name] = ResultBuilder.new(aggregate, resolver)
        end
        @resolver = resolver

        freeze
      end

      # Hands the record to the builder registered under aggregate.name. Returns self.
      # Raises ArgumentError when no builder was registered for that name, instead of
      # failing with a NoMethodError on nil.
      def add(aggregate, record, group_id)
        builder = result_by_name.fetch(aggregate.name) do |name|
          raise ArgumentError, "unknown aggregate: #{name}"
        end

        builder.add(record, group_id)

        self
      end

      # One result per registered aggregate, in registration order.
      def results
        result_by_name.values.map(&:result)
      end

      private

      attr_reader :result_by_name
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require_relative 'processor/results_builder'
11
+
12
# Forwardable and Set are standard library, but neither is guaranteed to be loaded
# already on every supported Ruby, so require them explicitly.
require 'forwardable'
require 'set'

module MatchReduce
  # This is the main lifecycle of the algorithm. You initialize a new instance of this
  # class using aggregates, then you pump records into it. Once done, call #results
  # to get the results.
  class Processor
    extend Forwardable

    def_delegators :results_builder, :results, :resolver

    def_delegators :index, :aggregates

    # aggregates: passed as-is to Index.new.
    # resolver:   object responding to #get(record, key); used to read values off records.
    def initialize(aggregates, resolver)
      @index           = Index.new(aggregates)
      @results_builder = ResultsBuilder.new(index.aggregates, resolver)

      freeze
    end

    # Adds every record in turn. Returns self.
    def add_each(records)
      records.each { |record| add(record) }

      self
    end

    # Matches one record against the index and feeds it to every aggregate it hits.
    # An aggregate receives the record at most once, even when several of the record's
    # patterns lead to it. Returns self.
    def add(record)
      hit_aggregates = Set.new

      record_patterns(record).each do |hash_pattern|
        # Each index find hit means the aggregate matched on the record
        index.find(hash_pattern).each do |aggregate|
          # Set#add? returns nil when the aggregate has already been handled.
          next unless hit_aggregates.add?(aggregate)

          add_to_results_builder(aggregate, record)
        end
      end

      self
    end

    private

    attr_reader :index,
                :results_builder

    # The record's values for the aggregate's group keys, in group key order.
    def make_group_id(aggregate, record)
      aggregate.group_keys.map { |group_key| resolver.get(record, group_key) }
    end

    # Builds a matrix that offers, for every index key, both the record's own value
    # and ANY.
    def record_matrix(record)
      index.keys.each_with_object(HashMath::Matrix.new) do |key, memo|
        value = resolver.get(record, key)

        memo.add_each(key, [value, ANY])
      end
    end

    # All patterns to look up for a record: the empty pattern plus every entry the
    # matrix expands to.
    def record_patterns(record)
      [{}] + record_matrix(record).to_a
    end

    def add_to_results_builder(aggregate, record)
      group_id = make_group_id(aggregate, record)

      results_builder.add(aggregate, record, group_id)
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module MatchReduce
  # Library version string; the gemspec reads this constant for the gem's version.
  VERSION = '1.0.0-alpha'
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'acts_as_hashable'
11
+ require 'hash_math'
12
+ require 'objectable'
13
+
14
+ require_relative 'match_reduce/any'
15
+ require_relative 'match_reduce/aggregate'
16
+ require_relative 'match_reduce/index'
17
+ require_relative 'match_reduce/processor'
18
+
19
+ # Top-level namespace
20
module MatchReduce
  # Shared wildcard value meaning "match on any value". It is a plain instance rather
  # than a true singleton; that is sufficient because Any instances compare equal to one
  # another through #hash, #eql? and #==, so additional instances would behave the same.
  ANY = Any.new

  # One-shot helper: builds a Processor for the aggregates, feeds it every record and
  # returns the processor's results. The resolver defaults to Objectable.resolver.
  def self.process(aggregates, records, resolver = Objectable.resolver)
    processor = Processor.new(aggregates, resolver)
    loaded    = processor.add_each(records)

    loaded.results
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/match_reduce/version'
4
+
5
# Gem packaging metadata for match_reduce.
Gem::Specification.new do |s|
  s.name        = 'match_reduce'
  s.version     = MatchReduce::VERSION
  s.summary     = 'Dataset aggregation and reducer algorithm'

  # Squiggly heredoc so source indentation does not end up in the published description.
  s.description = <<~DESCRIPTION
    High speed data aggregator and reducer algorithm based on key-value exact matching and grouping.
  DESCRIPTION

  s.authors     = ['Matthew Ruggio']
  s.email       = ['mruggio@bluemarblepayroll.com']
  s.files       = `git ls-files`.split("\n")
  s.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.homepage    = 'https://github.com/bluemarblepayroll/match_reduce'
  s.license     = 'MIT'

  s.required_ruby_version = '>= 2.3.8'

  s.add_dependency('acts_as_hashable', '~>1', '>=1.1.0')
  s.add_dependency('hash_math', '>=1.0.0-alpha')
  s.add_dependency('objectable', '~>1')

  s.add_development_dependency('guard-rspec', '~>4.7')
  s.add_development_dependency('pry', '~>0')
  s.add_development_dependency('rake', '~> 12')
  # Constrained like the other development dependencies; an open-ended requirement
  # lets a future major release break the suite.
  s.add_development_dependency('rspec', '~> 3.8')
  s.add_development_dependency('rubocop', '~>0.74.0')
  s.add_development_dependency('simplecov', '~>0.17.0')
  s.add_development_dependency('simplecov-console', '~>0.5.0')
end
@@ -0,0 +1,68 @@
1
+ records:
2
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
3
+ - { a: a2, b: b2, c: c2, d: d2, e: e2, val: 2 }
4
+ - { a: a3, b: b3, c: c3, d: d3, e: e3, val: 3 }
5
+
6
+ aggregates:
7
+ - name: all (no patterns)
8
+ sum_reducer_key: val
9
+ patterns:
10
+ - name: all (match all patterns)
11
+ sum_reducer_key: val
12
+ patterns:
13
+ - a: a1
14
+ - b: b2
15
+ - c: c3
16
+ - name: match 1
17
+ sum_reducer_key: val
18
+ patterns:
19
+ - a: a1
20
+ d: d1
21
+ - z: z2
22
+ a: a2
23
+ - name: match 1 and 3
24
+ sum_reducer_key: val
25
+ patterns:
26
+ - a: a1
27
+ d: d1
28
+ - z: z2
29
+ a: a2
30
+ - a: a3
31
+ c: c3
32
+ - name: no matches
33
+ sum_reducer_key: val
34
+ patterns:
35
+ - a: a1
36
+ b: b2
37
+ - a: a8
38
+ b: a8
39
+ - z: z1
40
+ - A: a1
41
+ - a: A1
42
+ - a: 1
43
+
44
+ results:
45
+ - name: all (no patterns)
46
+ value: 6
47
+ records:
48
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
49
+ - { a: a2, b: b2, c: c2, d: d2, e: e2, val: 2 }
50
+ - { a: a3, b: b3, c: c3, d: d3, e: e3, val: 3 }
51
+ - name: all (match all patterns)
52
+ value: 6
53
+ records:
54
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
55
+ - { a: a2, b: b2, c: c2, d: d2, e: e2, val: 2 }
56
+ - { a: a3, b: b3, c: c3, d: d3, e: e3, val: 3 }
57
+ - name: match 1
58
+ value: 1
59
+ records:
60
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
61
+ - name: match 1 and 3
62
+ value: 4
63
+ records:
64
+ - { a: a1, b: b1, c: c1, d: d1, e: e1, val: 1 }
65
+ - { a: a3, b: b3, c: c3, d: d3, e: e3, val: 3 }
66
+ - name: no matches
67
+ value:
68
+ records: []
@@ -0,0 +1,173 @@
1
+ records:
2
+ - team: bulls
3
+ team_points: 1000
4
+ player: michael_jordan
5
+ player_points: 750
6
+
7
+ - team: bulls
8
+ team_points: 1000
9
+ player: scottie_pippen
10
+ player_points: 150
11
+
12
+ - team: wizards
13
+ team_points: 800
14
+ player: michael_jordan
15
+ player_points: 600
16
+
17
+ - team: wizards
18
+ team_points: 800
19
+ player: rip_hamilton
20
+ player_points: 200
21
+
22
+ - team: pistons
23
+ team_points: 1200
24
+ player: rip_hamilton
25
+ player_points: 300
26
+
27
+ - team: bulls
28
+ team_points: 1000
29
+ player: rip_hamilton
30
+ player_points: 100
31
+
32
+ aggregates:
33
+ - name: total_points_by_team_player
34
+ patterns:
35
+ sum_reducer_key: player_points
36
+
37
+ - name: total_points_by_team
38
+ patterns:
39
+ sum_reducer_key: team_points
40
+ group_keys: team
41
+
42
+ - name: bulls_points_by_team
43
+ patterns:
44
+ team: bulls
45
+ sum_reducer_key: team_points
46
+ group_keys: team
47
+
48
+ - name: wizards_points_by_team
49
+ patterns:
50
+ team: wizards
51
+ sum_reducer_key: team_points
52
+ group_keys: team
53
+
54
+ - name: jordan_points_for_bulls
55
+ patterns:
56
+ team: bulls
57
+ player: michael_jordan
58
+ sum_reducer_key: player_points
59
+
60
+ - name: all_jordan_points
61
+ patterns:
62
+ player: michael_jordan
63
+ sum_reducer_key: player_points
64
+
65
+ - name: rip_points_for_detroit_and_wizards
66
+ patterns:
67
+ - player: rip_hamilton
68
+ team: wizards
69
+ - player: rip_hamilton
70
+ team: pistons
71
+ sum_reducer_key: player_points
72
+
73
+ results:
74
+ - name: total_points_by_team_player
75
+ records:
76
+ - team: bulls
77
+ team_points: 1000
78
+ player: michael_jordan
79
+ player_points: 750
80
+
81
+ - team: bulls
82
+ team_points: 1000
83
+ player: scottie_pippen
84
+ player_points: 150
85
+
86
+ - team: wizards
87
+ team_points: 800
88
+ player: michael_jordan
89
+ player_points: 600
90
+
91
+ - team: wizards
92
+ team_points: 800
93
+ player: rip_hamilton
94
+ player_points: 200
95
+
96
+ - team: pistons
97
+ team_points: 1200
98
+ player: rip_hamilton
99
+ player_points: 300
100
+
101
+ - team: bulls
102
+ team_points: 1000
103
+ player: rip_hamilton
104
+ player_points: 100
105
+ value: 2100
106
+
107
+ - name: total_points_by_team
108
+ records:
109
+ - team: bulls
110
+ team_points: 1000
111
+ player: michael_jordan
112
+ player_points: 750
113
+
114
+ - team: wizards
115
+ team_points: 800
116
+ player: michael_jordan
117
+ player_points: 600
118
+
119
+ - team: pistons
120
+ team_points: 1200
121
+ player: rip_hamilton
122
+ player_points: 300
123
+ value: 3000
124
+
125
+ - name: bulls_points_by_team
126
+ records:
127
+ - team: bulls
128
+ team_points: 1000
129
+ player: michael_jordan
130
+ player_points: 750
131
+ value: 1000
132
+
133
+ - name: wizards_points_by_team
134
+ records:
135
+ - team: wizards
136
+ team_points: 800
137
+ player: michael_jordan
138
+ player_points: 600
139
+ value: 800
140
+
141
+ - name: jordan_points_for_bulls
142
+ records:
143
+ - team: bulls
144
+ team_points: 1000
145
+ player: michael_jordan
146
+ player_points: 750
147
+ value: 750
148
+
149
+ - name: all_jordan_points
150
+ records:
151
+ - team: bulls
152
+ team_points: 1000
153
+ player: michael_jordan
154
+ player_points: 750
155
+
156
+ - team: wizards
157
+ team_points: 800
158
+ player: michael_jordan
159
+ player_points: 600
160
+ value: 1350
161
+
162
+ - name: rip_points_for_detroit_and_wizards
163
+ records:
164
+ - team: wizards
165
+ team_points: 800
166
+ player: rip_hamilton
167
+ player_points: 200
168
+
169
+ - team: pistons
170
+ team_points: 1200
171
+ player: rip_hamilton
172
+ player_points: 300
173
+ value: 500
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'spec_helper'
11
+
12
# Covers how Aggregate#patterns is populated from the constructor arguments.
describe MatchReduce::Aggregate do
  describe '#initialize' do
    it 'sets patterns to at least one hash' do
      aggregate = described_class.new(name: :sig1)

      expect(aggregate.patterns).to eq([{}])
    end

    it 'patterns can be a hash' do
      aggregate = described_class.new(name: :sig1, patterns: { a: :b })
      expected  = [{ 'a' => :b }]

      expect(aggregate.patterns).to eq(expected)
    end

    it 'patterns can be an array of hashes' do
      aggregate = described_class.new(name: :sig1, patterns: [{ a: :b }])
      expected  = [{ 'a' => :b }]

      expect(aggregate.patterns).to eq(expected)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'spec_helper'
11
+
12
# Covers the hashing and equality contract of the Any wildcard value.
describe MatchReduce::Any do
  describe '#hash' do
    it 'should be based on an array of the class name and the symbol :any' do
      expect(described_class.new.hash).to eq(['MatchReduce::Any', :any].hash)
    end
  end

  describe 'equality' do
    specify '#== should always be equal if the classes and hash are the same' do
      expect(described_class.new).to eq(described_class.new)

      expect(described_class.new).not_to eq(:any)
    end

    specify '#eql? should always be equal if the classes and hash are the same' do
      expect(described_class.new).to eql(described_class.new)

      # Use the eql matcher for the negative case too, so this example exercises
      # #eql? rather than repeating the #== check above.
      expect(described_class.new).not_to eql(:any)
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'spec_helper'
11
+
12
# Covers how Index builds its aggregate list and its pattern lookup on construction.
describe MatchReduce::Index do
  # Replaces the aggregates in a lookup with their names so expectations stay readable.
  def lookup_by_name(lookup)
    lookup.each_with_object({}) do |(pattern, aggregates), memo|
      memo[pattern] = aggregates.map(&:name)
    end
  end

  let(:base_value) { MatchReduce::ANY }

  describe '#initialize' do
    context 'constructing aggregates' do
      specify 'when aggregates are empty' do
        subject = described_class.new

        expect(subject.aggregates).to eq([])
      end

      specify 'only each first unique aggregate name is kept' do
        # String and symbol names count as different names here: 9 entries, 7 unique.
        aggregates = [
          { name: :sig3 },
          { name: :sig1 },
          { name: 'sig2' },
          { name: :sig2 },
          { name: :sig3 },
          { name: 'sig4' },
          { name: :sig4 },
          { name: :sig5 },
          { name: :sig5 }
        ]

        subject = described_class.new(aggregates)

        expect(subject.aggregates.length).to eq(7)
      end
    end

    context 'constructing lookup' do
      it 'creates an empty lookup without aggregates' do
        subject = described_class.new

        expected = {}

        expect(subject.lookup).to eq(expected)
      end

      it 'creates lookup with aggregates that have no patterns' do
        aggregates = [
          { name: :sig3 },
          { name: :sig1 }
        ]

        subject = described_class.new(aggregates)

        expected = {
          {} => %i[sig3 sig1]
        }

        expect(lookup_by_name(subject.lookup)).to eq(expected)
      end

      it 'creates lookup with aggregates that have patterns and no patterns' do
        aggregates = [
          { name: :sig3 },
          { name: :sig1 },
          {
            name: :sig2,
            patterns: { 'a' => '1', 'b' => [nil], 'c' => :c }
          }
        ]

        subject = described_class.new(aggregates)

        expected = {
          { 'a' => base_value, 'b' => base_value, 'c' => base_value } => %i[sig3 sig1],
          { 'a' => '1', 'b' => [nil], 'c' => :c } => %i[sig2]
        }

        expect(lookup_by_name(subject.lookup)).to eq(expected)
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require 'spec_helper'
11
+
12
# Covers the value-object behaviour of Result: equality and hashing over its attributes.
describe MatchReduce::Processor::Result do
  let(:config) do
    {
      name: 'abc',
      records: [
        { something: :else }
      ],
      value: 123
    }
  end

  # Positional constructor arguments, taken in the hash's insertion order: name, records, value.
  let(:args) { config.values }

  describe 'equality' do
    let(:object_a) { described_class.new(*args) }

    let(:object_b) { described_class.new(*args) }

    specify '#== compares class type and attributes' do
      expect(object_a).to eq(object_b)
    end

    # Named for the method it checks; it previously duplicated the #== description.
    specify '#eql? compares class type and attributes' do
      expect(object_a).to eql(object_b)
    end
  end

  specify '#hash is a computed hash of attributes' do
    subject = described_class.new(*args)

    expected = [
      config[:name], config[:records], config[:value]
    ].hash

    expect(subject.hash).to eq(expected)
  end
end