bmg 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/LICENSE.md +22 -0
- data/README.md +3 -0
- data/Rakefile +11 -0
- data/lib/bmg.rb +18 -0
- data/lib/bmg/operator.rb +14 -0
- data/lib/bmg/operator/allbut.rb +44 -0
- data/lib/bmg/operator/autosummarize.rb +190 -0
- data/lib/bmg/operator/autowrap.rb +109 -0
- data/lib/bmg/operator/project.rb +43 -0
- data/lib/bmg/operator/rename.rb +45 -0
- data/lib/bmg/reader.rb +11 -0
- data/lib/bmg/reader/csv.rb +56 -0
- data/lib/bmg/reader/excel.rb +35 -0
- data/lib/bmg/relation.rb +34 -0
- data/lib/bmg/version.rb +8 -0
- data/spec/unit/operator/test_allbut.rb +18 -0
- data/spec/unit/operator/test_autosummarize.rb +183 -0
- data/spec/unit/operator/test_autowrap.rb +88 -0
- data/spec/unit/operator/test_project.rb +18 -0
- data/spec/unit/operator/test_rename.rb +13 -0
- data/spec/unit/reader/example.csv +3 -0
- data/spec/unit/reader/example.numbers +0 -0
- data/spec/unit/reader/example.xlsx +0 -0
- data/spec/unit/reader/test_csv.rb +17 -0
- data/spec/unit/reader/test_excel.rb +17 -0
- data/spec/unit/spec_helper.rb +11 -0
- data/spec/unit/test_relation.rb +132 -0
- data/tasks/gem.rake +39 -0
- data/tasks/test.rake +17 -0
- metadata +130 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9e90b71aeb478997a7f9efcea3a18cec7ad3fac3
|
4
|
+
data.tar.gz: 9ebdd9b06a635ef12cef5d7c35d4746a56932bba
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a662b415f76df21dfad0d4e4b6f7c9dece45c994a7200c4d547f21007fd8158812a0e8c0fb263f0e4d3da597d334e03ed8394a28b052927f7f58081bf65ee392
|
7
|
+
data.tar.gz: 520d850977384a3cff1aa7e8fe58f52ea34e9f7f8caa0d9ec1f4849a92bab50814bbdaeb4ad8b0cd551ab8b3424d1d57f42fbab51bbe22fa7f6824e5aa610159
|
data/Gemfile
ADDED
data/LICENSE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# The MIT Licence
|
2
|
+
|
3
|
+
Copyright (c) 2017 - Enspirit SPRL (Bernard Lambeau)
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
ADDED
data/lib/bmg.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'path'
|
2
|
+
module Bmg
  # Every method defined below is exposed as a module function, i.e. callable
  # as `Bmg.csv(...)` / `Bmg.excel(...)`.
  module_function

  # Builds a Relation on top of a CSV reader created for `path`, forwarding
  # `options` untouched to `Reader::Csv`.
  def csv(path, options = {})
    reader = Reader::Csv.new(path, options)
    Relation.new(reader)
  end

  # Builds a Relation on top of an Excel reader created for `path`, forwarding
  # `options` untouched to `Reader::Excel`.
  def excel(path, options = {})
    reader = Reader::Excel.new(path, options)
    Relation.new(reader)
  end
end
|
15
|
+
require_relative 'bmg/version'
|
16
|
+
require_relative 'bmg/operator'
|
17
|
+
require_relative 'bmg/relation'
|
18
|
+
require_relative 'bmg/reader'
|
data/lib/bmg/operator.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Bmg
  # Mixin shared by the operator classes; the including class is expected to
  # provide an `each` method that yields its tuples.
  module Operator
    # Collects everything yielded by `each` into an Array.
    def to_a
      tuples = enum_for(:each)
      tuples.to_a
    end
  end
end
|
10
|
+
require_relative 'operator/allbut'
|
11
|
+
require_relative 'operator/autosummarize'
|
12
|
+
require_relative 'operator/autowrap'
|
13
|
+
require_relative 'operator/project'
|
14
|
+
require_relative 'operator/rename'
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Bmg
  module Operator
    #
    # Allbut operator.
    #
    # Projects operand's tuples on all but given attributes, that is,
    # removes attributes in the list. The operator takes care of removing
    # duplicates.
    #
    # Example:
    #
    #   [{ a: 1, b: 2 }] allbut [:b] => [{ a: 1 }]
    #
    # All attributes in the butlist SHOULD be existing attributes of the
    # input tuples.
    #
    class Allbut
      include Operator

      # operand - object whose `each` yields Hash tuples.
      # butlist - collection responding to `include?`, listing the attribute
      #           names to remove.
      def initialize(operand, butlist)
        @operand = operand
        @butlist = butlist
      end

      # Yields each distinct tuple obtained after removing the butlist
      # attributes, in the order of first appearance in the operand.
      def each
        seen = {}
        @operand.each do |tuple|
          allbuted = allbut(tuple)
          next if seen.key?(allbuted)

          yield(allbuted)
          seen[allbuted] = true
        end
      end

      private

      # Returns a NEW tuple without the butlist attributes. The operand's
      # tuple is left untouched, so the operand can safely be iterated again
      # or shared with other operators.
      def allbut(tuple)
        tuple.reject { |k, _| @butlist.include?(k) }
      end

    end # class Allbut
  end # module Operator
end # module Bmg
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'set' # DistinctList relies on Set, which is not autoloaded before Ruby 3.2

module Bmg
  module Operator
    #
    # Autosummarize operator.
    #
    # Autosummarize helps structuring the results of a big flat join.
    #
    # This operator is still largely experimental and should be used with
    # care...
    #
    class Autosummarize
      include Operator

      # operand - object whose `each` yields Hash tuples.
      # by      - list of attribute names whose values form the determinant
      #           (grouping key) of a tuple.
      # sums    - Hash mapping an attribute name to its summarizer: `:same`,
      #           `:group`, or any object responding to `init`, `sum` and
      #           `term`. Attributes not listed use `Same`.
      def initialize(operand, by, sums)
        @operand = operand
        @by = by
        @sums = sums.each_with_object({}) { |(k, v), h| h[k] = to_summarizer(v) }
      end

      # Yields one summarized tuple per distinct determinant, in order of
      # first appearance. The whole operand is consumed before anything is
      # yielded.
      def each(&bl)
        groups = {}
        @operand.each do |tuple|
          determinant = key(tuple)
          # The first tuple of a group is used by `init` AND then summed, as
          # every later tuple is; summarizers must tolerate that.
          groups[determinant] ||= init(determinant, tuple)
          groups[determinant] = sum(groups[determinant], tuple)
        end
        groups.values.map { |memo| term(memo) }.each(&bl)
      end

      private

      # Returns the tuple determinant.
      def key(tuple)
        @by.map { |by| tuple[by] }
      end

      # Returns the initial tuple to use for a given determinant.
      def init(_key, tuple)
        tuple.each_with_object({}) do |(k, v), h|
          h[k] = summarizer(k).init(v)
        end
      end

      # Returns the summarizer to use for a given key.
      def summarizer(k)
        @sums[k] || Same.new
      end

      # Sums `tuple` on `memo`, returning the new tuple to use as memo.
      def sum(memo, tuple)
        tuple.each_with_object(memo.dup) do |(k, v), h|
          h[k] = summarizer(k).sum(h[k], v)
        end
      end

      # Terminates the summarization of a given tuple.
      def term(tuple)
        tuple.each_with_object({}) do |(k, v), h|
          h[k] = summarizer(k).term(v)
        end
      end

      # Converts the `:same` / `:group` shortcuts to summarizer instances;
      # anything else is returned as is.
      def to_summarizer(x)
        case x
        when :same  then Same.new
        when :group then DistinctList.new
        else x
        end
      end

      #
      # Summarizes by enforcing that the same dependent is observed for a given
      # determinant, returning the dependent as summarization.
      #
      class Same

        def init(v)
          v
        end

        # Raises a RuntimeError when two different values are observed.
        def sum(v1, v2)
          raise "Same values expected, got `#{v1}` vs. `#{v2}`" unless v1 == v2
          v1
        end

        def term(v)
          v
        end

      end # class Same

      #
      # Summarizes by putting distinct dependents inside an Array, ignoring nils,
      # and optionally sorting the array.
      #
      class DistinctList

        # sorter - optional comparison block handed to `Array#sort`.
        def initialize(&sorter)
          @sorter = sorter
        end

        def init(v)
          Set.new(v.nil? ? [] : [v])
        end

        # Adds `v2` to the set `v1` in place (nil is skipped).
        def sum(v1, v2)
          v1 << v2 unless v2.nil?
          v1
        end

        def term(v)
          list = v.to_a
          @sorter ? list.sort(&@sorter) : list
        end

      end # class DistinctList

      #
      # Summarizes by converting dependents to { x => y, ... } such that `x` is not
      # null and `y` is the value observed for `x`.
      #
      # Dependents are expected to be tuples indexable by `x` and `y`. A nil `y`
      # is skipped unless `preserve_nulls` is true.
      #
      class YByX

        def initialize(y, x, preserve_nulls = false)
          @y = y
          @x = x
          @preserve_nulls = preserve_nulls
        end

        def init(v)
          [v]
        end

        def sum(v1, v2)
          v1 << v2
        end

        def term(v)
          v.each_with_object({}) do |tuple, h|
            next if tuple[@x].nil?
            h[tuple[@x]] = tuple[@y] if !tuple[@y].nil? || @preserve_nulls
          end
        end

      end # class YByX

      #
      # Summarizes by converting dependents to { x => [ys], ... } such that `x` is not
      # null and `[ys]` is a distinct list of observed `y`.
      #
      # NOTE: nil `y` values are not filtered out by this implementation; they
      # end up in the list like any other value.
      #
      class YsByX

        # sorter - optional comparison block used to sort the dependent
        #          tuples before they are grouped.
        def initialize(y, x, &sorter)
          @y = y
          @x = x
          @sorter = sorter
        end

        def init(v)
          [v]
        end

        def sum(v1, v2)
          v1 << v2
        end

        def term(v)
          h = {}
          v = v.reject { |tuple| tuple[@x].nil? }
          v = v.sort(&@sorter) if @sorter
          v.each { |tuple| (h[tuple[@x]] ||= []) << tuple[@y] }
          # Deduplicate once per list rather than after every insertion.
          h.each_value(&:uniq!)
          h
        end

      end # class YsByX

    end # class Autosummarize
  end # module Operator
end # module Bmg
|
@@ -0,0 +1,109 @@
|
|
1
|
+
module Bmg
  module Operator
    #
    # Autowrap operator.
    #
    # Autowrap can be used to structure tuples ala Tutorial D' wrap, but it works
    # with conventions instead of explicit wrapping, and supports multiple levels
    # of wrapping.
    #
    # Examples:
    #
    #   [{ a: 1, b_x: 2, b_y: 3 }]     => [{ a: 1, b: { x: 2, y: 3 } }]
    #   [{ a: 1, b_x_y: 2, b_x_z: 3 }] => [{ a: 1, b: { x: { y: 2, z: 3 } } }]
    #
    # Autowrap supports the following options:
    #
    # - `postprocessor: :nil|:none|:delete|Hash|Proc` see NoLeftJoinNoise
    # - `split: String` the separator to use to split keys, defaults to `_`
    #
    class Autowrap
      include Operator

      DEFAULT_OPTIONS = {
        :postprocessor => :none,
        :split => "_"
      }.freeze

      # operand - object whose `each` yields Hash tuples.
      # options - Hash merged over DEFAULT_OPTIONS. Raises a RuntimeError
      #           when the postprocessor is not a supported one.
      def initialize(operand, options = {})
        @operand = operand
        @options = DEFAULT_OPTIONS.merge(options)
        @options[:postprocessor] = NoLeftJoinNoise.new(@options[:postprocessor])
      end

      # Yields the wrapped version of each operand tuple.
      def each
        @operand.each do |tuple|
          yield autowrap(tuple)
        end
      end

      private

      # Builds a new nested tuple out of `tuple` by splitting every key on the
      # separator, then hands the result to the postprocessor.
      def autowrap(tuple)
        separator = @options[:split]
        autowrapped = tuple.each_with_object({}) do |(k, v), h|
          parts = k.to_s.split(separator).map(&:to_sym)
          # Walk (and create on demand) the intermediate hashes, then store
          # the value under the last part.
          sub = h
          parts[0...-1].each do |part|
            sub = (sub[part] ||= {})
          end
          sub[parts[-1]] = v
        end
        postprocessor.call(autowrapped)
      end

      def postprocessor
        @options[:postprocessor]
      end

      #
      # Removes the noise generated by left joins that did not match.
      #
      # i.e. x is removed in { x: { id: nil, name: nil, ... } }
      #
      # Supported heuristics are:
      #
      # - nil:    { x: { id: nil, name: nil, ... } } => { x: nil }
      # - delete: { x: { id: nil, name: nil, ... } } => { }
      # - none:   { x: { id: nil, name: nil, ... } } => { x: { id: nil, name: nil, ... } }
      # - a Hash, specifying a specific heuristic by tuple attribute
      # - a Proc, `->(tuple,key){ ... }` that affects the tuple manually
      #
      class NoLeftJoinNoise

        REMOVERS = {
          nil:    ->(t, k) { t[k] = nil },
          delete: ->(t, k) { t.delete(k) },
          none:   ->(t, _k) { t }
        }.freeze

        # remover - nil, a Symbol naming one of REMOVERS, a Hash of such
        #           Symbols by attribute name, or a Proc. Anything else, or an
        #           unknown Symbol, raises a RuntimeError.
        def initialize(remover)
          @remover = case remover
                     when NilClass then REMOVERS[:none]
                     when Proc     then remover
                     when Symbol   then named_remover(remover)
                     when Hash     then ->(t, k) { named_remover(remover[k] || :none).call(t, k) }
                     else
                       raise "Invalid remover `#{remover}`"
                     end
        end

        # Applies the remover to every top-level attribute of `tuple` whose
        # value is a Hash containing only nils. Mutates and returns `tuple`.
        def call(tuple)
          # Iterate over a snapshot of the keys: removers are allowed to
          # delete or add attributes, which a live Hash iteration forbids.
          tuple.keys.each do |k|
            value = tuple[k]
            @remover.call(tuple, k) if value.is_a?(Hash) && all_nil?(value)
          end
          tuple
        end

        # True when `tuple` is a Hash whose values are all nil, recursively.
        def all_nil?(tuple)
          return false unless tuple.is_a?(Hash)
          tuple.all? { |_, v| v.nil? || all_nil?(v) }
        end

        private

        # Looks a remover up by name, failing fast on unknown names instead
        # of returning nil and crashing later with a NoMethodError.
        def named_remover(name)
          REMOVERS.fetch(name) { raise "Invalid remover `#{name}`" }
        end

      end # NoLeftJoinNoise

    end # class Autowrap
  end # module Operator
end # module Bmg
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Bmg
  module Operator
    #
    # Project operator.
    #
    # Projects operand's tuples on given attributes, that is, keep those attributes
    # only. The operator takes care of removing duplicates.
    #
    # Example:
    #
    #   [{ a: 1, b: 2 }] project [:b] => [{ b: 2 }]
    #
    # All attributes in the attrlist SHOULD be existing attributes of the
    # input tuples.
    #
    class Project
      include Operator

      # operand  - object whose `each` yields Hash tuples.
      # attrlist - collection responding to `include?`, listing the attribute
      #            names to keep.
      def initialize(operand, attrlist)
        @operand = operand
        @attrlist = attrlist
      end

      # Yields each distinct projected tuple, in the order of first
      # appearance in the operand.
      def each
        seen = {}
        @operand.each do |tuple|
          projected = project(tuple)
          next if seen.key?(projected)

          yield(projected)
          seen[projected] = true
        end
      end

      private

      # Returns a NEW tuple restricted to the attrlist attributes. The
      # operand's tuple is left untouched, so the operand can safely be
      # iterated again or shared with other operators.
      def project(tuple)
        tuple.select { |k, _| @attrlist.include?(k) }
      end

    end # class Project
  end # module Operator
end # module Bmg
|