bmg 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/LICENSE.md +22 -0
- data/README.md +3 -0
- data/Rakefile +11 -0
- data/lib/bmg.rb +18 -0
- data/lib/bmg/operator.rb +14 -0
- data/lib/bmg/operator/allbut.rb +44 -0
- data/lib/bmg/operator/autosummarize.rb +190 -0
- data/lib/bmg/operator/autowrap.rb +109 -0
- data/lib/bmg/operator/project.rb +43 -0
- data/lib/bmg/operator/rename.rb +45 -0
- data/lib/bmg/reader.rb +11 -0
- data/lib/bmg/reader/csv.rb +56 -0
- data/lib/bmg/reader/excel.rb +35 -0
- data/lib/bmg/relation.rb +34 -0
- data/lib/bmg/version.rb +8 -0
- data/spec/unit/operator/test_allbut.rb +18 -0
- data/spec/unit/operator/test_autosummarize.rb +183 -0
- data/spec/unit/operator/test_autowrap.rb +88 -0
- data/spec/unit/operator/test_project.rb +18 -0
- data/spec/unit/operator/test_rename.rb +13 -0
- data/spec/unit/reader/example.csv +3 -0
- data/spec/unit/reader/example.numbers +0 -0
- data/spec/unit/reader/example.xlsx +0 -0
- data/spec/unit/reader/test_csv.rb +17 -0
- data/spec/unit/reader/test_excel.rb +17 -0
- data/spec/unit/spec_helper.rb +11 -0
- data/spec/unit/test_relation.rb +132 -0
- data/tasks/gem.rake +39 -0
- data/tasks/test.rake +17 -0
- metadata +130 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9e90b71aeb478997a7f9efcea3a18cec7ad3fac3
|
4
|
+
data.tar.gz: 9ebdd9b06a635ef12cef5d7c35d4746a56932bba
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a662b415f76df21dfad0d4e4b6f7c9dece45c994a7200c4d547f21007fd8158812a0e8c0fb263f0e4d3da597d334e03ed8394a28b052927f7f58081bf65ee392
|
7
|
+
data.tar.gz: 520d850977384a3cff1aa7e8fe58f52ea34e9f7f8caa0d9ec1f4849a92bab50814bbdaeb4ad8b0cd551ab8b3424d1d57f42fbab51bbe22fa7f6824e5aa610159
|
data/Gemfile
ADDED
data/LICENSE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# The MIT Licence
|
2
|
+
|
3
|
+
Copyright (c) 2017 - Enspirit SPRL (Bernard Lambeau)
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
ADDED
data/lib/bmg.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'path'
|
2
|
+
module Bmg

  # Every method defined below is exposed both as `Bmg.xxx` and as a private
  # instance method of modules/classes including Bmg.
  module_function

  # Wraps a Reader::Csv, built from `path` and `options`, inside a Relation.
  def csv(path, options = {})
    reader = Reader::Csv.new(path, options)
    Relation.new(reader)
  end

  # Wraps a Reader::Excel, built from `path` and `options`, inside a Relation.
  def excel(path, options = {})
    reader = Reader::Excel.new(path, options)
    Relation.new(reader)
  end

end
|
15
|
+
require_relative 'bmg/version'
|
16
|
+
require_relative 'bmg/operator'
|
17
|
+
require_relative 'bmg/relation'
|
18
|
+
require_relative 'bmg/reader'
|
data/lib/bmg/operator.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Bmg
  module Operator

    # Collects everything the including class yields from `each` into an
    # Array, in yield order.
    def to_a
      enum_for(:each).entries
    end

  end
end
|
10
|
+
require_relative 'operator/allbut'
|
11
|
+
require_relative 'operator/autosummarize'
|
12
|
+
require_relative 'operator/autowrap'
|
13
|
+
require_relative 'operator/project'
|
14
|
+
require_relative 'operator/rename'
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Bmg
  module Operator
    #
    # Allbut operator.
    #
    # Projects operand's tuples on all but given attributes, that is,
    # removes attributes in the list. The operator takes care of removing
    # duplicates, and leaves the operand's tuples untouched.
    #
    # Example:
    #
    #   [{ a: 1, b: 2 }] allbut [:b] => [{ a: 1 }]
    #
    # All attributes in the butlist SHOULD be existing attributes of the
    # input tuples.
    #
    class Allbut
      include Operator

      # `operand` must respond to `each` and yield Hash tuples; `butlist`
      # is the collection of attribute names to strip.
      def initialize(operand, butlist)
        @operand = operand
        @butlist = butlist
      end

      # Yields each distinct stripped tuple, in order of first appearance.
      # Returns an Enumerator when no block is given.
      def each
        return to_enum(:each) unless block_given?
        seen = {}
        @operand.each do |tuple|
          allbuted = allbut(tuple)
          next if seen.key?(allbuted)
          yield(allbuted)
          seen[allbuted] = true
        end
      end

    private

      # Returns a copy of `tuple` without the butlist attributes. `reject`
      # is used instead of `delete_if` so that the operand's own tuples are
      # never modified.
      def allbut(tuple)
        tuple.reject{|k,_| @butlist.include?(k) }
      end

    end # class Allbut
  end # module Operator
end # module Bmg
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'set'
module Bmg
  module Operator
    #
    # Autosummarize operator.
    #
    # Autosummarize helps structuring the results of a big flat join.
    #
    # Tuples having the same values for the `by` attributes are merged into
    # one tuple. Each attribute is merged by the summarizer registered for it
    # in `sums`: `:same`, `:group`, or any object responding to `init(v)`,
    # `sum(memo, v)` and `term(memo)`. Attributes without a registered
    # summarizer use `Same`.
    #
    # This operator is still largely experimental and should be used with
    # care...
    #
    class Autosummarize
      include Operator

      # `operand` must respond to `each` and yield Hash tuples, `by` is the
      # list of determinant attributes, `sums` maps attribute names to
      # summarizers (see `to_summarizer`).
      def initialize(operand, by, sums)
        @operand = operand
        @by = by
        @sums = sums.each_with_object({}){|(k,v),h| h[k] = to_summarizer(v) }
      end

      # Yields one summarized tuple per distinct determinant, in order of
      # first appearance. The whole operand is consumed before the first
      # tuple is yielded. Returns an Enumerator when no block is given.
      def each(&bl)
        return to_enum(:each) unless bl
        memos = {}
        @operand.each do |tuple|
          determinant = key(tuple)
          # The first tuple of a group goes through both `init` and `sum`;
          # summarizers must therefore tolerate seeing a value twice.
          memos[determinant] ||= init(determinant, tuple)
          memos[determinant] = sum(memos[determinant], tuple)
        end
        memos.values.map{|memo| term(memo) }.each(&bl)
      end

    private

      # Returns the tuple determinant.
      def key(tuple)
        @by.map{|by| tuple[by] }
      end

      # Returns the initial tuple to use for a given determinant.
      def init(key, tuple)
        tuple.each_with_object({}){|(k,v),h|
          h[k] = summarizer(k).init(v)
        }
      end

      # Returns the summarizer to use for a given key.
      def summarizer(k)
        @sums[k] || Same.new
      end

      # Sums `tuple` on `memo`, returning the new tuple to use as memo.
      def sum(memo, tuple)
        tuple.each_with_object(memo.dup){|(k,v),h|
          h[k] = summarizer(k).sum(h[k], v)
        }
      end

      # Terminates the summarization of a given tuple.
      def term(tuple)
        tuple.each_with_object({}){|(k,v),h|
          h[k] = summarizer(k).term(v)
        }
      end

      # Converts the `:same` and `:group` shortcuts to summarizer instances;
      # anything else is taken to be a summarizer already.
      def to_summarizer(x)
        case x
        when :same  then Same.new
        when :group then DistinctList.new
        else
          x
        end
      end

      #
      # Summarizes by enforcing that the same dependent is observed for a given
      # determinant, returning the dependent as summarization.
      #
      class Same

        def init(v)
          v
        end

        # Raises a RuntimeError when the two observed values differ.
        def sum(v1, v2)
          raise "Same values expected, got `#{v1}` vs. `#{v2}`" unless v1 == v2
          v1
        end

        def term(v)
          v
        end

      end # class Same

      #
      # Summarizes by putting distinct dependents inside an Array, ignoring nils,
      # and optionally sorting the array.
      #
      class DistinctList

        # The optional block is used as comparator by `term`.
        def initialize(&sorter)
          @sorter = sorter
        end

        def init(v)
          Set.new(v.nil? ? [] : [v])
        end

        def sum(v1, v2)
          v1 << v2 unless v2.nil?
          v1
        end

        def term(v)
          v = v.to_a
          v = v.sort(&@sorter) if @sorter
          v
        end

      end # class DistinctList

      #
      # Summarizes by converting dependents to { x => y, ... } such that `x` is not
      # null and `y` is the value observed for `x`. Pairs having a null `y` are
      # skipped unless `preserve_nulls` is set.
      #
      class YByX

        def initialize(y, x, preserve_nulls = false)
          @y = y
          @x = x
          @preserve_nulls = preserve_nulls
        end

        def init(v)
          [v]
        end

        def sum(v1, v2)
          v1 << v2
        end

        def term(v)
          h = {}
          v.each do |tuple|
            next if tuple[@x].nil?
            h[tuple[@x]] = tuple[@y] if @preserve_nulls || !tuple[@y].nil?
          end
          h
        end

      end # class YByX

      #
      # Summarizes by converting dependents to { x => [ys], ... } such that `x` is not
      # null and `[ys]` is a distinct list of the `y` observed for `x`.
      #
      # NOTE(review): null `y` values are currently kept in the lists; confirm
      # whether they should be filtered out.
      #
      class YsByX

        # The optional block is used to sort the dependents (not the ys) before
        # they are grouped by `x`.
        def initialize(y, x, &sorter)
          @y = y
          @x = x
          @sorter = sorter
        end

        def init(v)
          [v]
        end

        def sum(v1, v2)
          v1 << v2
        end

        def term(v)
          h = {}
          v = v.reject{|tuple| tuple[@x].nil? }
          v = v.sort(&@sorter) if @sorter
          v.each do |tuple|
            ys = (h[tuple[@x]] ||= [])
            ys << tuple[@y] unless ys.include?(tuple[@y])
          end
          h
        end

      end # class YsByX

    end # class Autosummarize
  end # module Operator
end # module Bmg
|
@@ -0,0 +1,109 @@
|
|
1
|
+
module Bmg
  module Operator
    #
    # Autowrap operator.
    #
    # Autowrap can be used to structure tuples ala Tutorial D' wrap, but it works
    # with conventions instead of explicit wrapping, and supports multiple levels
    # of wrapping.
    #
    # Examples:
    #
    #   [{ a: 1, b_x: 2, b_y: 3 }]     => [{ a: 1, b: { x: 2, y: 3 } }]
    #   [{ a: 1, b_x_y: 2, b_x_z: 3 }] => [{ a: 1, b: { x: { y: 2, z: 3 } } }]
    #
    # Autowrap supports the following options:
    #
    # - `postprocessor: :nil|:none|:delete|Hash|Proc` see NoLeftJoinNoise
    # - `split: String` the separator to use to split keys, defaults to `_`
    #
    class Autowrap
      include Operator

      DEFAULT_OPTIONS = {
        :postprocessor => :none,
        :split => "_"
      }

      # `operand` must respond to `each` and yield Hash tuples. Raises a
      # RuntimeError when the `:postprocessor` option is not supported.
      def initialize(operand, options = {})
        @operand = operand
        @options = DEFAULT_OPTIONS.merge(options)
        @options[:postprocessor] = NoLeftJoinNoise.new(@options[:postprocessor])
      end

      # Yields the wrapped version of every operand tuple. Returns an
      # Enumerator when no block is given.
      def each
        return to_enum(:each) unless block_given?
        @operand.each do |tuple|
          yield autowrap(tuple)
        end
      end

    private

      # Builds a new, nested tuple from `tuple` by splitting its keys on the
      # separator, then applies the postprocessor on the result.
      def autowrap(tuple)
        separator = @options[:split]
        autowrapped = tuple.each_with_object({}){|(k,v),h|
          parts = k.to_s.split(separator).map(&:to_sym)
          # A key that is empty or only made of separators splits to nothing;
          # keep it unsplit instead of storing the value under a nil key.
          parts = [k.to_s.to_sym] if parts.empty?
          sub = parts[0...-1].inject(h){|memo,part| memo[part] ||= {} }
          sub[parts[-1]] = v
        }
        postprocessor.call(autowrapped)
      end

      def postprocessor
        @options[:postprocessor]
      end

      #
      # Removes the noise generated by left joins that did not match.
      #
      # i.e. x is removed in { x: { id: nil, name: nil, ... } }
      #
      # Supported heuristics are:
      #
      # - nil:    { x: { id: nil, name: nil, ... } } => { x: nil }
      # - delete: { x: { id: nil, name: nil, ... } } => { }
      # - none:   { x: { id: nil, name: nil, ... } } => { x: { id: nil, name: nil, ... } }
      # - a Hash, specifying a specific heuristic by tuple attribute
      # - a Proc, `->(tuple,key){ ... }` that affects the tuple manually
      #
      class NoLeftJoinNoise

        REMOVERS = {
          nil:    ->(t,k){ t[k] = nil  },
          delete: ->(t,k){ t.delete(k) },
          none:   ->(t,k){ t           }
        }

        # Raises a RuntimeError when `remover` is neither nil, a Proc, a Hash,
        # nor one of the REMOVERS names. Names found inside a Hash are only
        # checked when they are actually needed.
        def initialize(remover)
          @remover = case remover
          when NilClass then REMOVERS[:none]
          when Proc     then remover
          when Symbol   then named_remover(remover)
          when Hash     then ->(t,k){ named_remover(remover[k] || :none).call(t,k) }
          else
            raise "Invalid remover `#{remover}`"
          end
        end

        # Applies the remover on every top-level attribute of `tuple` whose
        # value is a Hash containing nothing but nils. `tuple` is modified in
        # place and returned.
        def call(tuple)
          # Iterate a snapshot of the keys: removers are allowed to delete
          # or add attributes, which is not safe while iterating the Hash.
          tuple.keys.each do |k|
            @remover.call(tuple, k) if all_nil?(tuple[k])
          end
          tuple
        end

        # Returns true if `tuple` is a Hash whose values are all nil, or
        # Hashes for which this holds recursively.
        def all_nil?(tuple)
          return false unless tuple.is_a?(Hash)
          tuple.all?{|(_,v)| v.nil? || all_nil?(v) }
        end

      private

        # Looks a remover up by name, failing loudly on unknown names rather
        # than returning nil.
        def named_remover(name)
          REMOVERS.fetch(name){ raise "Invalid remover `#{name}`" }
        end

      end # NoLeftJoinNoise

    end # class Autowrap
  end # module Operator
end # module Bmg
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Bmg
  module Operator
    #
    # Project operator.
    #
    # Projects operand's tuples on given attributes, that is, keep those attributes
    # only. The operator takes care of removing duplicates, and leaves the
    # operand's tuples untouched.
    #
    # Example:
    #
    #   [{ a: 1, b: 2 }] project [:b] => [{ b: 2 }]
    #
    # All attributes in the attrlist SHOULD be existing attributes of the
    # input tuples.
    #
    class Project
      include Operator

      # `operand` must respond to `each` and yield Hash tuples; `attrlist`
      # is the collection of attribute names to keep.
      def initialize(operand, attrlist)
        @operand = operand
        @attrlist = attrlist
      end

      # Yields each distinct projected tuple, in order of first appearance.
      # Returns an Enumerator when no block is given.
      def each
        return to_enum(:each) unless block_given?
        seen = {}
        @operand.each do |tuple|
          projected = project(tuple)
          next if seen.key?(projected)
          yield(projected)
          seen[projected] = true
        end
      end

    private

      # Returns a copy of `tuple` restricted to the attrlist attributes.
      # `select` is used instead of `delete_if` so that the operand's own
      # tuples are never modified.
      def project(tuple)
        tuple.select{|k,_| @attrlist.include?(k) }
      end

    end # class Project
  end # module Operator
end # module Bmg
|