bmg 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9e90b71aeb478997a7f9efcea3a18cec7ad3fac3
4
+ data.tar.gz: 9ebdd9b06a635ef12cef5d7c35d4746a56932bba
5
+ SHA512:
6
+ metadata.gz: a662b415f76df21dfad0d4e4b6f7c9dece45c994a7200c4d547f21007fd8158812a0e8c0fb263f0e4d3da597d334e03ed8394a28b052927f7f58081bf65ee392
7
+ data.tar.gz: 520d850977384a3cff1aa7e8fe58f52ea34e9f7f8caa0d9ec1f4849a92bab50814bbdaeb4ad8b0cd551ab8b3424d1d57f42fbab51bbe22fa7f6824e5aa610159
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Resolve gems against the public RubyGems index.
source("https://rubygems.org")

# Dependencies are declared through the gemspec directive.
gemspec
data/LICENSE.md ADDED
@@ -0,0 +1,22 @@
1
+ # The MIT Licence
2
+
3
+ Copyright (c) 2017 - Enspirit SPRL (Bernard Lambeau)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ ## Bmg, Alf's successor, the relational algebra!
2
+
3
+ Coming soon.
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
#
# Loads every task file found in the tasks folder.
#
# See the .rake files there for complete documentation.
#
Dir["tasks/*.rake"].each { |rakefile| load(rakefile) }

# Tests are run by default
task(:default => :test)
data/lib/bmg.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'path'
2
#
# Factory methods building relations on top of file readers.
#
module Bmg

  # Builds a Relation whose operand is a Reader::Csv created from
  # `path` and `options`.
  def csv(path, options = {})
    reader = Reader::Csv.new(path, options)
    Relation.new(reader)
  end

  # Builds a Relation whose operand is a Reader::Excel created from
  # `path` and `options`.
  def excel(path, options = {})
    reader = Reader::Excel.new(path, options)
    Relation.new(reader)
  end

  # Makes both factories callable as `Bmg.csv` and `Bmg.excel`.
  module_function :csv, :excel

end
15
+ require_relative 'bmg/version'
16
+ require_relative 'bmg/operator'
17
+ require_relative 'bmg/relation'
18
+ require_relative 'bmg/reader'
@@ -0,0 +1,14 @@
1
module Bmg
  #
  # Mixin for operators; the including class is expected to provide `each`.
  #
  module Operator

    # Returns an Array with everything `each` yields, in order.
    def to_a
      enum_for(:each).entries
    end

  end
end
10
+ require_relative 'operator/allbut'
11
+ require_relative 'operator/autosummarize'
12
+ require_relative 'operator/autowrap'
13
+ require_relative 'operator/project'
14
+ require_relative 'operator/rename'
@@ -0,0 +1,44 @@
1
module Bmg
  module Operator
    #
    # Allbut operator.
    #
    # Projects operand's tuples on all but given attributes, that is,
    # removes attributes in the list. The operator takes care of removing
    # duplicates.
    #
    # Example:
    #
    #   [{ a: 1, b: 2 }] allbut [:b] => [{ a: 1 }]
    #
    # All attributes in the butlist SHOULD be existing attributes of the
    # input tuples.
    #
    # The operand's tuples are never modified: every yielded tuple is a
    # fresh Hash.
    #
    class Allbut
      include Operator

      # operand - object whose `each` yields Hash tuples
      # butlist - collection of attribute names to remove (must respond
      #           to `include?`)
      def initialize(operand, butlist)
        @operand = operand
        @butlist = butlist
      end

      # Yields each distinct tuple stripped from the butlist attributes.
      # Returns an Enumerator when no block is given.
      def each
        return to_enum(:each) unless block_given?
        seen = {}
        @operand.each do |tuple|
          allbuted = allbut(tuple)
          next if seen.key?(allbuted)
          yield(allbuted)
          seen[allbuted] = true
        end
      end

    private

      # Returns a copy of `tuple` without the butlist attributes. `reject`
      # is used (instead of `delete_if`) so that the operand's own tuple is
      # left untouched.
      def allbut(tuple)
        tuple.reject{|k,_| @butlist.include?(k) }
      end

    end # class Allbut
  end # module Operator
end # module Bmg
@@ -0,0 +1,190 @@
1
require 'set'

module Bmg
  module Operator
    #
    # Autosummarize operator.
    #
    # Autosummarize helps structuring the results of a big flat join: tuples
    # having the same values on the `by` attributes are merged into a single
    # tuple, every attribute being aggregated by a summarizer.
    #
    # This operator is still largely experimental and should be used with
    # care...
    #
    class Autosummarize
      include Operator

      # operand - object whose `each` yields Hash tuples
      # by      - list of attribute names forming the determinant
      # sums    - Hash mapping attribute names to a summarizer, that is
      #           `:same`, `:group`, or any object responding to `init`,
      #           `sum` and `term`. Attributes without an entry use Same.
      def initialize(operand, by, sums)
        @operand = operand
        @by = by
        @sums = sums.each_with_object({}){|(k,v),h| h[k] = to_summarizer(v) }
      end

      # Consumes the whole operand, then passes each summarized tuple to
      # the block, in order of first appearance of its determinant.
      def each(&bl)
        memos = {}
        @operand.each do |tuple|
          determinant = key(tuple)
          # Note that the first tuple of a group is used for `init` and is
          # then also passed to `sum`.
          memos[determinant] ||= init(determinant, tuple)
          memos[determinant] = sum(memos[determinant], tuple)
        end
        memos.values.map{|memo| term(memo) }.each(&bl)
      end

    private

      # Returns the tuple determinant.
      def key(tuple)
        @by.map{|by| tuple[by] }
      end

      # Returns the initial tuple to use for a given determinant.
      def init(key, tuple)
        tuple.each_with_object({}){|(k,v),h|
          h[k] = summarizer(k).init(v)
        }
      end

      # Returns the summarizer to use for a given key.
      def summarizer(k)
        @sums[k] || Same.new
      end

      # Sums `tuple` on `memo`, returning the new tuple to use as memo.
      def sum(memo, tuple)
        tuple.each_with_object(memo.dup){|(k,v),h|
          h[k] = summarizer(k).sum(h[k], v)
        }
      end

      # Terminates the summarization of a given tuple.
      def term(tuple)
        tuple.each_with_object({}){|(k,v),h|
          h[k] = summarizer(k).term(v)
        }
      end

      # Converts the `:same` and `:group` shortcuts to summarizer
      # instances; anything else is returned as is.
      def to_summarizer(x)
        case x
        when :same  then Same.new
        when :group then DistinctList.new
        else
          x
        end
      end

      #
      # Summarizes by enforcing that the same dependent is observed for a given
      # determinant, returning the dependent as summarization.
      #
      class Same

        def init(v)
          v
        end

        # Raises a RuntimeError when `v2` differs from the value seen so far.
        def sum(v1, v2)
          raise "Same values expected, got `#{v1}` vs. `#{v2}`" unless v1 == v2
          v1
        end

        def term(v)
          v
        end

      end # class Same

      #
      # Summarizes by putting distinct dependents inside an Array, ignoring nils,
      # and optionally sorting the array.
      #
      class DistinctList

        # sorter - optional comparison block, passed to Array#sort in `term`
        def initialize(&sorter)
          @sorter = sorter
        end

        def init(v)
          Set.new(v.nil? ? [] : [v])
        end

        def sum(v1, v2)
          v1 << v2 unless v2.nil?
          v1
        end

        def term(v)
          v = v.to_a
          v = v.sort(&@sorter) if @sorter
          v
        end

      end # class DistinctList

      #
      # Summarizes by converting dependents to { x => y, ... } such that `x` is not
      # null and `y` is the value observed for `x`. Null `y` values are skipped
      # unless `preserve_nulls` is set.
      #
      class YByX

        def initialize(y, x, preserve_nulls = false)
          @y = y
          @x = x
          @preserve_nulls = preserve_nulls
        end

        def init(v)
          [v]
        end

        def sum(v1, v2)
          v1 << v2
        end

        def term(v)
          h = {}
          v.each do |tuple|
            next if tuple[@x].nil?
            h[tuple[@x]] = tuple[@y] if @preserve_nulls || !tuple[@y].nil?
          end
          h
        end

      end # class YByX

      #
      # Summarizes by converting dependents to { x => [ys], ... } such that `x` is not
      # null and `[ys]` is a distinct list of observed `y`. Null `y` values are
      # currently NOT filtered out and end up in the list.
      #
      class YsByX

        # sorter - optional comparison block used to sort the dependents
        #          before they are grouped by `x`
        def initialize(y, x, &sorter)
          @y = y
          @x = x
          @sorter = sorter
        end

        def init(v)
          [v]
        end

        def sum(v1, v2)
          v1 << v2
        end

        def term(v)
          h = {}
          v = v.reject{|tuple| tuple[@x].nil? }
          v = v.sort(&@sorter) if @sorter
          v.each do |tuple|
            (h[tuple[@x]] ||= []) << tuple[@y]
          end
          # Duplicates are removed once per list, keeping first occurrences.
          h.each_value(&:uniq!)
          h
        end

      end # class YsByX

    end # class Autosummarize
  end # module Operator
end # module Bmg
@@ -0,0 +1,109 @@
1
module Bmg
  module Operator
    #
    # Autowrap operator.
    #
    # Autowrap can be used to structure tuples ala Tutorial D' wrap, but it works
    # with conventions instead of explicit wrapping, and supports multiple levels
    # of wrapping.
    #
    # Examples:
    #
    #   [{ a: 1, b_x: 2, b_y: 3 }]     => [{ a: 1, b: { x: 2, y: 3 } }]
    #   [{ a: 1, b_x_y: 2, b_x_z: 3 }] => [{ a: 1, b: { x: { y: 2, z: 3 } } }]
    #
    # Autowrap supports the following options:
    #
    # - `postprocessor: :nil|:none|:delete|Hash|Proc` see NoLeftJoinNoise
    # - `split: String` the separator to use to split keys, defaults to `_`
    #
    class Autowrap
      include Operator

      DEFAULT_OPTIONS = {
        :postprocessor => :none,
        :split => "_"
      }.freeze

      # operand - object whose `each` yields Hash tuples
      # options - Hash of options, merged over DEFAULT_OPTIONS
      #
      # Raises a RuntimeError if the postprocessor option is not supported.
      def initialize(operand, options = {})
        @operand = operand
        @options = DEFAULT_OPTIONS.merge(options)
        @options[:postprocessor] = NoLeftJoinNoise.new(@options[:postprocessor])
      end

      # Yields the autowrapped version of each operand tuple. Returns an
      # Enumerator when no block is given.
      def each
        return to_enum(:each) unless block_given?
        @operand.each do |tuple|
          yield autowrap(tuple)
        end
      end

    private

      # Builds a new, nested tuple from `tuple` by splitting its keys on the
      # separator, then applies the postprocessor on the result.
      def autowrap(tuple)
        separator = @options[:split]
        autowrapped = tuple.each_with_object({}){|(k,v),h|
          *path, leaf = k.to_s.split(separator).map(&:to_sym)
          # Walk down the nested hashes, creating them on the way.
          target = path.inject(h){|sub,part| sub[part] ||= {} }
          target[leaf] = v
        }
        postprocessor.call(autowrapped)
      end

      def postprocessor
        @options[:postprocessor]
      end

      #
      # Removes the noise generated by left joins that did not match.
      #
      # i.e. x is removed in { x: { id: nil, name: nil, ... } }
      #
      # Supported heuristics are:
      #
      # - nil:    { x: { id: nil, name: nil, ... } } => { x: nil }
      # - delete: { x: { id: nil, name: nil, ... } } => { }
      # - none:   { x: { id: nil, name: nil, ... } } => { x: { id: nil, name: nil, ... } }
      # - a Hash, specifying a specific heuristic by tuple attribute
      # - a Proc, `->(tuple,key){ ... }` that affects the tuple manually
      #
      class NoLeftJoinNoise

        REMOVERS = {
          nil:    ->(t,k){ t[k] = nil  },
          delete: ->(t,k){ t.delete(k) },
          none:   ->(t,k){ t           }
        }.freeze

        # remover - nil, one of the REMOVERS keys, a Hash of such keys by
        #           tuple attribute, or a Proc
        #
        # Raises a RuntimeError on any other value, including unknown symbols.
        def initialize(remover)
          @remover = case remover
          when NilClass then REMOVERS[:none]
          when Proc     then remover
          when Symbol   then lookup(remover)
          when Hash     then ->(t,k){ lookup(remover[k] || :none).call(t,k) }
          else
            raise "Invalid remover `#{remover}`"
          end
        end

        # Applies the remover on every top-level attribute of `tuple` whose
        # value is a Hash containing only nils. Returns `tuple`.
        def call(tuple)
          # Iterate a snapshot of the keys: removers (custom procs in
          # particular) may add or delete attributes, and adding a key to
          # a Hash while iterating it raises.
          tuple.keys.each do |k|
            value = tuple[k]
            @remover.call(tuple, k) if value.is_a?(Hash) && all_nil?(value)
          end
          tuple
        end

        # True if `tuple` is a Hash whose values are all nil, recursively.
        def all_nil?(tuple)
          return false unless tuple.is_a?(Hash)
          tuple.values.all?{|v| v.nil? || all_nil?(v) }
        end

      private

        # Returns the predefined remover for `name`, failing fast on
        # unknown names.
        def lookup(name)
          REMOVERS.fetch(name){ raise "Invalid remover `#{name}`" }
        end

      end # NoLeftJoinNoise

    end # class Autowrap
  end # module Operator
end # module Bmg
@@ -0,0 +1,43 @@
1
module Bmg
  module Operator
    #
    # Project operator.
    #
    # Projects operand's tuples on given attributes, that is, keep those attributes
    # only. The operator takes care of removing duplicates.
    #
    # Example:
    #
    #   [{ a: 1, b: 2 }] project [:b] => [{ b: 2 }]
    #
    # All attributes in the attrlist SHOULD be existing attributes of the
    # input tuples.
    #
    # The operand's tuples are never modified: every yielded tuple is a
    # fresh Hash.
    #
    class Project
      include Operator

      # operand  - object whose `each` yields Hash tuples
      # attrlist - collection of attribute names to keep (must respond to
      #            `include?`)
      def initialize(operand, attrlist)
        @operand = operand
        @attrlist = attrlist
      end

      # Yields each distinct tuple restricted to the attrlist attributes.
      # Returns an Enumerator when no block is given.
      def each
        return to_enum(:each) unless block_given?
        seen = {}
        @operand.each do |tuple|
          projected = project(tuple)
          next if seen.key?(projected)
          yield(projected)
          seen[projected] = true
        end
      end

    private

      # Returns a copy of `tuple` keeping the attrlist attributes only.
      # `select` is used (instead of `delete_if`) so that the operand's own
      # tuple is left untouched.
      def project(tuple)
        tuple.select{|k,_| @attrlist.include?(k) }
      end

    end # class Project
  end # module Operator
end # module Bmg