bmg 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9e90b71aeb478997a7f9efcea3a18cec7ad3fac3
4
+ data.tar.gz: 9ebdd9b06a635ef12cef5d7c35d4746a56932bba
5
+ SHA512:
6
+ metadata.gz: a662b415f76df21dfad0d4e4b6f7c9dece45c994a7200c4d547f21007fd8158812a0e8c0fb263f0e4d3da597d334e03ed8394a28b052927f7f58081bf65ee392
7
+ data.tar.gz: 520d850977384a3cff1aa7e8fe58f52ea34e9f7f8caa0d9ec1f4849a92bab50814bbdaeb4ad8b0cd551ab8b3424d1d57f42fbab51bbe22fa7f6824e5aa610159
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "https://rubygems.org"
2
+ gemspec
data/LICENSE.md ADDED
@@ -0,0 +1,22 @@
1
+ # The MIT Licence
2
+
3
+ Copyright (c) 2017 - Enspirit SPRL (Bernard Lambeau)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ ## Bmg, Alf's successor, the relational algebra!
2
+
3
+ Coming soon.
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
#
# Rake entry point.
#
# Loads every `.rake` file found in the `tasks` folder; the individual
# task files carry their own documentation.
#
Dir["tasks/*.rake"].each { |taskfile| load taskfile }

# Running `rake` without arguments runs the `test` task.
task :default => :test
data/lib/bmg.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'path'
2
module Bmg
  # Every method defined below is exposed both as `Bmg.xxx` and as a
  # private instance method for modules/classes including Bmg.
  module_function

  # Wraps a `Reader::Csv`, built from `path` and `options`, into a
  # `Relation`.
  def csv(path, options = {})
    reader = Reader::Csv.new(path, options)
    Relation.new(reader)
  end

  # Wraps a `Reader::Excel`, built from `path` and `options`, into a
  # `Relation`.
  def excel(path, options = {})
    reader = Reader::Excel.new(path, options)
    Relation.new(reader)
  end

end
15
+ require_relative 'bmg/version'
16
+ require_relative 'bmg/operator'
17
+ require_relative 'bmg/relation'
18
+ require_relative 'bmg/reader'
@@ -0,0 +1,14 @@
1
module Bmg
  #
  # Behavior shared by all operators. The including class is expected to
  # provide an `each` method yielding its tuples.
  #
  module Operator

    # Returns an Array with everything `each` yields, in yield order.
    def to_a
      enum_for(:each).entries
    end

  end # module Operator
end # module Bmg
10
+ require_relative 'operator/allbut'
11
+ require_relative 'operator/autosummarize'
12
+ require_relative 'operator/autowrap'
13
+ require_relative 'operator/project'
14
+ require_relative 'operator/rename'
@@ -0,0 +1,44 @@
1
module Bmg
  module Operator
    #
    # Allbut operator.
    #
    # Projects operand's tuples on all but given attributes, that is,
    # removes attributes in the list. The operator takes care of removing
    # duplicates.
    #
    # Example:
    #
    #   [{ a: 1, b: 2 }] allbut [:b] => [{ a: 1 }]
    #
    # All attributes in the butlist SHOULD be existing attributes of the
    # input tuples.
    #
    class Allbut
      include Operator

      # operand - object whose `each` yields Hash tuples.
      # butlist - collection of attribute names to remove; it must respond
      #           to `include?`.
      def initialize(operand, butlist)
        @operand = operand
        @butlist = butlist
      end

      # Yields every distinct tuple obtained by removing the butlist
      # attributes from the operand's tuples, in operand order.
      #
      # Returns an Enumerator when called without a block.
      def each
        return to_enum(:each) unless block_given?

        seen = {}
        @operand.each do |tuple|
          allbuted = allbut(tuple)
          next if seen.key?(allbuted)

          yield(allbuted)
          seen[allbuted] = true
        end
      end

      private

      # Returns a copy of `tuple` without the butlist attributes.
      #
      # `reject` is used instead of `delete_if` on purpose: the latter
      # strips the attributes from the operand's own tuples.
      def allbut(tuple)
        tuple.reject { |k, _| @butlist.include?(k) }
      end

    end # class Allbut
  end # module Operator
end # module Bmg
@@ -0,0 +1,190 @@
1
# DistinctList relies on Set, which is not autoloaded by every Ruby version.
require 'set'

module Bmg
  module Operator
    #
    # Autosummarize operator.
    #
    # Autosummarize helps structuring the results of a big flat join.
    #
    # Tuples sharing the same values on the `by` attributes are merged into
    # one tuple; every attribute is merged by a summarizer, i.e. an object
    # responding to `init(v)`, `sum(memo, v)` and `term(memo)`.
    #
    # This operator is still largely experimental and should be used with
    # care...
    #
    class Autosummarize
      include Operator

      # operand - object whose `each` yields Hash tuples.
      # by      - list of attribute names forming the determinant.
      # sums    - Hash mapping attribute names to `:same`, `:group` or a
      #           summarizer object. Attributes without entry use `Same`.
      def initialize(operand, by, sums)
        @operand = operand
        @by = by
        @sums = sums.each_with_object({}) { |(k, v), h| h[k] = to_summarizer(v) }
      end

      # Yields one summarized tuple per distinct determinant, in order of
      # first appearance in the operand.
      #
      # Returns an Enumerator when called without a block.
      def each(&bl)
        return to_enum(:each) unless bl

        groups = {}
        @operand.each do |tuple|
          k = key(tuple)
          groups[k] ||= init(k, tuple)
          # The first tuple of a group is both init-ed and summed; every
          # summarizer below tolerates seeing it twice.
          groups[k] = sum(groups[k], tuple)
        end
        groups.values.map { |memo| term(memo) }.each(&bl)
      end

      private

      # Returns the tuple determinant.
      def key(tuple)
        @by.map { |by| tuple[by] }
      end

      # Returns the initial tuple to use for a given determinant.
      def init(key, tuple)
        tuple.each_with_object({}) { |(k, v), h| h[k] = summarizer(k).init(v) }
      end

      # Returns the summarizer to use for a given key.
      def summarizer(k)
        @sums[k] || Same.new
      end

      # Sums `tuple` on `memo`, returning the new tuple to use as memo.
      def sum(memo, tuple)
        tuple.each_with_object(memo.dup) { |(k, v), h| h[k] = summarizer(k).sum(h[k], v) }
      end

      # Terminates the summarization of a given tuple.
      def term(tuple)
        tuple.each_with_object({}) { |(k, v), h| h[k] = summarizer(k).term(v) }
      end

      # Converts the `:same` and `:group` shortcuts to summarizer instances;
      # anything else is taken as being a summarizer already.
      def to_summarizer(x)
        case x
        when :same  then Same.new
        when :group then DistinctList.new
        else
          x
        end
      end

      #
      # Summarizes by enforcing that the same dependent is observed for a given
      # determinant, returning the dependent as summarization.
      #
      class Same

        def init(v)
          v
        end

        # Raises a RuntimeError when the two values differ.
        def sum(v1, v2)
          raise "Same values expected, got `#{v1}` vs. `#{v2}`" unless v1 == v2
          v1
        end

        def term(v)
          v
        end

      end # class Same

      #
      # Summarizes by putting distinct dependents inside an Array, ignoring nils,
      # and optionally sorting the array.
      #
      class DistinctList

        # sorter - optional comparison block handed to `Array#sort`.
        def initialize(&sorter)
          @sorter = sorter
        end

        def init(v)
          Set.new(v.nil? ? [] : [v])
        end

        def sum(v1, v2)
          v1 << v2 unless v2.nil?
          v1
        end

        def term(v)
          list = v.to_a
          list = list.sort(&@sorter) if @sorter
          list
        end

      end # class DistinctList

      #
      # Summarizes by converting dependents to { x => y, ... } such that `x` is not
      # null and `y` is the value observed for `x`.
      #
      # Dependents are expected to be tuples themselves (they are indexed by
      # `x` and `y`); nil dependents are skipped.
      #
      class YByX

        # y, x           - attribute names looked up in each dependent.
        # preserve_nulls - when true, entries whose `y` is nil are kept.
        def initialize(y, x, preserve_nulls = false)
          @y = y
          @x = x
          @preserve_nulls = preserve_nulls
        end

        def init(v)
          [v]
        end

        def sum(v1, v2)
          v1 << v2
        end

        def term(v)
          v.each_with_object({}) do |tuple, h|
            # A nil dependent has neither x nor y
            next if tuple.nil?

            x = tuple[@x]
            next if x.nil?

            y = tuple[@y]
            h[x] = y if @preserve_nulls || !y.nil?
          end
        end

      end # class YByX

      #
      # Summarizes by converting dependents to { x => [ys], ... } such that `x` is not
      # null and `[ys]` is a distinct list of observed `y`.
      #
      # Dependents are expected to be tuples themselves (they are indexed by
      # `x` and `y`); nil dependents are skipped.
      #
      # NOTE(review): nil `y` values are currently kept in the lists, although
      # this summarizer was originally described as collecting non-null `y`
      # only -- confirm which one is intended.
      #
      class YsByX

        # y, x   - attribute names looked up in each dependent.
        # sorter - optional comparison block used to sort the dependents
        #          (not the ys) before they are grouped.
        def initialize(y, x, &sorter)
          @y = y
          @x = x
          @sorter = sorter
        end

        def init(v)
          [v]
        end

        def sum(v1, v2)
          v1 << v2
        end

        def term(v)
          tuples = v.reject { |tuple| tuple.nil? || tuple[@x].nil? }
          tuples = tuples.sort(&@sorter) if @sorter
          tuples.each_with_object({}) do |tuple, h|
            ys = (h[tuple[@x]] ||= [])
            y = tuple[@y]
            ys << y unless ys.include?(y)
          end
        end

      end # class YsByX

    end # class Autosummarize
  end # module Operator
end # module Bmg
@@ -0,0 +1,109 @@
1
module Bmg
  module Operator
    #
    # Autowrap operator.
    #
    # Autowrap can be used to structure tuples ala Tutorial D' wrap, but it works
    # with conventions instead of explicit wrapping, and supports multiple levels
    # of wrapping.
    #
    # Examples:
    #
    #   [{ a: 1, b_x: 2, b_y: 3 }]     => [{ a: 1, b: { x: 2, y: 3 } }]
    #   [{ a: 1, b_x_y: 2, b_x_z: 3 }] => [{ a: 1, b: { x: { y: 2, z: 3 } } }]
    #
    # Autowrap supports the following options:
    #
    # - `postprocessor: :nil|:none|:delete|Hash|Proc` see NoLeftJoinNoise
    # - `split: String` the separator to use to split keys, defaults to `_`
    #
    class Autowrap
      include Operator

      DEFAULT_OPTIONS = {
        :postprocessor => :none,
        :split => "_"
      }.freeze

      # operand - object whose `each` yields Hash tuples.
      # options - see the class documentation; merged over DEFAULT_OPTIONS.
      #
      # Raises a RuntimeError if the postprocessor option is invalid.
      def initialize(operand, options = {})
        @operand = operand
        @options = DEFAULT_OPTIONS.merge(options)
        @options[:postprocessor] = NoLeftJoinNoise.new(@options[:postprocessor])
      end

      # Yields the autowrapped version of each operand tuple. Operand tuples
      # are not modified, a new Hash is built for each of them.
      #
      # Returns an Enumerator when called without a block.
      def each
        return to_enum(:each) unless block_given?

        @operand.each do |tuple|
          yield autowrap(tuple)
        end
      end

      private

      # Builds the wrapped tuple: every key is split on the separator, all
      # parts but the last one designate nested hashes (created on demand)
      # and the last part is the attribute name receiving the value.
      def autowrap(tuple)
        separator = @options[:split]
        wrapped = tuple.each_with_object({}) do |(k, v), h|
          *path, leaf = k.to_s.split(separator).map(&:to_sym)
          target = path.inject(h) { |sub, part| sub[part] ||= {} }
          target[leaf] = v
        end
        postprocessor.call(wrapped)
      end

      def postprocessor
        @options[:postprocessor]
      end

      #
      # Removes the noise generated by left joins that did not match.
      #
      # i.e. x is removed in { x: { id: nil, name: nil, ... } }
      #
      # Only the attributes of the tuple itself are inspected, not those of
      # nested tuples.
      #
      # Supported heuristics are:
      #
      # - nil:    { x: { id: nil, name: nil, ... } } => { x: nil }
      # - delete: { x: { id: nil, name: nil, ... } } => { }
      # - none:   { x: { id: nil, name: nil, ... } } => { x: { id: nil, name: nil, ... } }
      # - a Hash, specifying a specific heuristic by tuple attribute
      # - a Proc, `->(tuple,key){ ... }` that affects the tuple manually
      #
      class NoLeftJoinNoise

        REMOVERS = {
          nil:    ->(t, k) { t[k] = nil },
          delete: ->(t, k) { t.delete(k) },
          none:   ->(t, _k) { t }
        }.freeze

        # remover - nil (same as :none), one of the REMOVERS names, a Hash
        #           mapping attribute names to REMOVERS names (attributes
        #           without entry use :none), or a Proc.
        #
        # Raises a RuntimeError on any other value, including an unknown
        # heuristic name.
        def initialize(remover)
          @remover = case remover
                     when NilClass then REMOVERS[:none]
                     when Proc     then remover
                     when Symbol   then remover_for(remover)
                     when Hash     then ->(t, k) { remover_for(remover[k] || :none).call(t, k) }
                     else
                       raise "Invalid remover `#{remover}`"
                     end
        end

        # Applies the remover on every attribute of `tuple` whose value is a
        # Hash made of nils only. `tuple` is modified in place and returned.
        def call(tuple)
          # Iterate a snapshot of the keys, since removers change the tuple.
          tuple.keys.each do |k|
            value = tuple[k]
            @remover.call(tuple, k) if value.is_a?(Hash) && all_nil?(value)
          end
          tuple
        end

        # True if `tuple` is a Hash whose values are all nil or, recursively,
        # all-nil hashes. An empty Hash qualifies.
        def all_nil?(tuple)
          return false unless tuple.is_a?(Hash)
          tuple.each_value.all? { |v| v.nil? || all_nil?(v) }
        end

        private

        # Returns the predefined remover called `name`, failing with the same
        # error as any other invalid remover when there is no such heuristic.
        def remover_for(name)
          REMOVERS.fetch(name) { raise "Invalid remover `#{name}`" }
        end

      end # NoLeftJoinNoise

    end # class Autowrap
  end # module Operator
end # module Bmg
@@ -0,0 +1,43 @@
1
module Bmg
  module Operator
    #
    # Project operator.
    #
    # Projects operand's tuples on given attributes, that is, keep those attributes
    # only. The operator takes care of removing duplicates.
    #
    # Example:
    #
    #   [{ a: 1, b: 2 }] project [:b] => [{ b: 2 }]
    #
    # All attributes in the attrlist SHOULD be existing attributes of the
    # input tuples.
    #
    class Project
      include Operator

      # operand  - object whose `each` yields Hash tuples.
      # attrlist - collection of attribute names to keep; it must respond
      #            to `include?`.
      def initialize(operand, attrlist)
        @operand = operand
        @attrlist = attrlist
      end

      # Yields every distinct tuple obtained by keeping only the attrlist
      # attributes of the operand's tuples, in operand order.
      #
      # Returns an Enumerator when called without a block.
      def each
        return to_enum(:each) unless block_given?

        seen = {}
        @operand.each do |tuple|
          projected = project(tuple)
          next if seen.key?(projected)

          yield(projected)
          seen[projected] = true
        end
      end

      private

      # Returns a copy of `tuple` restricted to the attrlist attributes.
      #
      # `select` is used instead of `delete_if` on purpose: the latter
      # strips the other attributes from the operand's own tuples.
      def project(tuple)
        tuple.select { |k, _| @attrlist.include?(k) }
      end

    end # class Project
  end # module Operator
end # module Bmg