alf 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +5 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +42 -0
- data/LICENCE.md +22 -0
- data/Manifest.txt +15 -0
- data/README.md +769 -0
- data/Rakefile +23 -0
- data/TODO.md +26 -0
- data/alf.gemspec +191 -0
- data/alf.noespec +30 -0
- data/bin/alf +31 -0
- data/examples/autonum.alf +6 -0
- data/examples/cities.rash +4 -0
- data/examples/clip.alf +3 -0
- data/examples/compact.alf +2 -0
- data/examples/database.alf +6 -0
- data/examples/defaults.alf +3 -0
- data/examples/extend.alf +3 -0
- data/examples/group.alf +3 -0
- data/examples/intersect.alf +4 -0
- data/examples/join.alf +2 -0
- data/examples/minus.alf +8 -0
- data/examples/nest.alf +2 -0
- data/examples/nulls.rash +3 -0
- data/examples/parts.rash +6 -0
- data/examples/project.alf +2 -0
- data/examples/quota.alf +4 -0
- data/examples/rename.alf +3 -0
- data/examples/restrict.alf +2 -0
- data/examples/runall.sh +26 -0
- data/examples/schema.yaml +28 -0
- data/examples/sort.alf +4 -0
- data/examples/summarize.alf +16 -0
- data/examples/suppliers.rash +5 -0
- data/examples/supplies.rash +12 -0
- data/examples/ungroup.alf +4 -0
- data/examples/union.alf +3 -0
- data/examples/unnest.alf +4 -0
- data/examples/with.alf +23 -0
- data/lib/alf.rb +2984 -0
- data/lib/alf/loader.rb +1 -0
- data/lib/alf/renderer/text.rb +153 -0
- data/lib/alf/renderer/yaml.rb +22 -0
- data/lib/alf/version.rb +14 -0
- data/spec/aggregator_spec.rb +62 -0
- data/spec/alf_spec.rb +47 -0
- data/spec/assumptions_spec.rb +15 -0
- data/spec/environment/explicit_spec.rb +15 -0
- data/spec/environment/folder_spec.rb +30 -0
- data/spec/examples_spec.rb +26 -0
- data/spec/lispy_spec.rb +23 -0
- data/spec/operator/command_methods_spec.rb +38 -0
- data/spec/operator/non_relational/autonum_spec.rb +61 -0
- data/spec/operator/non_relational/clip_spec.rb +49 -0
- data/spec/operator/non_relational/compact/buffer_based.rb +30 -0
- data/spec/operator/non_relational/compact/sort_based_spec.rb +30 -0
- data/spec/operator/non_relational/compact_spec.rb +38 -0
- data/spec/operator/non_relational/defaults_spec.rb +55 -0
- data/spec/operator/non_relational/sort_spec.rb +66 -0
- data/spec/operator/relational/extend_spec.rb +34 -0
- data/spec/operator/relational/group_spec.rb +54 -0
- data/spec/operator/relational/intersect_spec.rb +58 -0
- data/spec/operator/relational/join/hash_based_spec.rb +63 -0
- data/spec/operator/relational/minus_spec.rb +56 -0
- data/spec/operator/relational/nest_spec.rb +32 -0
- data/spec/operator/relational/project_spec.rb +65 -0
- data/spec/operator/relational/quota_spec.rb +44 -0
- data/spec/operator/relational/rename_spec.rb +32 -0
- data/spec/operator/relational/restrict_spec.rb +56 -0
- data/spec/operator/relational/summarize/sort_based_spec.rb +31 -0
- data/spec/operator/relational/summarize_spec.rb +41 -0
- data/spec/operator/relational/ungroup_spec.rb +35 -0
- data/spec/operator/relational/union_spec.rb +35 -0
- data/spec/operator/relational/unnest_spec.rb +32 -0
- data/spec/reader/alf_file_spec.rb +15 -0
- data/spec/reader/input.rb +2 -0
- data/spec/reader/rash_spec.rb +31 -0
- data/spec/reader_spec.rb +27 -0
- data/spec/renderer/text/cell_spec.rb +34 -0
- data/spec/renderer/text/row_spec.rb +30 -0
- data/spec/renderer/text/table_spec.rb +39 -0
- data/spec/renderer_spec.rb +42 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/tools/ordering_key_spec.rb +81 -0
- data/spec/tools/projection_key_spec.rb +83 -0
- data/spec/tools/tools_spec.rb +25 -0
- data/spec/tools/tuple_handle_spec.rb +78 -0
- data/tasks/debug_mail.rake +78 -0
- data/tasks/debug_mail.txt +13 -0
- data/tasks/gem.rake +68 -0
- data/tasks/spec_test.rake +79 -0
- data/tasks/unit_test.rake +77 -0
- data/tasks/yard.rake +51 -0
- metadata +282 -0
data/examples/sort.alf
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env alf
|
2
|
+
|
3
|
+
# What is the sum of supplied quantities by supplier?
|
4
|
+
(summarize :supplies,
|
5
|
+
[:sid],
|
6
|
+
:total_qty => Agg::sum(:qty))
|
7
|
+
|
8
|
+
# Give the maximal supplied quantity by country, taking only into account
|
9
|
+
# suppliers that have a status greater than 10
|
10
|
+
# The aggregation below uses Agg::max so that :maxqty really is the maximal
# quantity announced by the comment above (it previously used Agg::sum).
(summarize \
  (join \
    (join (restrict :suppliers, lambda{ status > 10 }),
          :supplies),
    :cities),
  [:country],
  :maxqty => Agg::max{ qty })
|
@@ -0,0 +1,5 @@
|
|
1
|
+
{:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'}
|
2
|
+
{:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'}
|
3
|
+
{:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'}
|
4
|
+
{:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'}
|
5
|
+
{:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'}
|
@@ -0,0 +1,12 @@
|
|
1
|
+
{:sid => 'S1', :pid => 'P1', :qty => 300}
|
2
|
+
{:sid => 'S1', :pid => 'P2', :qty => 200}
|
3
|
+
{:sid => 'S1', :pid => 'P3', :qty => 400}
|
4
|
+
{:sid => 'S1', :pid => 'P4', :qty => 200}
|
5
|
+
{:sid => 'S1', :pid => 'P5', :qty => 100}
|
6
|
+
{:sid => 'S1', :pid => 'P6', :qty => 100}
|
7
|
+
{:sid => 'S2', :pid => 'P1', :qty => 300}
|
8
|
+
{:sid => 'S2', :pid => 'P2', :qty => 400}
|
9
|
+
{:sid => 'S3', :pid => 'P2', :qty => 200}
|
10
|
+
{:sid => 'S4', :pid => 'P2', :qty => 200}
|
11
|
+
{:sid => 'S4', :pid => 'P4', :qty => 300}
|
12
|
+
{:sid => 'S4', :pid => 'P5', :qty => 400}
|
data/examples/union.alf
ADDED
data/examples/unnest.alf
ADDED
data/examples/with.alf
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env alf
|
2
|
+
|
3
|
+
# Compute the total qty supplied in each country together with the subset
|
4
|
+
# of products shipped there. Only consider suppliers that have a status
|
5
|
+
# greater than 10, however.
|
6
|
+
(summarize \
|
7
|
+
(join \
|
8
|
+
(join (restrict :suppliers, lambda{ status > 10 }),
|
9
|
+
:supplies),
|
10
|
+
:cities),
|
11
|
+
[:country],
|
12
|
+
:which => Agg::group(:pid),
|
13
|
+
:total => Agg::sum{ qty })
|
14
|
+
|
15
|
+
# Another equivalent way to write it
|
16
|
+
with( :kept_suppliers => (restrict :suppliers, lambda{ status > 10 }),
|
17
|
+
:with_countries => (join :kept_suppliers, :cities),
|
18
|
+
:supplying => (join :with_countries, :supplies) ) do
|
19
|
+
(summarize :supplying,
|
20
|
+
[:country],
|
21
|
+
:which => Agg::group(:pid),
|
22
|
+
:total => Agg::sum{ qty })
|
23
|
+
end
|
data/lib/alf.rb
ADDED
@@ -0,0 +1,2984 @@
|
|
1
|
+
require "enumerator"
|
2
|
+
require "stringio"
|
3
|
+
require "alf/version"
|
4
|
+
require "alf/loader"
|
5
|
+
|
6
|
+
#
|
7
|
+
# Classy data-manipulation dressed in a DSL (+ commandline)
|
8
|
+
#
|
9
|
+
module Alf
|
10
|
+
|
11
|
+
#
|
12
|
+
# Provides tooling methods that are used here and there in Alf.
|
13
|
+
#
|
14
|
+
module Tools
|
15
|
+
|
16
|
+
#
|
17
|
+
# Returns the unqualified name of a ruby class or module
|
18
|
+
#
|
19
|
+
# Example
|
20
|
+
#
|
21
|
+
# class_name(Alf::Tools) -> :Tools
|
22
|
+
#
|
23
|
+
#
# Returns the unqualified name of a ruby class or module, as a Symbol.
#
# The last run of word characters of +clazz.name+ is extracted, so that
# namespaces are dropped (Alf::Tools -> :Tools). Anonymous classes and
# modules have no name; nil is returned for them instead of failing with
# a NoMethodError on nil.
#
# @param [Module] clazz a class or module
# @return [Symbol, nil] the unqualified name, or nil if there is none
#
def class_name(clazz)
  # String#[] with a capture index avoids relying on the $1 global
  unqualified = clazz.name.to_s[/([A-Za-z0-9_]+)\z/, 1]
  unqualified && unqualified.to_sym
end
|
27
|
+
|
28
|
+
#
|
29
|
+
# Converts an unqualified class or module name to a ruby case method name.
|
30
|
+
#
|
31
|
+
# Example
|
32
|
+
#
|
33
|
+
# ruby_case(:Alf) -> "alf"
|
34
|
+
# ruby_case(:HelloWorld) -> "hello_world"
|
35
|
+
#
|
36
|
+
#
# Converts an unqualified class or module name to a ruby case method name.
#
# Every uppercase ASCII letter is replaced by an underscore followed by its
# lowercase form; the leading underscore produced by an initial uppercase
# letter is then removed. Only that underscore is stripped, so names that
# do not start with an uppercase letter keep their first character and the
# empty string maps to the empty string.
#
# @param [#to_s] s a name, e.g. :HelloWorld
# @return [String] the converted name, e.g. "hello_world"
#
def ruby_case(s)
  s.to_s.gsub(/[A-Z]/) { |upper| "_#{upper.downcase}" }.sub(/\A_/, "")
end
|
39
|
+
|
40
|
+
#
|
41
|
+
# Returns the first non-nil value among the arguments
|
42
|
+
#
|
43
|
+
# Example
|
44
|
+
#
|
45
|
+
# coalesce(nil, 1, "abc") -> 1
|
46
|
+
#
|
47
|
+
#
# Scans the arguments from left to right and returns the first one that
# is not nil (false is a valid result). Returns nil when there is no such
# argument.
#
def coalesce(*args)
  args.each do |candidate|
    return candidate unless candidate.nil?
  end
  nil
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Iterates over enum and yields the block on each element.
|
53
|
+
# Collects block results as key/value pairs and returns them as
|
54
|
+
# a Hash.
|
55
|
+
#
|
56
|
+
#
# Builds a Hash from the pairs returned by the block.
#
# The block is yielded once per element of +enum+ (only +each+ is required
# on it) and its result is destructured as a (key, value) pair. A key
# returned several times keeps the value of its last occurrence.
#
def tuple_collect(enum)
  pairs = {}
  enum.each do |item|
    key, value = yield(item)
    pairs.store(key, value)
  end
  pairs
end
|
64
|
+
|
65
|
+
#
|
66
|
+
# Provides a handle, implementing a flyweight design pattern on tuples.
|
67
|
+
#
|
68
|
+
class TupleHandle

  # Creates a handle that is not bound to any tuple yet.
  def initialize
    @tuple = nil
  end

  #
  # Binds the handle to _tuple_ and returns the handle itself.
  #
  # While no tuple has been set yet, this call also installs the reader
  # methods (see #build) from the keys of _tuple_. Subsequent calls only
  # swap the current tuple.
  #
  def set(tuple)
    build(tuple) if @tuple.nil?
    @tuple = tuple
    self
  end

  #
  # Compiles a tuple expression to a Proc that #evaluate can run later.
  #
  # * a Proc is returned as is
  # * nil compiles to the expression 'true'
  # * a Hash compiles to a conjunction of (key == value.inspect) tests,
  #   an empty Hash being handled like nil
  # * an Array is first converted with Hash[*expr]
  # * a String or Symbol is wrapped in a lambda through eval
  #
  # Raises an ArgumentError for any other kind of argument.
  #
  def self.compile(expr)
    case expr
    when Proc
      expr
    when NilClass
      compile('true')
    when Hash
      return compile(nil) if expr.empty?
      # TODO: replace inspect by to_ruby
      compile(expr.each_pair.collect{|k,v| "(#{k} == #{v.inspect})" }.join(" && "))
    when Array
      compile(Hash[*expr])
    when String, Symbol
      eval("lambda{ #{expr} }")
    else
      raise ArgumentError, "Unable to compile #{expr} to a TupleHandle"
    end
  end

  #
  # Compiles _expr_ (see TupleHandle.compile) and runs it with this handle
  # as self, so that the installed reader methods resolve against the
  # current tuple.
  #
  def evaluate(expr)
    compiled = TupleHandle.compile(expr)
    if RUBY_VERSION < "1.9"
      instance_eval(&compiled)
    else
      instance_exec(&compiled)
    end
  end

  private

  #
  # Installs, on this handle only, one reader method per key of _tuple_.
  #
  # Each reader looks its key up in whatever tuple is current when it is
  # called, not in the tuple used to build the handle.
  #
  def build(tuple)
    singleton = (class << self; self; end)
    tuple.keys.each do |key|
      singleton.send(:define_method, key) { @tuple[key] }
    end
  end

end # class TupleHandle
|
145
|
+
|
146
|
+
#
|
147
|
+
# Defines a projection key
|
148
|
+
#
|
149
|
+
class ProjectionKey
  include Tools

  # Names of the attributes this key is about
  attr_accessor :attributes

  # When true, #split swaps the two halves it returns (allbut projection)
  attr_accessor :allbut

  # Creates a key on _attributes_, optionally in allbut mode.
  def initialize(attributes, allbut = false)
    @attributes, @allbut = attributes, allbut
  end

  #
  # Converts _arg_ to a projection key.
  #
  # A ProjectionKey is returned unchanged; an OrderingKey or an Array of
  # attribute names gives a new, non-allbut key. Anything else raises an
  # ArgumentError.
  #
  def self.coerce(arg)
    case arg
    when ProjectionKey then arg
    when OrderingKey   then ProjectionKey.new(arg.attributes, false)
    when Array         then ProjectionKey.new(arg, false)
    else
      raise ArgumentError, "Unable to coerce #{arg} to a projection key"
    end
  end

  # Returns an OrderingKey sorting on each attribute in ascending order.
  def to_ordering_key
    ascending = attributes.collect{|attr| [attr, :asc] }
    OrderingKey.new(ascending)
  end

  # Returns the first half of #split for _tuple_.
  def project(tuple)
    selected, _ = split(tuple)
    selected
  end

  #
  # Splits _tuple_ in two hashes: one holding the key attributes (an
  # attribute missing from the tuple maps to nil) and a copy of the tuple
  # without them. The attribute hash comes first, unless the key is in
  # allbut mode, in which case the order is reversed. The tuple itself is
  # not modified.
  #
  def split(tuple)
    kept = {}
    remainder = tuple.dup
    attributes.each do |attr|
      kept[attr] = tuple[attr]
      remainder.delete(attr)
    end
    return [remainder, kept] if @allbut
    [kept, remainder]
  end

end # class ProjectionKey
|
196
|
+
|
197
|
+
#
|
198
|
+
# Encapsulates tools for computing orders on tuples
|
199
|
+
#
|
200
|
+
class OrderingKey

  # Array of [attribute, :asc or :desc] pairs, by decreasing priority
  attr_reader :ordering

  # Creates a key from an array of [attribute, order] pairs.
  def initialize(ordering = [])
    @ordering = ordering
    @sorter = nil
  end

  #
  # Converts _arg_ to an ordering key.
  #
  # An Array made only of Symbols is read as attribute names, all sorted
  # in ascending order; any other Array is taken as [attribute, order]
  # pairs. A ProjectionKey is converted through its to_ordering_key method
  # and an OrderingKey is returned unchanged. Anything else raises an
  # ArgumentError.
  #
  def self.coerce(arg)
    case arg
    when Array
      pairs = arg.all?{|a| a.is_a?(Symbol)} ? arg.collect{|a| [a, :asc]} : arg
      OrderingKey.new(pairs)
    when ProjectionKey
      arg.to_ordering_key
    when OrderingKey
      arg
    else
      raise ArgumentError, "Unable to coerce #{arg} to an ordering key"
    end
  end

  # Returns the attribute names, in ordering priority.
  def attributes
    @ordering.collect{|pair| pair.first}
  end

  #
  # Appends _attr_ with the given order at the end of this key (the
  # ordering array is modified in place), drops the cached sorter and
  # returns self.
  #
  def order_by(attr, order = :asc)
    @ordering << [attr, order]
    @sorter = nil
    self
  end

  #
  # Returns the order registered for _attr_ (first match), or nil when
  # _attr_ does not take part in this ordering.
  #
  def order_of(attr)
    pair = @ordering.find{|candidate| candidate.first == attr}
    # an unknown attribute used to fail with NoMethodError on nil
    pair && pair.last
  end

  #
  # Compares two tuples attribute by attribute with <=>, negating the
  # result for :desc attributes. Returns the first non-zero comparison,
  # or 0 when the tuples are equal on all attributes.
  #
  def compare(t1,t2)
    @ordering.each do |attr,order|
      comp = (t1[attr] <=> t2[attr])
      comp *= -1 if order == :desc
      return comp unless comp == 0
    end
    0
  end

  # Returns a (cached) two-argument lambda that delegates to #compare.
  def sorter
    @sorter ||= lambda{|t1,t2| compare(t1, t2)}
  end

  #
  # Returns a new key made of this ordering followed by the one of
  # _other_, which is first coerced to an OrderingKey.
  #
  def +(other)
    other = OrderingKey.coerce(other)
    OrderingKey.new(@ordering + other.ordering)
  end

end # class OrderingKey
|
258
|
+
|
259
|
+
extend Tools
|
260
|
+
end # module Tools
|
261
|
+
|
262
|
+
#
|
263
|
+
# Builds and returns a lispy engine on a specific environment.
|
264
|
+
#
|
265
|
+
# Example(s):
|
266
|
+
#
|
267
|
+
# # Returns a lispy instance on the default environment
|
268
|
+
# lispy = Alf.lispy
|
269
|
+
#
|
270
|
+
# # Returns a lispy instance on the examples' environment
|
271
|
+
# lispy = Alf.lispy(Alf::Environment.examples)
|
272
|
+
#
|
273
|
+
# # Returns a lispy instance on a folder environment of your choice
|
274
|
+
# lispy = Alf.lispy(Alf::Environment.folder('path/to/a/folder'))
|
275
|
+
#
|
276
|
+
# @see Alf::Environment about available environments and their contract
|
277
|
+
#
|
278
|
+
def self.lispy(env = Alf::Environment.default)
|
279
|
+
Command::Main.new(env)
|
280
|
+
end
|
281
|
+
|
282
|
+
#
|
283
|
+
# Implements a small LISP-like DSL on top of Alf.
|
284
|
+
#
|
285
|
+
# The lispy dialect is the functional one used in .alf files and in compiled
|
286
|
+
# expressions as below:
|
287
|
+
#
|
288
|
+
# Alf.lispy.compile do
|
289
|
+
# (restrict :suppliers, lambda{ city == 'London' })
|
290
|
+
# end
|
291
|
+
#
|
292
|
+
# The DSL this module provides is part of Alf's public API and won't be broken
|
293
|
+
# without a major version change. The module itself and its inclusion pre-
|
294
|
+
# conditions are not part of the DSL itself, thus not considered as part of
|
295
|
+
# the API, and may therefore evolve at any time. In other words, this module
|
296
|
+
# is not intended to be directly included by third-party classes.
|
297
|
+
#
|
298
|
+
module Lispy
|
299
|
+
|
300
|
+
# The environment
|
301
|
+
attr_accessor :environment
|
302
|
+
|
303
|
+
#
|
304
|
+
# Compiles a query expression given by a String or a block and returns
|
305
|
+
# the result (typically a tuple iterator)
|
306
|
+
#
|
307
|
+
#
# Evaluates a query expression in the context of this object and returns
# the result. The expression is the block when _expr_ is nil, the given
# source text otherwise.
#
def compile(expr = nil, &block)
  if expr.nil?
    instance_eval(&block)
  else
    instance_eval(expr)
  end
end
|
310
|
+
|
311
|
+
# Delegated to the environment
|
312
|
+
#
# Resolves a named dataset through the wired environment.
# Raises "Environment not set" when no environment has been wired.
#
def dataset(name)
  if @environment
    @environment.dataset(name)
  else
    raise "Environment not set"
  end
end
|
316
|
+
|
317
|
+
#
|
318
|
+
# Compiles the subexpression given by the block in the context of
|
319
|
+
# additional temporary expressions given by definitions
|
320
|
+
#
|
321
|
+
#
# Compiles the subexpression given by the block in the context of
# additional temporary expressions given by _definitions_, and returns
# the compilation result.
#
# The block is captured explicitly: Proc.new without a block, which this
# method used to rely on, raises on Ruby 3.0 and later. The environment
# is unbranched in an ensure clause, so a failing compilation no longer
# leaves this object on the branched environment.
#
# @param [Hash] definitions (name, definition) pairs; each definition
#        must accept an environment through environment=
# @raise [ArgumentError] when no block is given
#
def with(definitions, &block)
  raise ArgumentError, "with requires a block" unless block

  # We branch with the definitions for compilation
  self.environment = environment.branch(definitions)
  begin
    # this is to ensure that sub definitions can reuse other ones
    definitions.each_value do |defn|
      defn.environment = self.environment
    end

    # compile now; the result is the value of this method
    compile(&block)
  ensure
    # We now unbranch for next expression, even if compilation failed
    self.environment = environment.unbranch
  end
end
|
339
|
+
|
340
|
+
#
|
341
|
+
# Chains some elements as a new operator
|
342
|
+
#
|
343
|
+
#
# Chains some elements as a new operator.
#
# Elements are given from the outermost operator to the innermost input:
# each element is piped, together with the current environment, on the
# element that follows it in the argument list. The first element is
# returned. A single element is returned without any piping, and nil is
# returned when no element is given (this used to fail on nil).
#
def chain(*elements)
  source, *consumers = elements.reverse
  consumers.inject(source) do |upstream, op|
    op.pipe(upstream, environment)
    op
  end
end
|
350
|
+
|
351
|
+
[ :Autonum, :Clip, :Compact, :Defaults, :Sort ].each do |op_name|
|
352
|
+
meth_name = Tools.ruby_case(op_name).to_sym
|
353
|
+
define_method(meth_name) do |child, *args|
|
354
|
+
chain(Operator::NonRelational.const_get(op_name).new(*args), child)
|
355
|
+
end
|
356
|
+
end
|
357
|
+
|
358
|
+
[:Project,
|
359
|
+
:Extend,
|
360
|
+
:Rename,
|
361
|
+
:Restrict,
|
362
|
+
:Nest,
|
363
|
+
:Unnest,
|
364
|
+
:Group,
|
365
|
+
:Ungroup,
|
366
|
+
:Summarize,
|
367
|
+
:Quota ].each do |op_name|
|
368
|
+
meth_name = Tools.ruby_case(op_name).to_sym
|
369
|
+
define_method(meth_name) do |child, *args|
|
370
|
+
chain(Operator::Relational.const_get(op_name).new(*args), child)
|
371
|
+
end
|
372
|
+
end
|
373
|
+
|
374
|
+
def allbut(child, attributes)
|
375
|
+
chain(Operator::Relational::Project.new(attributes, true), child)
|
376
|
+
end
|
377
|
+
|
378
|
+
[ :Join,
|
379
|
+
:Union,
|
380
|
+
:Intersect,
|
381
|
+
:Minus ].each do |op_name|
|
382
|
+
meth_name = Tools.ruby_case(op_name).to_sym
|
383
|
+
define_method(meth_name) do |left, right, *args|
|
384
|
+
chain(Operator::Relational.const_get(op_name).new(*args), [left, right])
|
385
|
+
end
|
386
|
+
end
|
387
|
+
|
388
|
+
end # module Lispy
|
389
|
+
|
390
|
+
#
|
391
|
+
# Encapsulates the interface with the outside world, providing base iterators
|
392
|
+
# for named datasets, among others.
|
393
|
+
#
|
394
|
+
# An environment is typically obtained through the factory defined by this
|
395
|
+
# class:
|
396
|
+
#
|
397
|
+
# # Returns the default environment (examples, for now)
|
398
|
+
# Alf::Environment.default
|
399
|
+
#
|
400
|
+
# # Returns an environment on Alf's examples
|
401
|
+
# Alf::Environment.examples
|
402
|
+
#
|
403
|
+
# # Returns an environment on a specific folder, automatically
|
404
|
+
# # resolving datasources via Readers' recognized file extensions
|
405
|
+
# Alf::Environment.folder('path/to/a/folder')
|
406
|
+
#
|
407
|
+
# You can implement your own environment by subclassing this class and
|
408
|
+
# implementing the {#dataset} method. As additional support is implemented
|
409
|
+
# in the base class, Environment should never be mimicked.
|
410
|
+
#
|
411
|
+
class Environment
|
412
|
+
|
413
|
+
#
|
414
|
+
# Returns a dataset whose name is provided.
|
415
|
+
#
|
416
|
+
# This method resolves named datasets to tuple enumerables. When the
|
417
|
+
# dataset exists, this method must return an Iterator, typically a
|
418
|
+
# Reader instance. Otherwise, it must throw a NoSuchDatasetError.
|
419
|
+
#
|
420
|
+
# @param [Symbol] name the name of a dataset
|
421
|
+
# @return [Iterator] an iterator, typically a Reader instance
|
422
|
+
# @raise [NoSuchDatasetError] when the dataset does not exist
|
423
|
+
#
|
424
|
+
def dataset(name)
|
425
|
+
end
|
426
|
+
undef :dataset
|
427
|
+
|
428
|
+
#
|
429
|
+
# Branches this environment and puts some additional explicit
|
430
|
+
# definitions.
|
431
|
+
#
|
432
|
+
# This method is provided for (with ...) expressions and should not
|
433
|
+
# be overridden by subclasses.
|
434
|
+
#
|
435
|
+
# @param [Hash] a set of (name, Iterator) pairs.
|
436
|
+
# @return [Environment] an environment instance with new definitions set
|
437
|
+
#
|
438
|
+
def branch(defs)
|
439
|
+
Explicit.new(defs, self)
|
440
|
+
end
|
441
|
+
|
442
|
+
#
|
443
|
+
# Specialization of Environment that works with explicitly defined
|
444
|
+
# datasources and allows branching and unbranching.
|
445
|
+
#
|
446
|
+
class Explicit < Environment
|
447
|
+
|
448
|
+
#
|
449
|
+
# Creates a new environment instance with initial definitions
|
450
|
+
# and optional child environment.
|
451
|
+
#
|
452
|
+
def initialize(defs = {}, child = nil)
|
453
|
+
@defs = defs
|
454
|
+
@child = child
|
455
|
+
end
|
456
|
+
|
457
|
+
#
|
458
|
+
# Unbranches this environment and returns its child
|
459
|
+
#
|
460
|
+
def unbranch
|
461
|
+
@child
|
462
|
+
end
|
463
|
+
|
464
|
+
# (see Environment#dataset)
|
465
|
+
#
# Looks _name_ up in the explicit definitions first (a definition whose
# value is nil still counts as found), then delegates to the child
# environment if there is one. Raises "No such dataset ..." otherwise.
#
def dataset(name)
  return @defs[name] if @defs.has_key?(name)
  return @child.dataset(name) if @child
  raise "No such dataset #{name}"
end
|
474
|
+
|
475
|
+
end # class Explicit
|
476
|
+
|
477
|
+
#
|
478
|
+
# Specialization of Environment to work on files of a given folder.
|
479
|
+
#
|
480
|
+
# This kind of environment resolves datasets by simply looking at
|
481
|
+
# recognized files in a specific folder. "Recognized" files are simply
|
482
|
+
# those for which a Reader subclass has been previously registered.
|
483
|
+
# This environment then serves reader instances.
|
484
|
+
#
|
485
|
+
class Folder < Environment
|
486
|
+
|
487
|
+
#
|
488
|
+
# Creates an environment instance, wired to the specified folder.
|
489
|
+
#
|
490
|
+
# @param [String] folder path to the folder to use as dataset source
|
491
|
+
#
|
492
|
+
def initialize(folder)
|
493
|
+
@folder = folder
|
494
|
+
end
|
495
|
+
|
496
|
+
# (see Environment#dataset)
|
497
|
+
def dataset(name)
|
498
|
+
if file = find_file(name)
|
499
|
+
Reader.reader(file, self)
|
500
|
+
else
|
501
|
+
raise "No such dataset #{name} (#{@folder})"
|
502
|
+
end
|
503
|
+
end
|
504
|
+
|
505
|
+
protected
|
506
|
+
|
507
|
+
#
# Finds the file backing the dataset called _name_.
#
# Candidates are tried in this order:
# 1. _name_ itself, taken as a path (relative to the current directory
#    or absolute)
# 2. a regular file called _name_ inside the folder
# 3. the first regular file of the folder matching "<name>.*"
#
# File.exist? is used instead of File.exists?, which was deprecated and
# then removed in Ruby 3.2.
#
# @param [#to_s] name a dataset name or a file path
# @return [String, nil] the path of the file found, nil if there is none
#
def find_file(name)
  # TODO: refactor this, because it allows getting out of the folder
  candidate = name.to_s
  return candidate if File.exist?(candidate)

  # File.file? is false for missing paths, no separate existence check
  explicit = File.join(@folder, candidate)
  return explicit if File.file?(explicit)

  Dir[File.join(@folder, "#{name}.*")].find{|f| File.file?(f) }
end
|
520
|
+
|
521
|
+
end # class Folder
|
522
|
+
|
523
|
+
#
|
524
|
+
# Factors a Folder environment on a specific path
|
525
|
+
#
|
526
|
+
def self.folder(path)
|
527
|
+
Folder.new(path)
|
528
|
+
end
|
529
|
+
|
530
|
+
#
|
531
|
+
# Returns the default environment
|
532
|
+
#
|
533
|
+
def self.default
|
534
|
+
examples
|
535
|
+
end
|
536
|
+
|
537
|
+
#
|
538
|
+
# Returns the examples environment
|
539
|
+
#
|
540
|
+
def self.examples
|
541
|
+
folder File.expand_path('../../examples', __FILE__)
|
542
|
+
end
|
543
|
+
|
544
|
+
end # class Environment
|
545
|
+
|
546
|
+
#
|
547
|
+
# Marker module for all elements implementing tuple iterators.
|
548
|
+
#
|
549
|
+
# At first glance, an iterator is nothing else than an Enumerable that serves
|
550
|
+
# tuples (represented by ruby hashes). However, this module helps Alf's internal
|
551
|
+
# classes to recognize enumerables that may safely be considered as tuple
|
552
|
+
# iterators from other enumerables. For this reason, all elements that would
|
553
|
+
# like to participate in an iteration chain (that is, a logical operator
|
554
|
+
# implementation) should be marked with this module. This is the case for
|
555
|
+
# all Readers and Operators defined in Alf.
|
556
|
+
#
|
557
|
+
# Moreover, an Iterator should always define a {#pipe} method, which is the
|
558
|
+
# natural way to define the input and execution environment of operators and
|
559
|
+
# readers.
|
560
|
+
#
|
561
|
+
module Iterator
|
562
|
+
include Enumerable
|
563
|
+
|
564
|
+
#
|
565
|
+
# Wire the iterator input and an optional execution environment.
|
566
|
+
#
|
567
|
+
# Iterators (typically Reader and Operator instances) work from input data
|
568
|
+
# that come from files, or other operators, and so on. This method wires
|
569
|
+
# this input data to the iterator. Wiring is required before any attempt
|
570
|
+
# to call each, unless autowiring occurs at construction. The exact kind of
|
571
|
+
# input object is left at discretion of Iterator implementations.
|
572
|
+
#
|
573
|
+
# @param [Object] input the iterator input, at discretion of the Iterator
|
574
|
+
# implementation.
|
575
|
+
# @param [Environment] environment an optional environment for resolving
|
576
|
+
# named datasets if needed.
|
577
|
+
#
|
578
|
+
def pipe(input, environment = nil)
|
579
|
+
end
|
580
|
+
undef :pipe
|
581
|
+
|
582
|
+
|
583
|
+
#
|
584
|
+
# Coerces something to an iterator
|
585
|
+
#
|
586
|
+
def self.coerce(arg, env)
|
587
|
+
case arg
|
588
|
+
when Iterator, Array
|
589
|
+
arg
|
590
|
+
else
|
591
|
+
Reader.coerce(arg, env)
|
592
|
+
end
|
593
|
+
end
|
594
|
+
|
595
|
+
end # module Iterator
|
596
|
+
|
597
|
+
#
|
598
|
+
# Implements an Iterator at the interface with the outside world.
|
599
|
+
#
|
600
|
+
# The contract of a Reader is simply to be an Iterator. Unlike operators,
|
601
|
+
# however, readers are not expected to take other iterators as input, but IO
|
602
|
+
# objects, database tables, or something similar instead. This base class
|
603
|
+
# provides a default behavior for readers that works with IO objects. It can
|
604
|
+
# be safely extended, overridden, or even mimicked (provided that you include
|
605
|
+
# and implement the Iterator contract).
|
606
|
+
#
|
607
|
+
# This class also provides a registration mechanism to help getting Reader
|
608
|
+
# instances for specific file extensions. A typical scenario for using this
|
609
|
+
# registration mechanism is as follows:
|
610
|
+
#
|
611
|
+
# # Registers a reader kind named :foo, associated with ".foo" file
|
612
|
+
# # extensions and the FooFileDecoder class (typically a subclass of
|
613
|
+
# # Reader)
|
614
|
+
# Reader.register(:foo, [".foo"], FooFileDecoder)
|
615
|
+
#
|
616
|
+
# # Later on, you can request a reader instance for a .foo file, as
|
617
|
+
# # illustrated below.
|
618
|
+
# r = Reader.reader('/a/path/to/a/file.foo')
|
619
|
+
#
|
620
|
+
# # Also, a factory method is automatically installed on the Reader class
|
621
|
+
# # itself. This factory method can be used with a String, or an IO object.
|
622
|
+
# r = Reader.foo([a path or a IO object])
|
623
|
+
#
|
624
|
+
class Reader
|
625
|
+
include Iterator
|
626
|
+
|
627
|
+
# Registered readers
|
628
|
+
@@readers = []
|
629
|
+
|
630
|
+
#
|
631
|
+
# Registers a reader class associated with specific file extensions
|
632
|
+
#
|
633
|
+
# Registered class must provide a constructor with the following signature
|
634
|
+
# <code>new(path_or_io, environment = nil)</code>. The name must be a symbol
|
635
|
+
# which can safely be used as a ruby method name. A factory class method of
|
636
|
+
# that name and same signature is automatically installed on the Reader
|
637
|
+
# class.
|
638
|
+
#
|
639
|
+
# @param [Symbol] name a name for the kind of data decoded
|
640
|
+
# @param [Array] extensions file extensions mapped to the registered reader
|
641
|
+
# class (should include the '.', e.g. '.foo')
|
642
|
+
# @param [Class] class Reader subclass used to decode this kind of files
|
643
|
+
#
|
644
|
+
def self.register(name, extensions, clazz)
|
645
|
+
@@readers << [name, extensions, clazz]
|
646
|
+
(class << self; self; end).
|
647
|
+
send(:define_method, name) do |*args|
|
648
|
+
clazz.new(*args)
|
649
|
+
end
|
650
|
+
end
|
651
|
+
|
652
|
+
#
|
653
|
+
# Returns a reader instance for a specific file whose path is given
|
654
|
+
# as argument.
|
655
|
+
#
|
656
|
+
# @param [String] filepath path to a file for which extension is recognized
|
657
|
+
# @param [Array] args optional additional arguments that must be passed at
|
658
|
+
# reader's class new method.
|
659
|
+
# @return [Reader] a reader instance
|
660
|
+
#
|
661
|
+
def self.reader(filepath, *args)
|
662
|
+
ext = File.extname(filepath)
|
663
|
+
if registered = @@readers.find{|r| r[1].include?(ext)}
|
664
|
+
registered[2].new(filepath, *args)
|
665
|
+
else
|
666
|
+
raise "No registered reader for #{ext} (#{filepath})"
|
667
|
+
end
|
668
|
+
end
|
669
|
+
|
670
|
+
#
|
671
|
+
# Coerces an argument to a reader, using an optional environment to convert
|
672
|
+
# named datasets.
|
673
|
+
#
|
674
|
+
# This method automatically provides readers for Strings and Symbols through
|
675
|
+
# passed environment (**not** through the reader factory) and for IO objects
|
676
|
+
# (through Rash reader). It is part of Alf's internals and should be used
|
677
|
+
# with care.
|
678
|
+
#
|
679
|
+
def self.coerce(arg, environment = nil)
|
680
|
+
case arg
|
681
|
+
when Reader
|
682
|
+
arg
|
683
|
+
when IO
|
684
|
+
rash(arg, environment)
|
685
|
+
when String, Symbol
|
686
|
+
if environment
|
687
|
+
environment.dataset(arg.to_sym)
|
688
|
+
else
|
689
|
+
raise "No environment set"
|
690
|
+
end
|
691
|
+
else
|
692
|
+
raise ArgumentError, "Unable to coerce #{arg.inspect} to a reader"
|
693
|
+
end
|
694
|
+
end
|
695
|
+
|
696
|
+
# @return [Environment] Wired environment
|
697
|
+
attr_accessor :environment
|
698
|
+
|
699
|
+
# @return [String or IO] Input IO, or file name
|
700
|
+
attr_accessor :input
|
701
|
+
|
702
|
+
#
|
703
|
+
# Creates a reader instance, with an optional input and environment wiring.
|
704
|
+
#
|
705
|
+
# @param [String or IO] path to a file or IO object for input
|
706
|
+
# @param [Environment] environment wired environment, serving this reader
|
707
|
+
#
|
708
|
+
def initialize(input = nil, environment = nil)
|
709
|
+
@input = input
|
710
|
+
@environment = environment
|
711
|
+
end
|
712
|
+
|
713
|
+
#
|
714
|
+
# (see Iterator#pipe)
|
715
|
+
#
|
716
|
+
#
# Wires the reader input and, optionally, its execution environment.
#
# The _env_ argument used to be accepted but silently dropped; it is now
# stored, so that an environment given at piping time is effectively
# wired. When omitted, the current environment is kept.
#
# @param [String or IO] input path to a file or IO object for input
# @param [Environment] env environment to wire (defaults to the current one)
# @return the wired input
#
def pipe(input, env = environment)
  @environment = env
  @input = input
end
|
719
|
+
|
720
|
+
#
|
721
|
+
# (see Iterator#each)
|
722
|
+
#
|
723
|
+
# @private the default implementation reads lines of the input stream and
|
724
|
+
# yields the block with <code>line2tuple(line)</code> on each of them. This
|
725
|
+
# method may be overridden if this behavior does not fit reader's needs.
|
726
|
+
#
|
727
|
+
def each
|
728
|
+
each_input_line do |line|
|
729
|
+
tuple = line2tuple(line)
|
730
|
+
yield tuple unless tuple.nil?
|
731
|
+
end
|
732
|
+
end
|
733
|
+
|
734
|
+
protected
|
735
|
+
|
736
|
+
#
|
737
|
+
# Coerces the input object to an IO and yields the block with it.
|
738
|
+
#
|
739
|
+
# StringIO and IO inputs are yielded directly while file paths are first
|
740
|
+
# opened in read mode and then yielded.
|
741
|
+
#
|
742
|
+
#
# Yields the block with an IO built from the reader input and returns the
# block result.
#
# IO and StringIO inputs are passed through as they are. A String is
# taken as a file path: the file is opened in read mode for the duration
# of the block. Any other input raises an error.
#
def with_input_io
  source = input
  if source.is_a?(IO) || source.is_a?(StringIO)
    yield source
  elsif source.is_a?(String)
    File.open(source, 'r'){|io| yield io}
  else
    raise "Unable to convert #{input} to an IO object"
  end
end
|
752
|
+
|
753
|
+
#
|
754
|
+
# Returns the whole input text.
|
755
|
+
#
|
756
|
+
# This feature should only be used by subclasses on inputs that are
|
757
|
+
# small enough to fit in memory. Consider implementing readers without this
|
758
|
+
# feature on files that could be larger.
|
759
|
+
#
|
760
|
+
def input_text
|
761
|
+
with_input_io{|io| io.readlines.join}
|
762
|
+
end
|
763
|
+
|
764
|
+
#
|
765
|
+
# Yields the block with each line of the input text in turn.
|
766
|
+
#
|
767
|
+
# This method is a helper for files that capture one tuple on each input
|
768
|
+
# line. It should be used in those cases, as the resulting reader will not
|
769
|
+
# load all input in memory but serve tuples on demand.
|
770
|
+
#
|
771
|
+
#
# Yields the block with each line of the input, in turn.
#
# Lines are read through with_input_io and IO#each_line, one at a time.
# The block is captured as an explicit parameter: the former
# Proc.new-without-a-block idiom raises on Ruby 3.0 and later.
#
def each_input_line(&block)
  with_input_io{|io| io.each_line(&block)}
end
|
774
|
+
|
775
|
+
#
|
776
|
+
# Converts a line previously read from the input stream to a tuple.
|
777
|
+
#
|
778
|
+
# The line is simply ignored if this method returns nil. Errors should be
|
779
|
+
# properly handled by raising exceptions. This method MUST be implemented
|
780
|
+
# by subclasses unless each is overridden.
|
781
|
+
#
|
782
|
+
def line2tuple(line); end
# The empty definition above is removed right away: the method only
# exists when a subclass defines it.
undef line2tuple
|
785
|
+
|
786
|
+
#
|
787
|
+
# Specialization of the Reader contract for .rash files.
|
788
|
+
#
|
789
|
+
# A .rash file/stream contains one ruby hash literal on each line. This
|
790
|
+
# reader simply decodes each of them in turn with Kernel.eval, providing a
|
791
|
+
# state-less reader (that is, tuples are not all loaded in memory at once).
|
792
|
+
#
|
793
|
+
class Rash < Reader
|
794
|
+
|
795
|
+
# (see Reader#line2tuple)
|
796
|
+
def line2tuple(line)
  # NOTE(review): the line is executed as ruby code by Kernel.eval; only
  # feed this reader with trusted .rash content.
  h = Kernel.eval(line)
  raise "hash expected, got #{h}" unless h.is_a?(Hash)
  h
rescue StandardError, ScriptError => ex
  # Invalid lines (runtime errors, syntax errors, non-hash values) are
  # reported on $stderr and skipped by returning nil. Interrupt, SystemExit
  # and other non-standard exceptions are deliberately left to propagate.
  $stderr << "Skipping #{line.strip}: #{ex.message}\n"
  nil
end
|
807
|
+
|
808
|
+
Reader.register(:rash, [".rash"], self)
|
809
|
+
end # class Rash
|
810
|
+
|
811
|
+
#
|
812
|
+
# Specialization of the Reader contract for .alf files.
|
813
|
+
#
|
814
|
+
# A .alf file simply contains a query expression in the Lispy DSL. This
|
815
|
+
# reader decodes and compiles the expression and delegates the enumeration
|
816
|
+
# to the obtained operator.
|
817
|
+
#
|
818
|
+
# Note that an Environment must be wired at creation or piping time.
|
819
|
+
# NoSuchDatasetError will certainly occur otherwise.
|
820
|
+
#
|
821
|
+
class AlfFile < Reader
|
822
|
+
|
823
|
+
# (see Reader#each)
|
824
|
+
def each(&block)
  # Compile the whole input text as a Lispy expression, then let the
  # resulting operator enumerate the tuples. The block is forwarded
  # explicitly (a block-less Proc.new raises ArgumentError on Ruby >= 3.0).
  op = Alf.lispy(environment).compile(input_text)
  op.each(&block)
end
|
828
|
+
|
829
|
+
Reader.register(:alf, [".alf"], self)
|
830
|
+
end # class AlfFile
|
831
|
+
|
832
|
+
end # class Reader
|
833
|
+
|
834
|
+
#
|
835
|
+
# Renders a relation (given by any Iterator) in a specific format.
|
836
|
+
#
|
837
|
+
# A renderer takes an Iterator instance as input and renders it on an output
|
838
|
+
# stream. Renderers are **not** iterators themselves, even if they mimic the
|
839
|
+
# {#pipe} method. Their usage is made via the {#execute} method.
|
840
|
+
#
|
841
|
+
# Similarly to the {Reader} class, this one provides a registration mechanism
|
842
|
+
# for specific output formats. The common scenario is as follows:
|
843
|
+
#
|
844
|
+
# # Register a new renderer for :foo format (automatically provides the
|
845
|
+
# # '--foo Render output as a foo stream' option of 'alf show') and with
|
846
|
+
# # the FooRenderer class for handling rendering.
|
847
|
+
# Renderer.register(:foo, "as a foo stream", FooRenderer)
|
848
|
+
#
|
849
|
+
# # Later on, you can request a renderer instance for a specific format
|
850
|
+
# # as follows (wiring input is optional)
|
851
|
+
# r = Renderer.renderer(:foo, [an Iterator])
|
852
|
+
#
|
853
|
+
# # Also, a factory method is automatically installed on the Renderer class
|
854
|
+
# # itself.
|
855
|
+
# r = Renderer.foo([an Iterator])
|
856
|
+
#
|
857
|
+
class Renderer
|
858
|
+
|
859
|
+
# Registered renderers
|
860
|
+
@@renderers = []
|
861
|
+
|
862
|
+
#
|
863
|
+
# Register a rendering class with a given name and description.
|
864
|
+
#
|
865
|
+
# Registered class must at least provide a constructor with an empty
|
866
|
+
# signature. The name must be a symbol which can safely be used as a ruby
|
867
|
+
# method name. A factory class method of that name and delegation signature
|
868
|
+
# is automatically installed on the Renderer class.
|
869
|
+
#
|
870
|
+
# @param [Symbol] name a name for the output format
|
871
|
+
# @param [String] description an output format description (for 'alf show')
|
872
|
+
# @param [Class] clazz Renderer subclass used to render in this format
|
873
|
+
#
|
874
|
+
def self.register(name, description, clazz)
  @@renderers << [name, description, clazz]
  # Install a factory method called `name` on the singleton class; it
  # forwards its arguments to clazz.new
  singleton = class << self; self; end
  singleton.send(:define_method, name) { |*args| clazz.new(*args) }
end
|
881
|
+
|
882
|
+
#
|
883
|
+
# Returns a Renderer instance for the given output format name.
|
884
|
+
#
|
885
|
+
# @param [Symbol] name name of an output format previously registered
|
886
|
+
# @param [...] args other arguments to pass to the renderer constructor
|
887
|
+
# @return [Renderer] a Renderer instance, already wired if args are
|
888
|
+
# provided
|
889
|
+
#
|
890
|
+
def self.renderer(name, *args)
  # entries are [name, description, clazz] triples
  entry = @@renderers.find { |triple| triple.first == name }
  raise "No renderer registered for #{name}" unless entry
  entry[2].new(*args)
end
|
897
|
+
|
898
|
+
#
|
899
|
+
# Yields each (name,description,clazz) previously registered in turn
|
900
|
+
#
|
901
|
+
def self.each_renderer(&block)
  # Block forwarded explicitly: a block-less Proc.new raises ArgumentError
  # on Ruby >= 3.0. Each registered [name, description, clazz] triple is
  # yielded in registration order.
  @@renderers.each(&block)
end
|
904
|
+
|
905
|
+
# Renderer input (typically an Iterator)
|
906
|
+
attr_accessor :input
|
907
|
+
|
908
|
+
# @return [Environment] Optional wired environment
|
909
|
+
attr_accessor :environment
|
910
|
+
|
911
|
+
#
|
912
|
+
# Creates a renderer instance, optionally wired to an input
|
913
|
+
#
|
914
|
+
def initialize(input = nil)
|
915
|
+
@input = input
|
916
|
+
end
|
917
|
+
|
918
|
+
#
|
919
|
+
# Sets the renderer input.
|
920
|
+
#
|
921
|
+
# This method mimics {Iterator#pipe} and has the same contract.
|
922
|
+
#
|
923
|
+
def pipe(input, env = environment)
|
924
|
+
self.environment = env
|
925
|
+
self.input = input
|
926
|
+
end
|
927
|
+
|
928
|
+
#
|
929
|
+
# Executes the rendering, outputting the resulting tuples on the provided
|
930
|
+
# output buffer.
|
931
|
+
#
|
932
|
+
# The default implementation simply coerces the input as an Iterator and
|
933
|
+
# delegates the call to {#render}.
|
934
|
+
#
|
935
|
+
def execute(output = $stdout)
|
936
|
+
render(Iterator.coerce(input, environment), output)
|
937
|
+
end
|
938
|
+
|
939
|
+
protected
|
940
|
+
|
941
|
+
#
|
942
|
+
# Renders tuples served by the iterator to the output buffer provided and
|
943
|
+
# returns the latter.
|
944
|
+
#
|
945
|
+
# This method must be implemented by subclasses unless {#execute} is
|
946
|
+
# overriden.
|
947
|
+
#
|
948
|
+
def render(iterator, output)
|
949
|
+
end
|
950
|
+
undef :render
|
951
|
+
|
952
|
+
#
|
953
|
+
# Implements the Renderer contract through inspect
|
954
|
+
#
|
955
|
+
class Rash < Renderer
|
956
|
+
|
957
|
+
# (see Renderer#render)
|
958
|
+
def render(input, output)
  input.each do |tuple|
    # one inspected tuple per output line
    dumped = tuple.inspect
    output << dumped << "\n"
  end
  output
end
|
964
|
+
|
965
|
+
Renderer.register(:rash, "as ruby hashes", self)
|
966
|
+
end # class Rash
|
967
|
+
|
968
|
+
require "alf/renderer/text"
|
969
|
+
require "alf/renderer/yaml"
|
970
|
+
end # class Renderer
|
971
|
+
|
972
|
+
#
|
973
|
+
# Provides a factory over Alf operators and handles the interface with
|
974
|
+
# Quickl for commandline support.
|
975
|
+
#
|
976
|
+
# This module is part of Alf's internal architecture and should not be used
|
977
|
+
# at all by third-party projects.
|
978
|
+
#
|
979
|
+
module Factory

  # Builds a Quickl command whose parent command is Alf::Command::Main;
  # the builder is also yielded to the caller's block when one is given.
  #
  # @see Quickl::Command
  def Command(file, line)
    Quickl::Command(file, line) do |builder|
      builder.command_parent = Alf::Command::Main
      yield(builder) if block_given?
    end
  end

  # Same as Command, with Alf::Operator installed as instance module.
  #
  # @see Operator
  def Operator(file, line)
    Command(file, line) { |b| b.instance_module(Alf::Operator) }
  end

  extend Factory
end # module Factory
|
998
|
+
|
999
|
+
#
|
1000
|
+
# Marker module and namespace for Alf main commands, those that are **not**
|
1001
|
+
# operators at all.
|
1002
|
+
#
|
1003
|
+
module Command
|
1004
|
+
|
1005
|
+
#
|
1006
|
+
# alf - Classy data-manipulation dressed in a DSL (+ commandline)
|
1007
|
+
#
|
1008
|
+
# SYNOPSIS
|
1009
|
+
# alf [--version] [--help]
|
1010
|
+
# alf -e '(lispy command)'
|
1011
|
+
# alf [FILE.alf]
|
1012
|
+
# alf [alf opts] OPERATOR [operator opts] ARGS ...
|
1013
|
+
# alf help OPERATOR
|
1014
|
+
#
|
1015
|
+
# OPTIONS
|
1016
|
+
# #{summarized_options}
|
1017
|
+
#
|
1018
|
+
# RELATIONAL COMMANDS
|
1019
|
+
# #{summarized_subcommands subcommands.select{|cmd|
|
1020
|
+
# cmd.include?(Alf::Operator::Relational)
|
1021
|
+
# }}
|
1022
|
+
#
|
1023
|
+
# NON-RELATIONAL COMMANDS
|
1024
|
+
# #{summarized_subcommands subcommands.select{|cmd|
|
1025
|
+
# cmd.include?(Alf::Operator::NonRelational)
|
1026
|
+
# }}
|
1027
|
+
#
|
1028
|
+
# OTHER NON-RELATIONAL COMMANDS
|
1029
|
+
# #{summarized_subcommands subcommands.select{|cmd|
|
1030
|
+
# cmd.include?(Alf::Command)
|
1031
|
+
# }}
|
1032
|
+
#
|
1033
|
+
# See '#{program_name} help COMMAND' for details about a specific command.
|
1034
|
+
#
|
1035
|
+
class Main < Quickl::Delegator(__FILE__, __LINE__)
|
1036
|
+
include Command, Lispy
|
1037
|
+
|
1038
|
+
# Environment instance to use to get base iterators
|
1039
|
+
attr_accessor :environment
|
1040
|
+
|
1041
|
+
# Output renderer
|
1042
|
+
attr_accessor :renderer
|
1043
|
+
|
1044
|
+
# Creates a command instance
|
1045
|
+
def initialize(env = Environment.default)
|
1046
|
+
@environment = env
|
1047
|
+
end
|
1048
|
+
|
1049
|
+
# Install options
|
1050
|
+
options do |opt|
|
1051
|
+
@execute = false
|
1052
|
+
opt.on("-e", "--execute", "Execute one line of script (Lispy API)") do
|
1053
|
+
@execute = true
|
1054
|
+
end
|
1055
|
+
|
1056
|
+
@renderer = Renderer::Rash.new
|
1057
|
+
Renderer.each_renderer do |name,descr,clazz|
|
1058
|
+
opt.on("--#{name}", "Render output #{descr}"){
|
1059
|
+
@renderer = clazz.new
|
1060
|
+
}
|
1061
|
+
end
|
1062
|
+
|
1063
|
+
opt.on('--env=FOLDER',
|
1064
|
+
"Set the environment folder to use") do |value|
|
1065
|
+
@environment = Environment.folder(value)
|
1066
|
+
end
|
1067
|
+
|
1068
|
+
opt.on_tail('-h', "--help", "Show help") do
|
1069
|
+
raise Quickl::Help
|
1070
|
+
end
|
1071
|
+
|
1072
|
+
opt.on_tail('-v', "--version", "Show version") do
|
1073
|
+
raise Quickl::Exit, "#{program_name} #{Alf::VERSION}"\
|
1074
|
+
" (c) 2011, Bernard Lambeau"
|
1075
|
+
end
|
1076
|
+
end # Alf's options
|
1077
|
+
|
1078
|
+
#
|
1079
|
+
# Overridden because Quickl only keeps --options but modifying it there
|
1080
|
+
# should probably be considered a broken API.
|
1081
|
+
#
|
1082
|
+
def _run(argv = [])
  # 1) Leading dash-prefixed arguments are mine: they are moved out of
  #    argv (which is consumed in place) and parsed
  my_argv = []
  my_argv << argv.shift while argv.first =~ /^-/
  parse_options(my_argv)

  # 2) With -e the next argument is evaluated as a script on this
  #    instance; otherwise the remaining arguments go to the parent _run
  operator = @execute ? instance_eval(argv.first) : super

  # 3) Without an operator or a requester, hand back what was built;
  #    otherwise render the operator on standard output
  return operator unless operator && requester
  chain(renderer, operator).execute($stdout)
end
|
1106
|
+
|
1107
|
+
end
|
1108
|
+
|
1109
|
+
#
|
1110
|
+
# Output input tuples through a specific renderer (text, yaml, ...)
|
1111
|
+
#
|
1112
|
+
# SYNOPSIS
|
1113
|
+
# #{program_name} #{command_name} [DATASET...]
|
1114
|
+
#
|
1115
|
+
# OPTIONS
|
1116
|
+
# #{summarized_options}
|
1117
|
+
#
|
1118
|
+
# DESCRIPTION
|
1119
|
+
#
|
1120
|
+
# When dataset names are specified as commandline args, request the environment
|
1121
|
+
# to provide those datasets and print them. Otherwise, take what comes on standard
|
1122
|
+
# input.
|
1123
|
+
#
|
1124
|
+
# Note that this command is not an operator and should not be piped anymore.
|
1125
|
+
#
|
1126
|
+
class Show < Factory::Command(__FILE__, __LINE__)
  include Command

  # Text rendering by default; one --<name> switch per registered renderer
  options do |opt|
    @renderer = Renderer::Text.new
    Renderer.each_renderer do |name, descr, clazz|
      opt.on("--#{name}", "Render output #{descr}") { @renderer = clazz.new }
    end
  end

  # Installs the selected renderer on the requester and chains the given
  # datasets (standard input when none is given)
  def execute(args)
    requester.renderer = @renderer
    args = [ $stdin ] if args.empty?
    requester.chain(*args)
  end

end # class Show
|
1145
|
+
|
1146
|
+
#
|
1147
|
+
# Executes an .alf file on current environment
|
1148
|
+
#
|
1149
|
+
# SYNOPSIS
|
1150
|
+
# #{program_name} #{command_name} [FILE]
|
1151
|
+
#
|
1152
|
+
# OPTIONS
|
1153
|
+
# #{summarized_options}
|
1154
|
+
#
|
1155
|
+
# DESCRIPTION
|
1156
|
+
#
|
1157
|
+
# This command executes the .alf file passed as first argument (or what comes
|
1158
|
+
# on standard input) as a alf query to be executed on the current environment.
|
1159
|
+
#
|
1160
|
+
class Exec < Factory::Command(__FILE__, __LINE__)
  include Command

  # Builds an .alf reader on the first argument (standard input when
  # absent), wired to the requester's environment
  def execute(args)
    source = args.first || $stdin
    Reader.alf(source, requester.environment)
  end

end # class Exec
|
1168
|
+
|
1169
|
+
#
|
1170
|
+
# Show help about a specific command
|
1171
|
+
#
|
1172
|
+
# SYNOPSIS
|
1173
|
+
# #{program_name} #{command_name} COMMAND
|
1174
|
+
#
|
1175
|
+
class Help < Factory::Command(__FILE__, __LINE__)
  include Command

  # Let NoSuchCommandError be passed to higher stage
  no_react_to Quickl::NoSuchCommand

  # Prints the help of the command named by the single argument, or the
  # help of the super command when the argument count is not exactly one.
  # Always returns nil.
  def execute(args)
    documented = if args.size == 1
      has_command!(args.first, super_command)
    else
      super_command
    end
    puts documented.help
    nil
  end

end # class Help
|
1193
|
+
|
1194
|
+
end
|
1195
|
+
|
1196
|
+
#
|
1197
|
+
# Marker for all operators, relational and non-relational ones.
|
1198
|
+
#
|
1199
|
+
module Operator
|
1200
|
+
include Iterator, Tools
|
1201
|
+
|
1202
|
+
#
|
1203
|
+
# Encapsulates method definitions that convert operators to Quickl
|
1204
|
+
# commands
|
1205
|
+
#
|
1206
|
+
module CommandMethods

  protected

  #
  # Configures the operator from arguments taken from command line.
  #
  # This default implementation ignores the arguments. Subclasses that
  # override it must return the operator itself as well.
  #
  def set_args(args)
    self
  end

  #
  # Overrides Quickl::Command::Single#_run to handle the '--' separator
  # correctly (parse_options tends to eat it).
  #
  # Both sides of the separator go through parse_options; the right side
  # configures the operator through set_args, the left side is piped as
  # operands. Returns the operator itself.
  #
  def _run(argv = [])
    operands, args = split_command_args(argv).collect { |arr| parse_options(arr) }
    set_args(args)
    operands = command_line_operands(operands)
    if operands
      # own environment first, then the requester's one (if any)
      env = environment || (requester ? requester.environment : nil)
      pipe(operands, env)
    end
    self
  end

  #
  # Splits args around the first '--' and returns [operands, arguments].
  # Without separator everything is an operand; with a leading separator
  # the only operand is standard input.
  #
  def split_command_args(args)
    separator = args.index("--")
    if separator.nil?
      [args, []]
    elsif separator.zero?
      [[ $stdin ], args[1..-1]]
    else
      [args[0...separator], args[(separator + 1)..-1]]
    end
  end

  # Hook turning commandline operands into what gets piped; returns them
  # unchanged here.
  def command_line_operands(operands)
    operands
  end

end # module CommandMethods
|
1256
|
+
include CommandMethods
|
1257
|
+
|
1258
|
+
# Operators input datasets
|
1259
|
+
attr_accessor :datasets
|
1260
|
+
|
1261
|
+
# Optional environment
|
1262
|
+
attr_reader :environment
|
1263
|
+
|
1264
|
+
# Sets the environment on this operator and propagate on
|
1265
|
+
# datasets
|
1266
|
+
def environment=(env)
  # Nothing to do when the environment is unchanged (this guard is also
  # what avoids the infinite loop mentioned in the original TODO)
  return if @environment == env

  @environment = env
  # Propagate to every dataset exposing an environment; datasets may not
  # be set yet
  (datasets || []).each do |dataset|
    dataset.environment = env if dataset.respond_to?(:environment)
  end
  env
end
|
1280
|
+
|
1281
|
+
#
|
1282
|
+
# Sets the operator input
|
1283
|
+
#
|
1284
|
+
def pipe(input, env = environment)
|
1285
|
+
raise NotImplementedError, "Operator#pipe should be overriden"
|
1286
|
+
end
|
1287
|
+
|
1288
|
+
#
|
1289
|
+
# Yields each tuple in turn
|
1290
|
+
#
|
1291
|
+
# This method is implemented in a way that ensures that all operators are
|
1292
|
+
# thread safe. It is not intended to be overriden, use _each instead.
|
1293
|
+
#
|
1294
|
+
def each(&block)
  # Iterate on a prepared copy, so that state installed by _prepare never
  # lands on the receiver itself. The block is forwarded explicitly (a
  # block-less Proc.new raises ArgumentError on Ruby >= 3.0).
  op = self.dup
  op._prepare
  op._each(&block)
end
|
1299
|
+
|
1300
|
+
protected
|
1301
|
+
|
1302
|
+
#
|
1303
|
+
# Prepares the iterator before subsequent call to _each.
|
1304
|
+
#
|
1305
|
+
# This method is intended to be overridden by subclasses to install what's
|
1306
|
+
# needed for successful iteration. The default implementation does nothing.
|
1307
|
+
#
|
1308
|
+
def _prepare
|
1309
|
+
end
|
1310
|
+
|
1311
|
+
# Internal implementation of the iterator.
|
1312
|
+
#
|
1313
|
+
# This method must be implemented by subclasses. It is safe to use instance
|
1314
|
+
# variables (typically initialized in _prepare) here.
|
1315
|
+
#
|
1316
|
+
def _each
|
1317
|
+
end
|
1318
|
+
|
1319
|
+
#
|
1320
|
+
# Specialization of Operator for operators that work on a unary input
|
1321
|
+
#
|
1322
|
+
module Unary
|
1323
|
+
include Operator
|
1324
|
+
|
1325
|
+
#
|
1326
|
+
# Sets the operator input
|
1327
|
+
#
|
1328
|
+
def pipe(input, env = environment)
|
1329
|
+
self.environment = env
|
1330
|
+
self.datasets = [ input ]
|
1331
|
+
end
|
1332
|
+
|
1333
|
+
protected
|
1334
|
+
|
1335
|
+
def command_line_operands(operands)
|
1336
|
+
operands.first
|
1337
|
+
end
|
1338
|
+
|
1339
|
+
#
|
1340
|
+
# Simply returns the first dataset
|
1341
|
+
#
|
1342
|
+
def input
|
1343
|
+
Iterator.coerce(datasets.first, environment)
|
1344
|
+
end
|
1345
|
+
|
1346
|
+
#
|
1347
|
+
# Yields the block with each input tuple.
|
1348
|
+
#
|
1349
|
+
# This method should be preferred to <code>input.each</code> when possible.
|
1350
|
+
#
|
1351
|
+
def each_input_tuple(&block)
  # Explicit block forwarding: a block-less Proc.new raises ArgumentError
  # on Ruby >= 3.0
  input.each(&block)
end
|
1354
|
+
|
1355
|
+
end # module Unary
|
1356
|
+
|
1357
|
+
#
|
1358
|
+
# Specialization of Operator for operators that work on a binary input
|
1359
|
+
#
|
1360
|
+
module Binary
|
1361
|
+
include Operator
|
1362
|
+
|
1363
|
+
#
|
1364
|
+
# Sets the operator input
|
1365
|
+
#
|
1366
|
+
def pipe(input, env = environment)
|
1367
|
+
self.environment = env
|
1368
|
+
self.datasets = input
|
1369
|
+
end
|
1370
|
+
|
1371
|
+
protected
|
1372
|
+
|
1373
|
+
def command_line_operands(operands)
  # two operands or more: nothing to add
  return operands unless operands.size < 2
  # otherwise standard input is put in front of the given operands
  [$stdin] + operands
end
|
1376
|
+
|
1377
|
+
# Returns the left operand
|
1378
|
+
def left
|
1379
|
+
Iterator.coerce(datasets.first, environment)
|
1380
|
+
end
|
1381
|
+
|
1382
|
+
# Returns the right operand
|
1383
|
+
def right
|
1384
|
+
Iterator.coerce(datasets.last, environment)
|
1385
|
+
end
|
1386
|
+
|
1387
|
+
end # module Binary
|
1388
|
+
|
1389
|
+
#
|
1390
|
+
# Specialization of Operator for operators that simply convert single tuples
|
1391
|
+
# to single tuples.
|
1392
|
+
#
|
1393
|
+
module Transform
|
1394
|
+
include Unary
|
1395
|
+
|
1396
|
+
protected
|
1397
|
+
|
1398
|
+
# (see Operator#_each)
|
1399
|
+
def _each
  # one output tuple per input tuple
  each_input_tuple { |source| yield(_tuple2tuple(source)) }
end
|
1404
|
+
|
1405
|
+
#
|
1406
|
+
# Transforms an input tuple to an output tuple
|
1407
|
+
#
|
1408
|
+
def _tuple2tuple(tuple)
|
1409
|
+
end
|
1410
|
+
|
1411
|
+
end # module Transform
|
1412
|
+
|
1413
|
+
#
|
1414
|
+
# Specialization of Operator for implementing operators that rely on a
|
1415
|
+
# cesure algorithm.
|
1416
|
+
#
|
1417
|
+
module Cesure
|
1418
|
+
include Unary
|
1419
|
+
|
1420
|
+
protected
|
1421
|
+
|
1422
|
+
# (see Operator#_each)
|
1423
|
+
def _each(&receiver)
  # The caller's block is captured as `receiver` and handed to the cesure
  # hooks (a block-less Proc.new raises ArgumentError on Ruby >= 3.0).
  proj_key, prev_key = cesure_key, nil
  each_input_tuple do |tuple|
    cur_key = proj_key.project(tuple)
    if cur_key != prev_key
      # key change: close the previous group (if any) and open a new one
      flush_cesure(prev_key, receiver) unless prev_key.nil?
      start_cesure(cur_key, receiver)
      prev_key = cur_key
    end
    accumulate_cesure(tuple, receiver)
  end
  # close the last group, unless no group was ever opened
  flush_cesure(prev_key, receiver) unless prev_key.nil?
end
|
1436
|
+
|
1437
|
+
def cesure_key
|
1438
|
+
end
|
1439
|
+
|
1440
|
+
def start_cesure(key, receiver)
|
1441
|
+
end
|
1442
|
+
|
1443
|
+
def accumulate_cesure(tuple, receiver)
|
1444
|
+
end
|
1445
|
+
|
1446
|
+
def flush_cesure(key, receiver)
|
1447
|
+
end
|
1448
|
+
|
1449
|
+
end # module Cesure
|
1450
|
+
|
1451
|
+
#
|
1452
|
+
# Specialization of Operator for operators that are shortcuts for longer
|
1453
|
+
# expressions.
|
1454
|
+
#
|
1455
|
+
module Shortcut
|
1456
|
+
include Operator
|
1457
|
+
|
1458
|
+
#
|
1459
|
+
# Sets the operator input
|
1460
|
+
#
|
1461
|
+
def pipe(input, env = environment)
|
1462
|
+
self.environment = env
|
1463
|
+
self.datasets = input
|
1464
|
+
end
|
1465
|
+
|
1466
|
+
protected
|
1467
|
+
|
1468
|
+
# (see Operator#_each)
|
1469
|
+
def _each(&block)
  # Delegates the iteration to the longer expression; the block is
  # forwarded explicitly (a block-less Proc.new raises ArgumentError on
  # Ruby >= 3.0)
  longexpr.each(&block)
end
|
1472
|
+
|
1473
|
+
#
|
1474
|
+
# Compiles the longer expression and returns it.
|
1475
|
+
#
|
1476
|
+
# @return (Iterator) the compiled longer expression, typically another
|
1477
|
+
# Operator instance
|
1478
|
+
#
|
1479
|
+
def longexpr
|
1480
|
+
end
|
1481
|
+
undef :longexpr
|
1482
|
+
|
1483
|
+
#
|
1484
|
+
# This is an helper ala Lispy#chain for implementing (#longexpr).
|
1485
|
+
#
|
1486
|
+
# @param [Array] elements a list of Iterator-able
|
1487
|
+
# @return [Operator] the first element of the list, but piped with the
|
1488
|
+
# next one, and so on.
|
1489
|
+
#
|
1490
|
+
def chain(*elements)
  # Walk the list from its last element: each element gets the previous
  # one as input, and the first element of the list is returned
  reversed = elements.reverse
  innermost = reversed.first
  reversed[1..-1].inject(innermost) do |piped, elm|
    elm.pipe(piped, environment)
    elm
  end
end
|
1497
|
+
|
1498
|
+
end # module Shortcut
|
1499
|
+
|
1500
|
+
end # module Operator
|
1501
|
+
|
1502
|
+
#
|
1503
|
+
# Marker module and namespace for non relational operators
|
1504
|
+
#
|
1505
|
+
module Operator::NonRelational
|
1506
|
+
|
1507
|
+
#
|
1508
|
+
# Extend with an unique autonumber attribute
|
1509
|
+
#
|
1510
|
+
# SYNOPSIS
|
1511
|
+
# #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
|
1512
|
+
#
|
1513
|
+
# API & EXAMPLE
|
1514
|
+
#
|
1515
|
+
# # Autonumber suppliers (:autonum attribute name by default)
|
1516
|
+
# (autonum :suppliers)
|
1517
|
+
#
|
1518
|
+
# # You can specify the attribute name
|
1519
|
+
# (autonum :suppliers, :unique_id)
|
1520
|
+
#
|
1521
|
+
# DESCRIPTION
|
1522
|
+
#
|
1523
|
+
# This operator takes input tuples in any order they come and extends them
|
1524
|
+
# with an autonumber attribute ATTRNAME. This allows converting non-relational
|
1525
|
+
# tuple enumerators to relational ones by ensuring uniqueness of tuples in an
|
1526
|
+
# arbitrary manner.
|
1527
|
+
#
|
1528
|
+
# alf autonum suppliers
|
1529
|
+
# alf autonum suppliers -- unique_id
|
1530
|
+
#
|
1531
|
+
class Autonum < Factory::Operator(__FILE__, __LINE__)
|
1532
|
+
include Operator::NonRelational, Operator::Transform
|
1533
|
+
|
1534
|
+
# Names of the new attribute to add
|
1535
|
+
attr_accessor :attrname
|
1536
|
+
|
1537
|
+
def initialize(attrname = :autonum)
|
1538
|
+
@attrname = attrname
|
1539
|
+
end
|
1540
|
+
|
1541
|
+
protected
|
1542
|
+
|
1543
|
+
# (see Operator::CommandMethods#set_args)
|
1544
|
+
def set_args(args)
  # The last commandline argument, if any, names the autonumber attribute
  @attrname = args.last.to_sym unless args.empty?
  # set_args must return the operator itself (see CommandMethods#set_args)
  self
end
|
1547
|
+
|
1548
|
+
# (see Operator#_prepare)
|
1549
|
+
def _prepare
|
1550
|
+
@autonum = -1
|
1551
|
+
end
|
1552
|
+
|
1553
|
+
# (see Operator::Transform#_tuple2tuple)
|
1554
|
+
def _tuple2tuple(tuple)
|
1555
|
+
tuple.merge(@attrname => (@autonum += 1))
|
1556
|
+
end
|
1557
|
+
|
1558
|
+
end # class Autonum
|
1559
|
+
|
1560
|
+
#
|
1561
|
+
# Force default values on missing/nil attributes
|
1562
|
+
#
|
1563
|
+
# SYNOPSIS
|
1564
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
|
1565
|
+
#
|
1566
|
+
# OPTIONS
|
1567
|
+
# #{summarized_options}
|
1568
|
+
#
|
1569
|
+
# API & EXAMPLE
|
1570
|
+
#
|
1571
|
+
# # Non strict mode
|
1572
|
+
# (defaults :suppliers, :country => 'Belgium')
|
1573
|
+
#
|
1574
|
+
# # Strict mode (--strict)
|
1575
|
+
# (defaults :suppliers, {:country => 'Belgium'}, true)
|
1576
|
+
#
|
1577
|
+
# DESCRIPTION
|
1578
|
+
#
|
1579
|
+
# This operator rewrites tuples so as to ensure that all values for specified
|
1580
|
+
# attributes ATTRx are defined and not nil. Missing or nil attributes are
|
1581
|
+
# replaced by the associated default value VALx.
|
1582
|
+
#
|
1583
|
+
# When used in shell, the hash of default values is built from commandline
|
1584
|
+
# arguments ala Hash[...]. However, to keep type safety VALx are interpreted
|
1585
|
+
# as ruby literals and built with Kernel.eval. This means that strings must
|
1586
|
+
# be doubly quoted. For the example of the API section:
|
1587
|
+
#
|
1588
|
+
# alf defaults suppliers -- country "'Belgium'"
|
1589
|
+
#
|
1590
|
+
# When used in --strict mode, the operator simply project resulting tuples on
|
1591
|
+
# attributes for which a default value has been specified. Using the strict
|
1592
|
+
# mode guarantees that the heading of all tuples is the same, and that no nil
|
1593
|
+
# value ever remains. However, this operator never remove duplicates.
|
1594
|
+
#
|
1595
|
+
class Defaults < Factory::Operator(__FILE__, __LINE__)
|
1596
|
+
include Operator::NonRelational, Operator::Transform
|
1597
|
+
|
1598
|
+
# Default values as a ATTR -> VAL hash
|
1599
|
+
attr_accessor :defaults
|
1600
|
+
|
1601
|
+
# Strict mode?
|
1602
|
+
attr_accessor :strict
|
1603
|
+
|
1604
|
+
# Builds a Defaults operator instance
|
1605
|
+
def initialize(defaults = {}, strict = false)
|
1606
|
+
@defaults = defaults
|
1607
|
+
@strict = strict
|
1608
|
+
end
|
1609
|
+
|
1610
|
+
options do |opt|
|
1611
|
+
opt.on('-s', '--strict', 'Strictly restrict to default attributes'){
|
1612
|
+
self.strict = true
|
1613
|
+
}
|
1614
|
+
end
|
1615
|
+
|
1616
|
+
protected
|
1617
|
+
|
1618
|
+
# (see Operator::CommandMethods#set_args)
|
1619
|
+
def set_args(args)
  # Arguments come as ATTR VAL pairs; each VAL is a ruby literal decoded
  # with Kernel.eval.
  # NOTE(review): Kernel.eval runs whatever code the value contains.
  @defaults = tuple_collect(args.each_slice(2)) { |k, v| [k.to_sym, Kernel.eval(v)] }
  self
end
|
1625
|
+
|
1626
|
+
# (see Operator::Transform#_tuple2tuple)
|
1627
|
+
def _tuple2tuple(tuple)
  # strict mode: keep the attributes having a default, and only those
  return tuple_collect(@defaults) { |k, v| [k, coalesce(tuple[k], v)] } if strict

  # otherwise keep every attribute, filling the gaps with the defaults
  completed = tuple_collect(tuple) { |k, v| [k, coalesce(v, @defaults[k])] }
  @defaults.merge(completed)
end
|
1638
|
+
|
1639
|
+
end # class Defaults
|
1640
|
+
|
1641
|
+
#
|
1642
|
+
# Remove tuple duplicates
|
1643
|
+
#
|
1644
|
+
# SYNOPSIS
|
1645
|
+
# #{program_name} #{command_name} [OPERAND]
|
1646
|
+
#
|
1647
|
+
# API & EXAMPLE
|
1648
|
+
#
|
1649
|
+
# # clip, unlike project, typically leave duplicates
|
1650
|
+
# (compact (clip :suppliers, [ :city ]))
|
1651
|
+
#
|
1652
|
+
# DESCRIPTION
|
1653
|
+
#
|
1654
|
+
# This operator remove duplicates from input tuples. As defaults, it is a non
|
1655
|
+
# relational operator that helps normalizing input for implementing relational
|
1656
|
+
# operators. This one is centric in converting bags of tuples to sets of
|
1657
|
+
# tuples, as required by true relations.
|
1658
|
+
#
|
1659
|
+
# alf compact ...
|
1660
|
+
#
|
1661
|
+
class Compact < Factory::Operator(__FILE__, __LINE__)
|
1662
|
+
include Operator::NonRelational, Operator::Shortcut, Operator::Unary
|
1663
|
+
|
1664
|
+
# Removes duplicates according to a complete order
|
1665
|
+
class SortBased
|
1666
|
+
include Operator::Cesure
|
1667
|
+
|
1668
|
+
def cesure_key
|
1669
|
+
@cesure_key ||= ProjectionKey.new([],true)
|
1670
|
+
end
|
1671
|
+
|
1672
|
+
def accumulate_cesure(tuple, receiver)
|
1673
|
+
@tuple = tuple
|
1674
|
+
end
|
1675
|
+
|
1676
|
+
def flush_cesure(key, receiver)
|
1677
|
+
receiver.call(@tuple)
|
1678
|
+
end
|
1679
|
+
|
1680
|
+
end # class SortBased
|
1681
|
+
|
1682
|
+
# Removes duplicates by loading all in memory and filtering
|
1683
|
+
# them there
|
1684
|
+
class BufferBased
|
1685
|
+
include Operator::Unary
|
1686
|
+
|
1687
|
+
def _prepare
|
1688
|
+
@tuples = input.to_a.uniq
|
1689
|
+
end
|
1690
|
+
|
1691
|
+
def _each(&block)
  # Serves the tuples stored in @tuples; the block is forwarded explicitly
  # (a block-less Proc.new raises ArgumentError on Ruby >= 3.0)
  @tuples.each(&block)
end
|
1694
|
+
|
1695
|
+
end # class BufferBased
|
1696
|
+
|
1697
|
+
protected
|
1698
|
+
|
1699
|
+
def longexpr
|
1700
|
+
chain BufferBased.new,
|
1701
|
+
datasets
|
1702
|
+
end
|
1703
|
+
|
1704
|
+
end # class Compact
|
1705
|
+
|
1706
|
+
#
|
1707
|
+
# Sort input tuples according to an order relation
|
1708
|
+
#
|
1709
|
+
# SYNOPSIS
|
1710
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR1 ORDER1 ATTR2 ORDER2...
|
1711
|
+
#
|
1712
|
+
# API & EXAMPLE
|
1713
|
+
#
|
1714
|
+
# # sort on supplier name in ascending order
|
1715
|
+
# (sort :suppliers, [:name])
|
1716
|
+
#
|
1717
|
+
# # sort on city then on name
|
1718
|
+
# (sort :suppliers, [:city, :name])
|
1719
|
+
#
|
1720
|
+
# # sort on city DESC then on name ASC
|
1721
|
+
# (sort :suppliers, [[:city, :desc], [:name, :asc]])
|
1722
|
+
#
|
1723
|
+
# => See OrderingKey about specifying orderings
|
1724
|
+
#
|
1725
|
+
# DESCRIPTION
|
1726
|
+
#
|
1727
|
+
# This operator sorts input tuples on ATTR1 then ATTR2, etc. and outputs
|
1728
|
+
# them sorted after that. This is, of course, a non relational operator as
|
1729
|
+
# relations are unordered sets. It is provided to implement operators that
|
1730
|
+
# need tuples to be sorted to work correctly. When used in shell, the key
|
1731
|
+
# ordering must be specified in its longest form:
|
1732
|
+
#
|
1733
|
+
# alf sort suppliers -- name asc
|
1734
|
+
# alf sort suppliers -- city desc name asc
|
1735
|
+
#
|
1736
|
+
# LIMITATIONS
|
1737
|
+
#
|
1738
|
+
# The fact that the ordering must be completely specified with commandline
|
1739
|
+
# arguments is a limitation, shortcuts could be provided in the future.
|
1740
|
+
#
|
1741
|
+
class Sort < Factory::Operator(__FILE__, __LINE__)
|
1742
|
+
include Operator::NonRelational, Operator::Unary
|
1743
|
+
|
1744
|
+
def initialize(ordering_key = [])
|
1745
|
+
@ordering_key = OrderingKey.coerce(ordering_key)
|
1746
|
+
yield self if block_given?
|
1747
|
+
end
|
1748
|
+
|
1749
|
+
def ordering=(ordering)
|
1750
|
+
@ordering_key = OrderingKey.coerce(ordering)
|
1751
|
+
end
|
1752
|
+
|
1753
|
+
protected
|
1754
|
+
|
1755
|
+
def set_args(args)
  # symbolize the arguments, then pair them up two by two
  symbols = args.map { |arg| arg.to_sym }
  self.ordering = symbols.each_slice(2).to_a
  self
end
|
1759
|
+
|
1760
|
+
def _prepare
|
1761
|
+
@buffer = Buffer::Sorted.new(@ordering_key)
|
1762
|
+
@buffer.add_all(input)
|
1763
|
+
end
|
1764
|
+
|
1765
|
+
def _each(&block)
  # Serves the tuples held by @buffer; the block is forwarded explicitly
  # (a block-less Proc.new raises ArgumentError on Ruby >= 3.0)
  @buffer.each(&block)
end
|
1768
|
+
|
1769
|
+
end # class Sort
|
1770
|
+
|
1771
|
+
#
|
1772
|
+
# Clip input tuples to a subset of attributes
|
1773
|
+
#
|
1774
|
+
# SYNOPSIS
|
1775
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
|
1776
|
+
#
|
1777
|
+
# OPTIONS
|
1778
|
+
# #{summarized_options}
|
1779
|
+
#
|
1780
|
+
# API & EXAMPLE
|
1781
|
+
#
|
1782
|
+
# # Keep only name and city attributes
|
1783
|
+
# (clip :suppliers, [:name, :city])
|
1784
|
+
#
|
1785
|
+
# # Keep all but name and city attributes
|
1786
|
+
# (clip :suppliers, [:name, :city], true)
|
1787
|
+
#
|
1788
|
+
# DESCRIPTION
|
1789
|
+
#
|
1790
|
+
# This operator clips tuples on attributes whose names are specified as
|
1791
|
+
# arguments. This is similar to the relational PROJECT operator, except
|
1792
|
+
# that this one does not remove duplicates that can occur from clipping.
|
1793
|
+
# In other words, clipping may lead to bags of tuples instead of sets.
|
1794
|
+
#
|
1795
|
+
# When used in shell, the clipping/projection key is simply taken from
|
1796
|
+
# commandline arguments:
|
1797
|
+
#
|
1798
|
+
# alf clip suppliers -- name city
|
1799
|
+
# alf clip suppliers --allbut -- name city
|
1800
|
+
#
|
1801
|
+
class Clip < Factory::Operator(__FILE__, __LINE__)
  include Operator::NonRelational, Operator::Transform

  # Creates a Clip operator from attribute names and an allbut flag, both
  # handed to a ProjectionKey. The new instance is yielded to the block
  # when one is given.
  def initialize(attributes = [], allbut = false)
    @projection_key = ProjectionKey.new(attributes, allbut)
    yield(self) if block_given?
  end

  # Sets the attribute names on the underlying projection key.
  def attributes=(attrs)
    @projection_key.attributes = attrs
  end

  # Sets the allbut flag on the underlying projection key.
  def allbut=(allbut)
    @projection_key.allbut = allbut
  end

  # Commandline options: -a / --allbut turns the allbut flag on.
  options do |opt|
    opt.on('-a', '--allbut', 'Apply a ALLBUT clipping') { self.allbut = true }
  end

  protected

  # Takes the clipping attributes from commandline arguments (symbolized).
  # Returns self.
  def set_args(args)
    self.attributes = args.map(&:to_sym)
    self
  end

  # Maps one input tuple to its projection on the projection key.
  def _tuple2tuple(tuple)
    @projection_key.project(tuple)
  end

end # class Clip
|
1839
|
+
|
1840
|
+
end # Operator::NonRelational
|
1841
|
+
|
1842
|
+
#
|
1843
|
+
# Marker module and namespace for relational operators
|
1844
|
+
#
|
1845
|
+
module Operator::Relational
|
1846
|
+
|
1847
|
+
#
|
1848
|
+
# Relational projection (clip + compact)
|
1849
|
+
#
|
1850
|
+
# SYNOPSIS
|
1851
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
|
1852
|
+
#
|
1853
|
+
# OPTIONS
|
1854
|
+
# #{summarized_options}
|
1855
|
+
#
|
1856
|
+
# API & EXAMPLE
|
1857
|
+
#
|
1858
|
+
# # Project on name and city attributes
|
1859
|
+
# (project :suppliers, [:name, :city])
|
1860
|
+
#
|
1861
|
+
# # Project on all but name and city attributes
|
1862
|
+
# (allbut :suppliers, [:name, :city])
|
1863
|
+
#
|
1864
|
+
# DESCRIPTION
|
1865
|
+
#
|
1866
|
+
# This operator projects tuples on attributes whose names are specified as
|
1867
|
+
# arguments. This is similar to clip, except that this one is a truly
|
1868
|
+
# relational one, that is, it also removes duplicate tuples.
|
1869
|
+
#
|
1870
|
+
# When used in shell, the clipping/projection key is simply taken from
|
1871
|
+
# commandline arguments:
|
1872
|
+
#
|
1873
|
+
# alf project suppliers -- name city
|
1874
|
+
# alf project --allbut suppliers -- name city
|
1875
|
+
#
|
1876
|
+
class Project < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Unary

  # Creates a Project operator from attribute names and an allbut flag,
  # both handed to a ProjectionKey. The new instance is yielded to the
  # block when one is given.
  def initialize(attributes = [], allbut = false)
    @projection_key = ProjectionKey.new(attributes, allbut)
    yield(self) if block_given?
  end

  # Sets the attribute names on the underlying projection key.
  def attributes=(attrs)
    @projection_key.attributes = attrs
  end

  # Sets the allbut flag on the underlying projection key.
  def allbut=(allbut)
    @projection_key.allbut = allbut
  end

  # Commandline options: -a / --allbut turns the allbut flag on.
  options do |opt|
    opt.on('-a', '--allbut', 'Apply a ALLBUT projection') { self.allbut = true }
  end

  protected

  # Takes the projection attributes from commandline arguments
  # (symbolized). Returns self.
  def set_args(args)
    self.attributes = args.map(&:to_sym)
    self
  end

  # Long expression of this shortcut: a Compact chained with a Clip that
  # carries the same attributes and allbut flag, over the datasets.
  def longexpr
    compact = Operator::NonRelational::Compact.new
    clip    = Operator::NonRelational::Clip.new(@projection_key.attributes,
                                                @projection_key.allbut)
    chain compact, clip, datasets
  end

end # class Project
|
1917
|
+
|
1918
|
+
#
|
1919
|
+
# Relational extension (additional, computed attributes)
|
1920
|
+
#
|
1921
|
+
# SYNOPSIS
|
1922
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR1 EXPR1 ATTR2 EXPR2...
|
1923
|
+
#
|
1924
|
+
# API & EXAMPLE
|
1925
|
+
#
|
1926
|
+
# (extend :supplies, :sp => lambda{ sid + "/" + pid },
|
1927
|
+
# :big => lambda{ qty > 100 ? true : false })
|
1928
|
+
#
|
1929
|
+
# DESCRIPTION
|
1930
|
+
#
|
1931
|
+
# This command extends input tuples with new attributes (named ATTR1, ...)
|
1932
|
+
# whose value is the result of evaluating tuple expressions (i.e. EXPR1, ...).
|
1933
|
+
# See main documentation about the semantics of tuple expressions. When used
|
1934
|
+
# in shell, the hash of extensions is built from commandline arguments ala
|
1935
|
+
# Hash[...]. Tuple expressions must be specified as code literals there:
|
1936
|
+
#
|
1937
|
+
# alf extend supplies -- sp 'sid + "/" + pid' big "qty > 100 ? true : false"
|
1938
|
+
#
|
1939
|
+
# Attributes ATTRx should not already exist, no behavior is guaranteed if
|
1940
|
+
# this precondition is not respected.
|
1941
|
+
#
|
1942
|
+
class Extend < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Hash mapping each new attribute name to the expression computing it
  attr_accessor :extensions

  # Creates an Extend operator from a hash of extensions.
  def initialize(extensions = {})
    @extensions = extensions
  end

  protected

  # Builds the extensions from commandline arguments read pairwise as
  # ATTR EXPR: names are symbolized, expressions go through
  # TupleHandle.compile. Returns self.
  def set_args(args)
    pairs = args.each_slice(2)
    @extensions = tuple_collect(pairs) do |name, code|
      [name.to_sym, TupleHandle.compile(code)]
    end
    self
  end

  # Creates the TupleHandle used to evaluate expressions.
  def _prepare
    @handle = TupleHandle.new
  end

  # Returns the input tuple merged with one computed value per extension;
  # each expression is evaluated with the handle set on that tuple.
  def _tuple2tuple(tuple)
    computed = tuple_collect(@extensions) do |name, expr|
      [name, @handle.set(tuple).evaluate(expr)]
    end
    tuple.merge(computed)
  end

end # class Extend
|
1976
|
+
|
1977
|
+
#
|
1978
|
+
# Relational renaming (rename some attributes)
|
1979
|
+
#
|
1980
|
+
# SYNOPSIS
|
1981
|
+
# #{program_name} #{command_name} [OPERAND] -- OLD1 NEW1 ...
|
1982
|
+
#
|
1983
|
+
# OPTIONS
|
1984
|
+
# #{summarized_options}
|
1985
|
+
#
|
1986
|
+
# API & EXAMPLE
|
1987
|
+
#
|
1988
|
+
# (rename :suppliers, :name => :supplier_name, :city => :supplier_city)
|
1989
|
+
#
|
1990
|
+
# DESCRIPTION
|
1991
|
+
#
|
1992
|
+
# This command renames OLD attributes as NEW as specified by arguments.
|
1993
|
+
# Attributes OLD should exist in source tuples while attributes NEW should
|
1994
|
+
# not. When used in shell, renaming attributes are built ala Hash[...] from
|
1995
|
+
# commandline arguments:
|
1996
|
+
#
|
1997
|
+
# alf rename suppliers -- name supplier_name city supplier_city
|
1998
|
+
#
|
1999
|
+
class Rename < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Hash mapping old attribute names to new ones
  attr_accessor :renaming

  # Creates a Rename operator from an old => new hash.
  def initialize(renaming = {})
    @renaming = renaming
  end

  protected

  # Builds the renaming from commandline arguments, symbolized and read as
  # OLD1 NEW1 OLD2 NEW2 ... (Hash[...] style). Returns self.
  def set_args(args)
    names = args.map(&:to_sym)
    @renaming = Hash[*names]
    self
  end

  # Rebuilds the tuple with renamed keys; a key without a renaming entry
  # is kept as is, values are untouched.
  def _tuple2tuple(tuple)
    tuple_collect(tuple) do |name, value|
      [@renaming[name] || name, value]
    end
  end

end # class Rename
|
2024
|
+
|
2025
|
+
#
|
2026
|
+
# Relational restriction (aka where, predicate filtering)
|
2027
|
+
#
|
2028
|
+
# SYNOPSIS
|
2029
|
+
# #{program_name} #{command_name} [OPERAND] -- EXPR
|
2030
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
|
2031
|
+
#
|
2032
|
+
# API & EXAMPLE
|
2033
|
+
#
|
2034
|
+
# # Restrict to suppliers with status greater than 20
|
2035
|
+
# (restrict :suppliers, lambda{ status > 20 })
|
2036
|
+
#
|
2037
|
+
# # Restrict to suppliers that live in London
|
2038
|
+
# (restrict :suppliers, lambda{ city == 'London' })
|
2039
|
+
#
|
2040
|
+
# DESCRIPTION
|
2041
|
+
#
|
2042
|
+
# This command restricts tuples to those for which EXPR evaluates to true.
|
2043
|
+
# EXPR must be a valid tuple expression that should return a truth-value.
|
2044
|
+
# When used in shell, the predicate is taken as a string and compiled with
|
2045
|
+
# TupleHandle.compile. We also provide a shortcut for equality expressions.
|
2046
|
+
# Note that, in that case, values are expected to be ruby code literals,
|
2047
|
+
# evaluated with Kernel.eval. Therefore, strings must be doubly quoted.
|
2048
|
+
#
|
2049
|
+
# alf restrict suppliers -- "status > 20"
|
2050
|
+
# alf restrict suppliers -- city "'London'"
|
2051
|
+
#
|
2052
|
+
class Restrict < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Predicate deciding which tuples are kept
  attr_accessor :predicate

  # Creates a Restrict operator; the predicate is passed through
  # TupleHandle.compile. The new instance is yielded to the block when one
  # is given.
  def initialize(predicate = "true")
    @predicate = TupleHandle.compile(predicate)
    yield(self) if block_given?
  end

  protected

  # Builds the predicate from commandline arguments. With a single
  # argument it is compiled as an expression; with more, arguments are
  # read pairwise as ATTR VALUE and compiled from the resulting hash.
  # Returns self.
  #
  # NOTE(review): VALUE strings are run through Kernel.eval. The class
  # documentation describes them as ruby code literals, so this is by
  # design, but it must never be fed untrusted input.
  def set_args(args)
    @predicate =
      if args.size > 1
        equalities = tuple_collect(args.each_slice(2)) do |attr, literal|
          [attr, Kernel.eval(literal)]
        end
        TupleHandle.compile(equalities)
      else
        TupleHandle.compile(args.first)
      end
    self
  end

  # Yields each input tuple for which the predicate evaluates to a true
  # value, using a single TupleHandle for all tuples.
  def _each
    handle = TupleHandle.new
    each_input_tuple do |tuple|
      yield(tuple) if handle.set(tuple).evaluate(@predicate)
    end
  end

end # class Restrict
|
2085
|
+
|
2086
|
+
#
|
2087
|
+
# Relational join (and cross-join)
|
2088
|
+
#
|
2089
|
+
# SYNOPSIS
|
2090
|
+
# #{program_name} #{command_name} [LEFT] RIGHT
|
2091
|
+
#
|
2092
|
+
# API & EXAMPLE
|
2093
|
+
#
|
2094
|
+
# (join :suppliers, :parts)
|
2095
|
+
#
|
2096
|
+
# DESCRIPTION
|
2097
|
+
#
|
2098
|
+
# This operator computes the (natural) join of two input iterators. Natural
|
2099
|
+
# join means that, unlike what is commonly used in SQL, the default behavior
|
2100
|
+
# is to join on common attributes. You can use the rename operator if this
|
2101
|
+
# behavior does not fit your needs.
|
2102
|
+
#
|
2103
|
+
# alf join suppliers supplies
|
2104
|
+
#
|
2105
|
+
class Join < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  # Hash-based join: tuples of the right operand are indexed by their
  # projection on the common attributes, then each left tuple is matched
  # against that index.
  class HashBased
    include Operator::Binary

    # Lazily built index over the tuples of an enumerable. The join key is
    # only known once a first probing tuple is seen, hence the deferred
    # initialization.
    class JoinBuffer

      # +enum+ is the enumerable of tuples to index (not read until the
      # first call to #split).
      def initialize(enum)
        @buffer = nil
        @key = nil
        @enum = enum
      end

      # Splits a probing tuple on the join key, building the index on
      # first use.
      #
      # When the indexed enumerable is empty no key can be computed; the
      # result is then [nil, tuple] and #each(nil) yields nothing, so that
      # joining with an empty operand gives an empty result instead of
      # failing on a nil key.
      def split(tuple)
        _init(tuple) unless @buffer
        @key ? @key.split(tuple) : [nil, tuple]
      end

      # Yields every indexed tuple whose projection on the join key equals
      # +key+. has_key? is used so that a lookup never creates an entry
      # through the hash's default block.
      def each(key, &block)
        @buffer[key].each(&block) if @buffer.has_key?(key)
      end

      private

      # Builds the index. The join key is the set of attribute names shared
      # by the first indexed tuple and the probing tuple.
      def _init(probe)
        @buffer = Hash.new{|h,k| h[k] = []}
        @enum.each do |indexed|
          @key ||= Tools::ProjectionKey.coerce(indexed.keys & probe.keys)
          @buffer[@key.project(indexed)] << indexed
        end
      end

    end

    protected

    # Yields the merge of each left tuple with every matching right tuple.
    def _each
      buffer = JoinBuffer.new(right)
      left.each do |left_tuple|
        key, _rest = buffer.split(left_tuple)
        buffer.each(key) do |right_tuple|
          yield(left_tuple.merge(right_tuple))
        end
      end
    end

  end

  protected

  # Long expression of this shortcut: the hash-based implementation over
  # the datasets.
  def longexpr
    chain HashBased.new,
          datasets
  end

end # class Join
|
2163
|
+
|
2164
|
+
#
|
2165
|
+
# Relational intersection (aka a logical and)
|
2166
|
+
#
|
2167
|
+
# SYNOPSIS
|
2168
|
+
# #{program_name} #{command_name} [LEFT] RIGHT
|
2169
|
+
#
|
2170
|
+
# API & EXAMPLE
|
2171
|
+
#
|
2172
|
+
# # Give suppliers that live in Paris and have status >= 20
|
2173
|
+
# (intersect \\
|
2174
|
+
# (restrict :suppliers, lambda{ status >= 20 }),
|
2175
|
+
# (restrict :suppliers, lambda{ city == 'Paris' }))
|
2176
|
+
#
|
2177
|
+
# DESCRIPTION
|
2178
|
+
#
|
2179
|
+
# This operator computes the intersection between its two operands. The
|
2180
|
+
# intersection is simply the set of common tuples between them. Both operands
|
2181
|
+
# must have the same heading.
|
2182
|
+
#
|
2183
|
+
# alf intersect ... ...
|
2184
|
+
#
|
2185
|
+
class Intersect < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  # Hash-based intersection: right tuples are indexed, left tuples are
  # kept when found in the index.
  class HashBased
    include Operator::Binary

    protected

    # Indexes every tuple of the right operand.
    def _prepare
      @index = {}
      right.each { |tuple| @index[tuple] = true }
    end

    # Yields the left tuples that are present in the index.
    def _each
      left.each do |candidate|
        next unless @index.key?(candidate)
        yield(candidate)
      end
    end

  end

  protected

  # Long expression of this shortcut: the hash-based implementation over
  # the datasets.
  def longexpr
    implementation = HashBased.new
    chain implementation, datasets
  end

end # class Intersect
|
2215
|
+
|
2216
|
+
#
|
2217
|
+
# Relational minus (aka difference)
|
2218
|
+
#
|
2219
|
+
# SYNOPSIS
|
2220
|
+
# #{program_name} #{command_name} [LEFT] RIGHT
|
2221
|
+
#
|
2222
|
+
# API & EXAMPLE
|
2223
|
+
#
|
2224
|
+
# # Give all suppliers but those living in Paris
|
2225
|
+
# (minus :suppliers,
|
2226
|
+
# (restrict :suppliers, lambda{ city == 'Paris' }))
|
2227
|
+
#
|
2228
|
+
# DESCRIPTION
|
2229
|
+
#
|
2230
|
+
# This operator computes the difference between its two operands. The
|
2231
|
+
# difference is simply the set of tuples of the left operand not shared by
|
2232
|
+
# the right one.
|
2233
|
+
#
|
2234
|
+
# alf minus ... ...
|
2235
|
+
#
|
2236
|
+
class Minus < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  # Hash-based difference: right tuples are indexed, left tuples are kept
  # when absent from the index.
  class HashBased
    include Operator::Binary

    protected

    # Indexes every tuple of the right operand.
    def _prepare
      @index = {}
      right.each { |tuple| @index[tuple] = true }
    end

    # Yields the left tuples that are not present in the index.
    def _each
      left.each do |candidate|
        next if @index.key?(candidate)
        yield(candidate)
      end
    end

  end

  protected

  # Long expression of this shortcut: the hash-based implementation over
  # the datasets.
  def longexpr
    implementation = HashBased.new
    chain implementation, datasets
  end

end # class Minus
|
2266
|
+
|
2267
|
+
#
|
2268
|
+
# Relational union
|
2269
|
+
#
|
2270
|
+
# SYNOPSIS
|
2271
|
+
# #{program_name} #{command_name} [LEFT] RIGHT
|
2272
|
+
#
|
2273
|
+
# API & EXAMPLE
|
2274
|
+
#
|
2275
|
+
# (union (project :suppliers, [:city]),
|
2276
|
+
# (project :parts, [:city]))
|
2277
|
+
#
|
2278
|
+
# DESCRIPTION
|
2279
|
+
#
|
2280
|
+
# This operator computes the union of two input iterators. Input
|
2281
|
+
# iterators should have the same heading. The result never contains duplicates.
|
2282
|
+
#
|
2283
|
+
# alf union ... ...
|
2284
|
+
#
|
2285
|
+
class Union < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  # Plain concatenation of both operands. It removes no duplicates by
  # itself; #longexpr puts a Compact in the chain with it.
  class DisjointBased
    include Operator::Binary

    protected

    # Yields all left tuples, then all right tuples.
    #
    # The block is captured explicitly: the former block-less Proc.new
    # warns on Ruby 2.7 and raises ArgumentError from Ruby 3.0 on.
    def _each(&block)
      left.each(&block)
      right.each(&block)
    end

  end

  protected

  # Long expression of this shortcut: a Compact chained with the
  # concatenation of the datasets.
  def longexpr
    chain Operator::NonRelational::Compact.new,
          DisjointBased.new,
          datasets
  end

end # class Union
|
2310
|
+
|
2311
|
+
#
|
2312
|
+
# Relational nesting (tuple-valued attributes)
|
2313
|
+
#
|
2314
|
+
# SYNOPSIS
|
2315
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
|
2316
|
+
#
|
2317
|
+
# API & EXAMPLE
|
2318
|
+
#
|
2319
|
+
# (nest :suppliers, [:city, :status], :loc_and_status)
|
2320
|
+
#
|
2321
|
+
# DESCRIPTION
|
2322
|
+
#
|
2323
|
+
# This operator nests attributes ATTR1 to ATTRN as a new, tuple-based
|
2324
|
+
# attribute whose name is NEWNAME. When used in shell, names of nested
|
2325
|
+
# attributes are taken from commandline arguments, except the last one
|
2326
|
+
# which defines the new name to use:
|
2327
|
+
#
|
2328
|
+
# alf nest suppliers -- city status loc_and_status
|
2329
|
+
#
|
2330
|
+
class Nest < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Names of the attributes to nest
  attr_accessor :attributes

  # Name of the tuple-valued attribute that receives them
  attr_accessor :as

  # Creates a Nest operator from the attributes to nest and the name of
  # the new attribute.
  def initialize(attributes = [], as = :nested)
    @attributes = attributes
    @as = as
  end

  protected

  # Reads commandline arguments: the last one is the new attribute name,
  # the others are the attributes to nest. The last element is popped off
  # +args+ itself. Returns self.
  def set_args(args)
    @as = args.pop.to_sym
    @attributes = args.map(&:to_sym)
    self
  end

  # Returns a tuple made of the attributes that are not nested, plus one
  # attribute (named by #as) holding the nested ones as a tuple.
  def _tuple2tuple(tuple)
    kept_names = tuple.keys - @attributes
    result = tuple_collect(kept_names) { |name| [name, tuple[name]] }
    result[as] = tuple_collect(attributes) { |name| [name, tuple[name]] }
    result
  end

end # class Nest
|
2362
|
+
|
2363
|
+
#
|
2364
|
+
# Relational un-nesting (inverse of nest)
|
2365
|
+
#
|
2366
|
+
# SYNOPSIS
|
2367
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR
|
2368
|
+
#
|
2369
|
+
# API & EXAMPLE
|
2370
|
+
#
|
2371
|
+
# # Assuming nested = (nest :suppliers, [:city, :status], :loc_and_status)
|
2372
|
+
# (unnest nested, :loc_and_status)
|
2373
|
+
#
|
2374
|
+
# DESCRIPTION
|
2375
|
+
#
|
2376
|
+
# This operator unnests the tuple-valued attribute named ATTR so as to
|
2377
|
+
# flatten its pairs with 'upstream' tuple. The latter should be such so that
|
2378
|
+
# no name collision occurs. When used in shell, the name of the attribute to
|
2379
|
+
# unnest is taken as the first commandline argument:
|
2380
|
+
#
|
2381
|
+
# alf unnest nest -- loc_and_status
|
2382
|
+
#
|
2383
|
+
class Unnest < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Name of the tuple-valued attribute to flatten
  attr_accessor :attribute

  # Creates an Unnest operator for the given attribute name.
  def initialize(attribute = :nested)
    @attribute = attribute
  end

  protected

  # Takes the attribute name from the first commandline argument
  # (symbolized). Returns self.
  def set_args(args)
    @attribute = args.first.to_sym
    self
  end

  # Works on a copy of the input tuple: removes the nested attribute and
  # merges its pairs into the copy. A missing or nil nested value is
  # treated as an empty tuple.
  def _tuple2tuple(tuple)
    flat = tuple.dup
    nested = flat.delete(@attribute) || {}
    flat.merge(nested)
  end

end # class Unnest
|
2410
|
+
|
2411
|
+
#
|
2412
|
+
# Relational grouping (relation-valued attributes)
|
2413
|
+
#
|
2414
|
+
# SYNOPSIS
|
2415
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
|
2416
|
+
#
|
2417
|
+
# API & EXAMPLE
|
2418
|
+
#
|
2419
|
+
# (group :supplies, [:pid, :qty], :supplying)
|
2420
|
+
# (group :supplies, [:sid], :supplying, true)
|
2421
|
+
#
|
2422
|
+
# DESCRIPTION
|
2423
|
+
#
|
2424
|
+
# This operator groups attributes ATTR1 to ATTRN as a new, relation-valued
|
2425
|
+
# attribute whose name is NEWNAME. When used in shell, names of grouped
|
2426
|
+
# attributes are taken from commandline arguments, except the last one
|
2427
|
+
# which defines the new name to use:
|
2428
|
+
#
|
2429
|
+
# alf group supplies -- pid qty supplying
|
2430
|
+
# alf group supplies --allbut -- sid supplying
|
2431
|
+
#
|
2432
|
+
class Group < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Names of the attributes the grouping is specified on
  attr_accessor :attributes

  # Name of the attribute that receives the grouped tuples
  attr_accessor :as

  # When true, group all attributes but the specified ones
  attr_accessor :allbut

  # Creates a Group operator from the attributes, the name of the new
  # attribute and the allbut flag.
  def initialize(attributes = [], as = :group, allbut = false)
    @attributes = attributes
    @as = as
    @allbut = allbut
  end

  # Commandline options: --allbut turns the allbut flag on.
  options do |opt|
    opt.on('--allbut', "Group all but specified attributes") do
      @allbut = true
    end
  end

  protected

  # Reads commandline arguments: the last one is the new attribute name,
  # the others are the grouping attributes. The last element is popped off
  # +args+ itself. Returns self.
  def set_args(args)
    @as = args.pop.to_sym
    @attributes = args.map(&:to_sym)
    self
  end

  # Splits every input tuple with a projection key built on the attributes
  # and the negated allbut flag, and collects the second part of each
  # split under the first.
  def _prepare
    pkey = ProjectionKey.new(attributes, !allbut)
    @index = Hash.new { |hash, missing| hash[missing] = [] }
    each_input_tuple do |tuple|
      group_key, grouped_part = pkey.split(tuple)
      @index[group_key] << grouped_part
    end
  end

  # Yields one tuple per index entry: the entry's key merged with its
  # collected tuples under the #as attribute.
  def _each
    @index.each_pair do |group_key, grouped|
      yield(group_key.merge(@as => grouped))
    end
  end

end # class Group
|
2482
|
+
|
2483
|
+
#
|
2484
|
+
# Relational un-grouping (inverse of group)
|
2485
|
+
#
|
2486
|
+
# SYNOPSIS
|
2487
|
+
# #{program_name} #{command_name} [OPERAND] -- ATTR
|
2488
|
+
#
|
2489
|
+
# API & EXAMPLE
|
2490
|
+
#
|
2491
|
+
# # Assuming grouped = (group enum, [:pid, :qty], :supplying)
|
2492
|
+
# (ungroup grouped, :supplying)
|
2493
|
+
#
|
2494
|
+
# DESCRIPTION
|
2495
|
+
#
|
2496
|
+
# This operator ungroups the relation-valued attribute named ATTR and outputs
|
2497
|
+
# tuples as the flattening of each of its tuples merged with the upstream
|
2498
|
+
# one. Sub relation should be such so that no name collision occurs. When
|
2499
|
+
# used in shell, the name of the attribute to ungroup is taken as the first
|
2500
|
+
# commandline argument:
|
2501
|
+
#
|
2502
|
+
# alf ungroup group -- supplying
|
2503
|
+
#
|
2504
|
+
class Ungroup < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Name of the relation-valued attribute to ungroup
  attr_accessor :attribute

  # Creates an Ungroup operator for the given attribute name.
  def initialize(attribute = :grouped)
    @attribute = attribute
  end

  protected

  # Takes the attribute name from the last commandline argument
  # (symbolized), popping it off +args+ itself. Returns self.
  def set_args(args)
    @attribute = args.pop.to_sym
    self
  end

  # For each input tuple, removes the grouped attribute from a copy and
  # yields that copy merged with every tuple of the removed value.
  def _each
    each_input_tuple do |tuple|
      outer = tuple.dup
      subrel = outer.delete(@attribute)
      subrel.each { |subtuple| yield(outer.merge(subtuple)) }
    end
  end

end # class Ungroup
|
2535
|
+
|
2536
|
+
#
|
2537
|
+
# Relational summarization (group-by + aggregate ops)
|
2538
|
+
#
|
2539
|
+
# SYNOPSIS
|
2540
|
+
# #{program_name} #{command_name} [OPERAND] --by=KEY1,KEY2... -- AGG1 EXPR1...
|
2541
|
+
#
|
2542
|
+
# OPTIONS
|
2543
|
+
# #{summarized_options}
|
2544
|
+
#
|
2545
|
+
# API & EXAMPLE
|
2546
|
+
#
|
2547
|
+
# (summarize :supplies, [:sid],
|
2548
|
+
# :total_qty => Aggregator.sum(:qty))
|
2549
|
+
#
|
2550
|
+
# DESCRIPTION
|
2551
|
+
#
|
2552
|
+
# This operator summarizes input tuples on the projection on KEY1,KEY2,...
|
2553
|
+
# attributes and applies aggregate operators on sets of matching tuples.
|
2554
|
+
# Introduced names AGG should be disjoint from KEY attributes.
|
2555
|
+
#
|
2556
|
+
# When used in shell, the aggregations are taken from commandline arguments
|
2557
|
+
# AGG and EXPR, where AGG is the name of a new attribute and EXPR is an
|
2558
|
+
# aggregation expression evaluated on Aggregator:
|
2559
|
+
#
|
2560
|
+
# alf summarize supplies --by=sid -- total_qty "sum(:qty)"
|
2561
|
+
#
|
2562
|
+
class Summarize < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Unary

  # Names of the attributes to summarize by
  attr_accessor :by

  # Hash mapping each new attribute name to its aggregator
  attr_accessor :aggregators

  # Creates a Summarize operator from by-attributes and aggregators.
  def initialize(by = [], aggregators = {})
    @by = by
    @aggregators = aggregators
  end

  # Commandline options: --by=x,y,z sets the by-attributes (symbolized).
  options do |opt|
    opt.on('--by=x,y,z', 'Specify by attributes', Array) do |names|
      @by = names.map(&:to_sym)
    end
  end

  # Summarization built on the Cesure hooks below; #longexpr chains it
  # with a Sort on the by key.
  class SortBased
    include Alf::Operator::Cesure

    attr_reader :cesure_key, :aggregators

    # +by_key+ becomes the cesure key; +aggregators+ is a name => aggregator
    # hash.
    def initialize(by_key, aggregators)
      @cesure_key = by_key
      @aggregators = aggregators
    end

    protected

    # Resets the running values to each aggregator's least value.
    def start_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg| [name, agg.least] }
    end

    # Feeds one tuple to every aggregator, updating the running values.
    def accumulate_cesure(tuple, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.happens(@aggs[name], tuple)]
      }
    end

    # Finalizes the running values and sends the key merged with them to
    # the receiver.
    def flush_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.finalize(@aggs[name])]
      }
      receiver.call(key.merge(@aggs))
    end

  end # class SortBased

  protected

  # Builds the aggregators from commandline arguments read pairwise as
  # AGG EXPR: names are symbolized, expressions go through
  # Aggregator.compile. Returns self.
  def set_args(args)
    @aggregators = tuple_collect(args.each_slice(2)) { |name, expr|
      [name.to_sym, Aggregator.compile(expr)]
    }
    self
  end

  # Long expression of this shortcut: a SortBased summarization chained
  # with a Sort on the by key, over the datasets.
  def longexpr
    by_key     = Tools::ProjectionKey.new(@by, false)
    summarizer = SortBased.new(by_key, @aggregators)
    sorter     = Operator::NonRelational::Sort.new(by_key.to_ordering_key)
    chain summarizer, sorter, datasets
  end

end # class Summarize
|
2635
|
+
|
2636
|
+
#
|
2637
|
+
# Relational quota-queries (position, sum progression, etc.)
|
2638
|
+
#
|
2639
|
+
# SYNOPSIS
|
2640
|
+
# #{program_name} #{command_name} [OPERAND] --by=KEY1,... --order=OR1... AGG1 EXPR1...
|
2641
|
+
#
|
2642
|
+
# OPTIONS
|
2643
|
+
# #{summarized_options}
|
2644
|
+
#
|
2645
|
+
# API & EXAMPLE
|
2646
|
+
#
|
2647
|
+
# (quota :supplies, [:sid], [:qty],
|
2648
|
+
# :position => Aggregator.count,
|
2649
|
+
# :sum_qty => Aggregator.sum(:qty))
|
2650
|
+
#
|
2651
|
+
# DESCRIPTION
|
2652
|
+
#
|
2653
|
+
# This operator computes quota values on input tuples.
|
2654
|
+
#
|
2655
|
+
# alf quota supplies --by=sid --order=qty -- position count sum_qty "sum(:qty)"
|
2656
|
+
#
|
2657
|
+
class Quota < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Unary

  # Names of the attributes to compute quotas by
  attr_accessor :by

  # Ordering used inside each by-group
  attr_accessor :order

  # Hash mapping each new attribute name to its aggregator
  attr_accessor :aggregators

  # Creates a Quota operator from by-attributes, an order and aggregators.
  def initialize(by = [], order = [], aggregators = {})
    @by = by
    @order = order
    @aggregators = aggregators
  end

  # Commandline options: --by=x,y,z and --order=x,y,z set the by and order
  # attributes (symbolized).
  options do |opt|
    opt.on('--by=x,y,z', 'Specify by attributes', Array) do |names|
      @by = names.map(&:to_sym)
    end
    opt.on('--order=x,y,z', 'Specify order attributes', Array) do |names|
      @order = names.map(&:to_sym)
    end
  end

  # Quota computation built on the Cesure hooks below; #longexpr chains it
  # with a Sort on the by key followed by the order.
  class SortBased
    include Operator::Cesure

    def initialize(by, order, aggregators)
      @by = by
      @order = order
      @aggregators = aggregators
    end

    # Projection key coerced from the by-attributes.
    def cesure_key
      ProjectionKey.coerce(@by)
    end

    # Ordering key coerced from the order.
    def ordering_key
      OrderingKey.coerce(@order)
    end

    # Resets the running values to each aggregator's least value.
    def start_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg| [name, agg.least] }
    end

    # Feeds one tuple to every aggregator, then sends that tuple merged
    # with the finalized running values to the receiver. The running
    # values themselves are kept un-finalized for the next tuple.
    def accumulate_cesure(tuple, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.happens(@aggs[name], tuple)]
      }
      snapshot = tuple_collect(@aggregators) { |name, agg|
        [name, agg.finalize(@aggs[name])]
      }
      receiver.call(tuple.merge(snapshot))
    end

  end # class SortBased

  protected

  # Builds the aggregators from commandline arguments read pairwise as
  # AGG EXPR: names are symbolized, expressions go through
  # Aggregator.compile. Returns self.
  def set_args(args)
    @aggregators = tuple_collect(args.each_slice(2)) { |name, expr|
      [name.to_sym, Aggregator.compile(expr)]
    }
    self
  end

  # Projection key coerced from the by-attributes.
  def cesure_key
    ProjectionKey.coerce(@by)
  end

  # Ordering key coerced from the order.
  def ordering_key
    OrderingKey.coerce(@order)
  end

  # Long expression of this shortcut: a SortBased quota computation
  # chained with a Sort on the by key followed by the order, over the
  # datasets.
  def longexpr
    sort_key = cesure_key.to_ordering_key + ordering_key
    quota    = SortBased.new(@by, @order, @aggregators)
    sorter   = Operator::NonRelational::Sort.new(sort_key)
    chain quota, sorter, datasets
  end

end # class Quota
|
2741
|
+
|
2742
|
+
end
|
2743
|
+
|
2744
|
+
#
|
2745
|
+
# Aggregation operator.
|
2746
|
+
#
|
2747
|
+
class Aggregator

  # Aggregate options
  attr_reader :options

  #
  # Automatically installs factory methods for inherited classes.
  #
  # Example:
  #   class Sum < Aggregator   # will give a method Aggregator.sum
  #     ...
  #   end
  #   Aggregator.sum(:size)    # builds a Sum aggregator on tuple[:size]
  #   Aggregator.sum{ size }   # idem but works on any tuple expression
  #
  def self.inherited(clazz)
    basename = Tools.ruby_case(Tools.class_name(clazz))
    # The factory is generated from source text interpolating the
    # subclass; it forwards its arguments and block to `new`.
    instance_eval <<-EOF
      def #{basename}(*args, &block)
        #{clazz}.new(*args, &block)
      end
    EOF
  end

  #
  # Compiles an aggregation expression by evaluating it in the context
  # of this class, where the factory methods installed by +inherited+
  # are defined.
  #
  # Either a source String or a block may be given:
  #
  #   Aggregator.compile("sum(:size)")
  #   Aggregator.compile{ sum(:size) }
  #
  # NOTE(review): a String expression is executed as Ruby code through
  # instance_eval; make sure it never comes from an untrusted source.
  #
  def self.compile(expr = nil, &block)
    # instance_eval rejects a block combined with any positional
    # argument (even nil), so the block-only form is dispatched apart.
    return instance_eval(&block) if expr.nil? && block
    instance_eval(expr, &block)
  end

  #
  # Creates an Aggregator instance.
  #
  # This constructor can be used either by passing an attribute
  # argument or a block that will be evaluated on a TupleHandle
  # instance set on each aggregated tuple.
  #
  #   Aggregator.new(:size)          # will aggregate on tuple[:size]
  #   Aggregator.new{ size * price } # ... on tuple[:size] * tuple[:price]
  #
  # An options Hash may be passed as second argument, or as the only
  # argument when a block is used; it is merged over +default_options+.
  #
  def initialize(attribute = nil, options = {}, &block)
    # A Hash in first position is the options, not an attribute.
    attribute, options = nil, attribute if attribute.is_a?(Hash)
    @handle = Tools::TupleHandle.new
    @options = default_options.merge(options)
    @functor = Tools::TupleHandle.compile(attribute || block)
  end

  #
  # Returns the default options to use
  #
  def default_options
    {}
  end

  #
  # Returns the least value, which is the one to use on an empty
  # set.
  #
  # This method is intended to be overridden by subclasses; default
  # implementation returns nil.
  #
  def least
    nil
  end

  #
  # This method is called on each aggregated tuple and must return
  # an updated _memo_ value. It can be seen as the block typically
  # given to Enumerable.inject.
  #
  # The default implementation collects the pre-value on the tuple
  # and delegates to _happens.
  #
  def happens(memo, tuple)
    _happens(memo, @handle.set(tuple).evaluate(@functor))
  end

  #
  # This method finalizes a computation.
  #
  # Argument _memo_ is either _least_ or the result of aggregating
  # through _happens_. The default implementation simply returns
  # _memo_. The method is intended to be overridden for complex
  # aggregations that need stateful information. See Avg for an
  # example
  #
  def finalize(memo)
    memo
  end

  #
  # Aggregates over an enumeration of tuples.
  #
  def aggregate(enum)
    finalize(
      enum.inject(least){|memo,tuple|
        happens(memo, tuple)
      })
  end

  protected

  #
  # @see happens.
  #
  # This method is intended to be overridden and returns _value_
  # by default, making this aggregator a "Last" one...
  #
  def _happens(memo, value)
    value
  end

  #
  # Defines a COUNT aggregation operator
  #
  class Count < Aggregator
    def least(); 0; end
    # Overrides happens (not _happens): counting does not need the
    # tuple expression to be evaluated at all.
    def happens(memo, tuple) memo + 1; end
  end # class Count

  #
  # Defines a SUM aggregation operator
  #
  class Sum < Aggregator
    def least(); 0; end
    def _happens(memo, val) memo + val; end
  end # class Sum

  #
  # Defines an AVG aggregation operator
  #
  class Avg < Aggregator
    # The memo is a [running sum, number of values] pair.
    def least(); [0.0, 0.0]; end
    def _happens(memo, val) [memo.first + val, memo.last + 1]; end
    # On an empty set this computes 0.0 / 0.0, i.e. NaN.
    def finalize(memo) memo.first / memo.last end
  end # class Avg

  #
  # Defines a MIN aggregation operator
  #
  class Min < Aggregator
    def least(); nil; end
    def _happens(memo, val)
      # A nil memo means no value has been seen yet.
      return val if memo.nil?
      memo < val ? memo : val
    end
  end # class Min

  #
  # Defines a MAX aggregation operator
  #
  class Max < Aggregator
    def least(); nil; end
    def _happens(memo, val)
      # A nil memo means no value has been seen yet.
      return val if memo.nil?
      memo > val ? memo : val
    end
  end # class Max

  #
  # Defines a GROUP aggregation operator, collecting the distinct
  # projections of the aggregated tuples on the given attributes.
  #
  class Group < Aggregator
    def initialize(*attrs)
      super(nil, {}){
        Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
      }
    end
    def least(); []; end
    def _happens(memo, val)
      memo << val
    end
    def finalize(memo)
      memo.uniq
    end
  end # class Group

  #
  # Defines a COLLECT aggregation operator
  #
  class Collect < Aggregator
    def least(); []; end
    def _happens(memo, val)
      memo << val
    end
    # Hands out a copy of the memo: a caller that finalizes an
    # intermediate memo and then keeps accumulating into it would
    # otherwise see the previously returned arrays grow.
    def finalize(memo)
      memo.dup
    end
  end # class Collect

  #
  # Defines a CONCAT aggregation operator
  #
  # Recognized options are :before, :after and :between, all
  # defaulting to the empty string.
  #
  class Concat < Aggregator
    def least(); ""; end
    def default_options
      {:before => "", :after => "", :between => ""}
    end
    def _happens(memo, val)
      # The separator is only inserted once something was appended.
      memo << options[:between].to_s unless memo.empty?
      memo << val.to_s
    end
    def finalize(memo)
      options[:before].to_s + memo + options[:after].to_s
    end
  end # class Concat

  Lispy::Agg = Aggregator
end # class Aggregator
|
2949
|
+
|
2950
|
+
#
|
2951
|
+
# Base class for implementing buffers.
|
2952
|
+
#
|
2953
|
+
class Buffer

  #
  # Keeps tuples ordered on a specific key
  #
  class Sorted < Buffer

    # Creates an empty buffer. _ordering_key_ must respond to +sorter+,
    # whose result is passed as the comparison block to Array#sort.
    def initialize(ordering_key)
      @ordering_key = ordering_key
      @buffer = []
    end

    # Adds all tuples of _enum_ to the buffer, keeping it sorted.
    def add_all(enum)
      sorter = @ordering_key.sorter
      @buffer = merge_sort(@buffer, enum.to_a.sort(&sorter), sorter)
    end

    # Yields each buffered tuple in order. Returns an enumerator when
    # called without a block.
    #
    # The block is captured explicitly: relying on a bare Proc.new to
    # grab the caller's block is no longer supported by recent Rubies.
    def each(&block)
      return to_enum(:each) unless block
      @buffer.each(&block)
    end

    private

    # Combines two sorted arrays into one sorted array. Implemented by
    # re-sorting the concatenation rather than by a linear merge.
    def merge_sort(s1, s2, sorter)
      (s1 + s2).sort(&sorter)
    end

  end # class Buffer::Sorted

end # class Buffer
|
2983
|
+
|
2984
|
+
end # module Alf
|