alf 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/CHANGELOG.md +5 -0
  2. data/Gemfile +2 -0
  3. data/Gemfile.lock +42 -0
  4. data/LICENCE.md +22 -0
  5. data/Manifest.txt +15 -0
  6. data/README.md +769 -0
  7. data/Rakefile +23 -0
  8. data/TODO.md +26 -0
  9. data/alf.gemspec +191 -0
  10. data/alf.noespec +30 -0
  11. data/bin/alf +31 -0
  12. data/examples/autonum.alf +6 -0
  13. data/examples/cities.rash +4 -0
  14. data/examples/clip.alf +3 -0
  15. data/examples/compact.alf +2 -0
  16. data/examples/database.alf +6 -0
  17. data/examples/defaults.alf +3 -0
  18. data/examples/extend.alf +3 -0
  19. data/examples/group.alf +3 -0
  20. data/examples/intersect.alf +4 -0
  21. data/examples/join.alf +2 -0
  22. data/examples/minus.alf +8 -0
  23. data/examples/nest.alf +2 -0
  24. data/examples/nulls.rash +3 -0
  25. data/examples/parts.rash +6 -0
  26. data/examples/project.alf +2 -0
  27. data/examples/quota.alf +4 -0
  28. data/examples/rename.alf +3 -0
  29. data/examples/restrict.alf +2 -0
  30. data/examples/runall.sh +26 -0
  31. data/examples/schema.yaml +28 -0
  32. data/examples/sort.alf +4 -0
  33. data/examples/summarize.alf +16 -0
  34. data/examples/suppliers.rash +5 -0
  35. data/examples/supplies.rash +12 -0
  36. data/examples/ungroup.alf +4 -0
  37. data/examples/union.alf +3 -0
  38. data/examples/unnest.alf +4 -0
  39. data/examples/with.alf +23 -0
  40. data/lib/alf.rb +2984 -0
  41. data/lib/alf/loader.rb +1 -0
  42. data/lib/alf/renderer/text.rb +153 -0
  43. data/lib/alf/renderer/yaml.rb +22 -0
  44. data/lib/alf/version.rb +14 -0
  45. data/spec/aggregator_spec.rb +62 -0
  46. data/spec/alf_spec.rb +47 -0
  47. data/spec/assumptions_spec.rb +15 -0
  48. data/spec/environment/explicit_spec.rb +15 -0
  49. data/spec/environment/folder_spec.rb +30 -0
  50. data/spec/examples_spec.rb +26 -0
  51. data/spec/lispy_spec.rb +23 -0
  52. data/spec/operator/command_methods_spec.rb +38 -0
  53. data/spec/operator/non_relational/autonum_spec.rb +61 -0
  54. data/spec/operator/non_relational/clip_spec.rb +49 -0
  55. data/spec/operator/non_relational/compact/buffer_based.rb +30 -0
  56. data/spec/operator/non_relational/compact/sort_based_spec.rb +30 -0
  57. data/spec/operator/non_relational/compact_spec.rb +38 -0
  58. data/spec/operator/non_relational/defaults_spec.rb +55 -0
  59. data/spec/operator/non_relational/sort_spec.rb +66 -0
  60. data/spec/operator/relational/extend_spec.rb +34 -0
  61. data/spec/operator/relational/group_spec.rb +54 -0
  62. data/spec/operator/relational/intersect_spec.rb +58 -0
  63. data/spec/operator/relational/join/hash_based_spec.rb +63 -0
  64. data/spec/operator/relational/minus_spec.rb +56 -0
  65. data/spec/operator/relational/nest_spec.rb +32 -0
  66. data/spec/operator/relational/project_spec.rb +65 -0
  67. data/spec/operator/relational/quota_spec.rb +44 -0
  68. data/spec/operator/relational/rename_spec.rb +32 -0
  69. data/spec/operator/relational/restrict_spec.rb +56 -0
  70. data/spec/operator/relational/summarize/sort_based_spec.rb +31 -0
  71. data/spec/operator/relational/summarize_spec.rb +41 -0
  72. data/spec/operator/relational/ungroup_spec.rb +35 -0
  73. data/spec/operator/relational/union_spec.rb +35 -0
  74. data/spec/operator/relational/unnest_spec.rb +32 -0
  75. data/spec/reader/alf_file_spec.rb +15 -0
  76. data/spec/reader/input.rb +2 -0
  77. data/spec/reader/rash_spec.rb +31 -0
  78. data/spec/reader_spec.rb +27 -0
  79. data/spec/renderer/text/cell_spec.rb +34 -0
  80. data/spec/renderer/text/row_spec.rb +30 -0
  81. data/spec/renderer/text/table_spec.rb +39 -0
  82. data/spec/renderer_spec.rb +42 -0
  83. data/spec/spec_helper.rb +26 -0
  84. data/spec/tools/ordering_key_spec.rb +81 -0
  85. data/spec/tools/projection_key_spec.rb +83 -0
  86. data/spec/tools/tools_spec.rb +25 -0
  87. data/spec/tools/tuple_handle_spec.rb +78 -0
  88. data/tasks/debug_mail.rake +78 -0
  89. data/tasks/debug_mail.txt +13 -0
  90. data/tasks/gem.rake +68 -0
  91. data/tasks/spec_test.rake +79 -0
  92. data/tasks/unit_test.rake +77 -0
  93. data/tasks/yard.rake +51 -0
  94. metadata +282 -0
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env alf
2
+ (sort :suppliers, [:name])
3
+ (sort :suppliers, [:city, :name])
4
+ (sort :suppliers, [[:city, :desc], [:name, :asc]])
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env alf
2
+
3
+ # What is the sum of supplied quantities by supplier?
4
+ (summarize :supplies,
5
+ [:sid],
6
+ :total_qty => Agg::sum(:qty))
7
+
8
+ # Give the maximal supplied quantity by country, taking only into account
9
+ # suppliers that have a status greater than 10
10
+ (summarize \
11
+ (join \
12
+ (join (restrict :suppliers, lambda{ status > 10 }),
13
+ :supplies),
14
+ :cities),
15
+ [:country],
16
+ :maxqty => Agg::sum{ qty })
@@ -0,0 +1,5 @@
1
+ {:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'}
2
+ {:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'}
3
+ {:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'}
4
+ {:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'}
5
+ {:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'}
@@ -0,0 +1,12 @@
1
+ {:sid => 'S1', :pid => 'P1', :qty => 300}
2
+ {:sid => 'S1', :pid => 'P2', :qty => 200}
3
+ {:sid => 'S1', :pid => 'P3', :qty => 400}
4
+ {:sid => 'S1', :pid => 'P4', :qty => 200}
5
+ {:sid => 'S1', :pid => 'P5', :qty => 100}
6
+ {:sid => 'S1', :pid => 'P6', :qty => 100}
7
+ {:sid => 'S2', :pid => 'P1', :qty => 300}
8
+ {:sid => 'S2', :pid => 'P2', :qty => 400}
9
+ {:sid => 'S3', :pid => 'P2', :qty => 200}
10
+ {:sid => 'S4', :pid => 'P2', :qty => 200}
11
+ {:sid => 'S4', :pid => 'P4', :qty => 300}
12
+ {:sid => 'S4', :pid => 'P5', :qty => 400}
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env alf
2
+ (ungroup \
3
+ (group :supplies, [:pid, :qty], :supplying),
4
+ :supplying)
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env alf
2
+ (union (project :suppliers, [:city]),
3
+ (project :parts, [:city]))
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env alf
2
+ (unnest \
3
+ (nest :suppliers, [:city, :status], :loc_and_status),
4
+ :loc_and_status)
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env alf
2
+
3
+ # Compute the total qty supplied in each country together with the subset
4
+ # of products shipped there. Only consider suppliers that have a status
5
+ # greater than 10, however.
6
+ (summarize \
7
+ (join \
8
+ (join (restrict :suppliers, lambda{ status > 10 }),
9
+ :supplies),
10
+ :cities),
11
+ [:country],
12
+ :which => Agg::group(:pid),
13
+ :total => Agg::sum{ qty })
14
+
15
+ # Another equivalent way to write it
16
+ with( :kept_suppliers => (restrict :suppliers, lambda{ status > 10 }),
17
+ :with_countries => (join :kept_suppliers, :cities),
18
+ :supplying => (join :with_countries, :supplies) ) do
19
+ (summarize :supplying,
20
+ [:country],
21
+ :which => Agg::group(:pid),
22
+ :total => Agg::sum{ qty })
23
+ end
@@ -0,0 +1,2984 @@
1
+ require "enumerator"
2
+ require "stringio"
3
+ require "alf/version"
4
+ require "alf/loader"
5
+
6
+ #
7
+ # Classy data-manipulation dressed in a DSL (+ commandline)
8
+ #
9
+ module Alf
10
+
11
+ #
12
+ # Provides tooling methods that are used here and there in Alf.
13
+ #
14
+ module Tools
15
+
16
+ #
17
+ # Returns the unqualified name of a ruby class or module
18
+ #
19
+ # Example
20
+ #
21
+ # class_name(Alf::Tools) -> :Tools
22
+ #
23
+ def class_name(clazz)
24
+ clazz.name.to_s =~ /([A-Za-z0-9_]+)$/
25
+ $1.to_sym
26
+ end
27
+
28
+ #
29
+ # Converts an unqualified class or module name to a ruby case method name.
30
+ #
31
+ # Example
32
+ #
33
+ # ruby_case(:Alf) -> "alf"
34
+ # ruby_case(:HelloWorld) -> "hello_world"
35
+ #
36
+ def ruby_case(s)
37
+ s.to_s.gsub(/[A-Z]/){|x| "_#{x.downcase}"}[1..-1]
38
+ end
39
+
40
+ #
41
+ # Returns the first non nil values from arguments
42
+ #
43
+ # Example
44
+ #
45
+ # coalesce(nil, 1, "abc") -> 1
46
+ #
47
+ def coalesce(*args)
48
+ args.find{|x| !x.nil?}
49
+ end
50
+
51
+ #
52
+ # Iterates over enum and yields the block on each element.
53
+ # Collects the block results as key/value pairs and returns them as
54
+ # a Hash.
55
+ #
56
+ def tuple_collect(enum)
57
+ tuple = {}
58
+ enum.each do |elm|
59
+ k, v = yield(elm)
60
+ tuple[k] = v
61
+ end
62
+ tuple
63
+ end
64
+
65
+ #
66
+ # Provides a handle, implementing a flyweight design pattern on tuples.
67
+ #
68
+ class TupleHandle
69
+
70
+ # Creates a handle instance
71
+ def initialize
72
+ @tuple = nil
73
+ end
74
+
75
+ #
76
+ # Sets the next tuple to use.
77
+ #
78
+ # This method installs the handle as a side effect
79
+ # on first call.
80
+ #
81
+ def set(tuple)
82
+ build(tuple) if @tuple.nil?
83
+ @tuple = tuple
84
+ self
85
+ end
86
+
87
+ #
88
+ # Compiles a tuple expression and returns a lambda
89
+ # instance that can be passed to evaluate later.
90
+ #
91
+ def self.compile(expr)
92
+ case expr
93
+ when Proc
94
+ expr
95
+ when NilClass
96
+ compile('true')
97
+ when Hash
98
+ if expr.empty?
99
+ compile(nil)
100
+ else
101
+ # TODO: replace inspect by to_ruby
102
+ compile expr.each_pair.collect{|k,v|
103
+ "(#{k} == #{v.inspect})"
104
+ }.join(" && ")
105
+ end
106
+ when Array
107
+ compile(Hash[*expr])
108
+ when String, Symbol
109
+ eval("lambda{ #{expr} }")
110
+ else
111
+ raise ArgumentError, "Unable to compile #{expr} to a TupleHandle"
112
+ end
113
+ end
114
+
115
+ #
116
+ # Evaluates an expression on the current tuple. Expression
117
+ # can be a lambda or a string (immediately compiled in the
118
+ # latter case).
119
+ #
120
+ def evaluate(expr)
121
+ if RUBY_VERSION < "1.9"
122
+ instance_eval(&TupleHandle.compile(expr))
123
+ else
124
+ instance_exec(&TupleHandle.compile(expr))
125
+ end
126
+ end
127
+
128
+ private
129
+
130
+ #
131
+ # Builds this handle with a tuple.
132
+ #
133
+ # This method should be called only once and installs
134
+ # instance methods on the handle with keys of _tuple_.
135
+ #
136
+ def build(tuple)
137
+ tuple.keys.each do |k|
138
+ (class << self; self; end).send(:define_method, k) do
139
+ @tuple[k]
140
+ end
141
+ end
142
+ end
143
+
144
+ end # class TupleHandle
145
+
146
+ #
147
+ # Defines a projection key
148
+ #
149
+ class ProjectionKey
150
+ include Tools
151
+
152
+ # Projection attributes
153
+ attr_accessor :attributes
154
+
155
+ # Allbut projection?
156
+ attr_accessor :allbut
157
+
158
+ def initialize(attributes, allbut = false)
159
+ @attributes = attributes
160
+ @allbut = allbut
161
+ end
162
+
163
+ def self.coerce(arg)
164
+ case arg
165
+ when Array
166
+ ProjectionKey.new(arg, false)
167
+ when OrderingKey
168
+ ProjectionKey.new(arg.attributes, false)
169
+ when ProjectionKey
170
+ arg
171
+ else
172
+ raise ArgumentError, "Unable to coerce #{arg} to a projection key"
173
+ end
174
+ end
175
+
176
+ def to_ordering_key
177
+ OrderingKey.new attributes.collect{|arg|
178
+ [arg, :asc]
179
+ }
180
+ end
181
+
182
+ def project(tuple)
183
+ split(tuple).first
184
+ end
185
+
186
+ def split(tuple)
187
+ projection, rest = {}, tuple.dup
188
+ attributes.each do |a|
189
+ projection[a] = tuple[a]
190
+ rest.delete(a)
191
+ end
192
+ @allbut ? [rest, projection] : [projection, rest]
193
+ end
194
+
195
+ end # class ProjectionKey
196
+
197
+ #
198
+ # Encapsulates tools for computing orders on tuples
199
+ #
200
+ class OrderingKey
201
+
202
+ attr_reader :ordering
203
+
204
+ def initialize(ordering = [])
205
+ @ordering = ordering
206
+ @sorter = nil
207
+ end
208
+
209
+ def self.coerce(arg)
210
+ case arg
211
+ when Array
212
+ if arg.all?{|a| a.is_a?(Symbol)}
213
+ arg = arg.collect{|a| [a, :asc]}
214
+ end
215
+ OrderingKey.new(arg)
216
+ when ProjectionKey
217
+ arg.to_ordering_key
218
+ when OrderingKey
219
+ arg
220
+ else
221
+ raise ArgumentError, "Unable to coerce #{arg} to an ordering key"
222
+ end
223
+ end
224
+
225
+ def attributes
226
+ @ordering.collect{|arg| arg.first}
227
+ end
228
+
229
+ def order_by(attr, order = :asc)
230
+ @ordering << [attr, order]
231
+ @sorter = nil
232
+ self
233
+ end
234
+
235
+ def order_of(attr)
236
+ @ordering.find{|arg| arg.first == attr}.last
237
+ end
238
+
239
+ def compare(t1,t2)
240
+ @ordering.each do |attr,order|
241
+ comp = (t1[attr] <=> t2[attr])
242
+ comp *= -1 if order == :desc
243
+ return comp unless comp == 0
244
+ end
245
+ return 0
246
+ end
247
+
248
+ def sorter
249
+ @sorter ||= lambda{|t1,t2| compare(t1, t2)}
250
+ end
251
+
252
+ def +(other)
253
+ other = OrderingKey.coerce(other)
254
+ OrderingKey.new(@ordering + other.ordering)
255
+ end
256
+
257
+ end # class OrderingKey
258
+
259
+ extend Tools
260
+ end # module Tools
261
+
262
+ #
263
+ # Builds and returns a lispy engine on a specific environment.
264
+ #
265
+ # Example(s):
266
+ #
267
+ # # Returns a lispy instance on the default environment
268
+ # lispy = Alf.lispy
269
+ #
270
+ # # Returns a lispy instance on the examples' environment
271
+ # lispy = Alf.lispy(Alf::Environment.examples)
272
+ #
273
+ # # Returns a lispy instance on a folder environment of your choice
274
+ # lispy = Alf.lispy(Alf::Environment.folder('path/to/a/folder'))
275
+ #
276
+ # @see Alf::Environment about available environments and their contract
277
+ #
278
+ def self.lispy(env = Alf::Environment.default)
279
+ Command::Main.new(env)
280
+ end
281
+
282
+ #
283
+ # Implements a small LISP-like DSL on top of Alf.
284
+ #
285
+ # The lispy dialect is the functional one used in .alf files and in compiled
286
+ # expressions as below:
287
+ #
288
+ # Alf.lispy.compile do
289
+ # (restrict :suppliers, lambda{ city == 'London' })
290
+ # end
291
+ #
292
+ # The DSL this module provides is part of Alf's public API and won't be broken
293
+ # without a major version change. The module itself and its inclusion pre-
294
+ # conditions are not part of the DSL itself, thus not considered as part of
295
+ # the API, and may therefore evolve at any time. In other words, this module
296
+ # is not intended to be directly included by third-party classes.
297
+ #
298
+ module Lispy
299
+
300
+ # The environment
301
+ attr_accessor :environment
302
+
303
+ #
304
+ # Compiles a query expression given by a String or a block and returns
305
+ # the result (typically a tuple iterator)
306
+ #
307
+ def compile(expr = nil, &block)
308
+ expr.nil? ? instance_eval(&block) : instance_eval(expr)
309
+ end
310
+
311
+ # Delegated to the environment
312
+ def dataset(name)
313
+ raise "Environment not set" unless @environment
314
+ @environment.dataset(name)
315
+ end
316
+
317
+ #
318
+ # Compiles the subexpression given by the block in the context of
319
+ # additional temporary expressions given by definitions
320
+ #
321
+ def with(definitions)
322
+ # We branch with the definitions for compilation
323
+ self.environment = environment.branch(definitions)
324
+
325
+ # this is to ensure that sub definitions can reuse other
326
+ # ones
327
+ definitions.each_value do |defn|
328
+ defn.environment = self.environment
329
+ end
330
+
331
+ # compile now
332
+ op = compile(&Proc.new)
333
+
334
+ # We now unbranch for next expression
335
+ self.environment = environment.unbranch
336
+
337
+ op
338
+ end
339
+
340
+ #
341
+ # Chains some elements as a new operator
342
+ #
343
+ def chain(*elements)
344
+ elements = elements.reverse
345
+ elements[1..-1].inject(elements.first) do |c, elm|
346
+ elm.pipe(c, environment)
347
+ elm
348
+ end
349
+ end
350
+
351
+ [ :Autonum, :Clip, :Compact, :Defaults, :Sort ].each do |op_name|
352
+ meth_name = Tools.ruby_case(op_name).to_sym
353
+ define_method(meth_name) do |child, *args|
354
+ chain(Operator::NonRelational.const_get(op_name).new(*args), child)
355
+ end
356
+ end
357
+
358
+ [:Project,
359
+ :Extend,
360
+ :Rename,
361
+ :Restrict,
362
+ :Nest,
363
+ :Unnest,
364
+ :Group,
365
+ :Ungroup,
366
+ :Summarize,
367
+ :Quota ].each do |op_name|
368
+ meth_name = Tools.ruby_case(op_name).to_sym
369
+ define_method(meth_name) do |child, *args|
370
+ chain(Operator::Relational.const_get(op_name).new(*args), child)
371
+ end
372
+ end
373
+
374
+ def allbut(child, attributes)
375
+ chain(Operator::Relational::Project.new(attributes, true), child)
376
+ end
377
+
378
+ [ :Join,
379
+ :Union,
380
+ :Intersect,
381
+ :Minus ].each do |op_name|
382
+ meth_name = Tools.ruby_case(op_name).to_sym
383
+ define_method(meth_name) do |left, right, *args|
384
+ chain(Operator::Relational.const_get(op_name).new(*args), [left, right])
385
+ end
386
+ end
387
+
388
+ end # module Lispy
389
+
390
+ #
391
+ # Encapsulates the interface with the outside world, providing base iterators
392
+ # for named datasets, among others.
393
+ #
394
+ # An environment is typically obtained through the factory defined by this
395
+ # class:
396
+ #
397
+ # # Returns the default environment (examples, for now)
398
+ # Alf::Environment.default
399
+ #
400
+ # # Returns an environment on Alf's examples
401
+ # Alf::Environment.examples
402
+ #
403
+ # # Returns an environment on a specific folder, automatically
404
+ # # resolving datasources via Readers' recognized file extensions
405
+ # Alf::Environment.folder('path/to/a/folder')
406
+ #
407
+ # You can implement your own environment by subclassing this class and
408
+ # implementing the {#dataset} method. As additional support is implemented
409
+ # in the base class, Environment should never be mimicked.
410
+ #
411
+ class Environment
412
+
413
+ #
414
+ # Returns a dataset whose name is provided.
415
+ #
416
+ # This method resolves named datasets to tuple enumerables. When the
417
+ # dataset exists, this method must return an Iterator, typically a
418
+ # Reader instance. Otherwise, it must throw a NoSuchDatasetError.
419
+ #
420
+ # @param [Symbol] name the name of a dataset
421
+ # @return [Iterator] an iterator, typically a Reader instance
422
+ # @raise [NoSuchDatasetError] when the dataset does not exist
423
+ #
424
+ def dataset(name)
425
+ end
426
+ undef :dataset
427
+
428
+ #
429
+ # Branches this environment and puts some additional explicit
430
+ # definitions.
431
+ #
432
+ # This method is provided for (with ...) expressions and should not
433
+ # be overridden by subclasses.
434
+ #
435
+ # @param [Hash] defs a set of (name, Iterator) pairs.
436
+ # @return [Environment] an environment instance with new definitions set
437
+ #
438
+ def branch(defs)
439
+ Explicit.new(defs, self)
440
+ end
441
+
442
+ #
443
+ # Specialization of Environment that works with explicitly defined
444
+ # datasources and allows branching and unbranching.
445
+ #
446
+ class Explicit < Environment
447
+
448
+ #
449
+ # Creates a new environment instance with initial definitions
450
+ # and optional child environment.
451
+ #
452
+ def initialize(defs = {}, child = nil)
453
+ @defs = defs
454
+ @child = child
455
+ end
456
+
457
+ #
458
+ # Unbranches this environment and returns its child
459
+ #
460
+ def unbranch
461
+ @child
462
+ end
463
+
464
+ # (see Environment#dataset)
465
+ def dataset(name)
466
+ if @defs.has_key?(name)
467
+ @defs[name]
468
+ elsif @child
469
+ @child.dataset(name)
470
+ else
471
+ raise "No such dataset #{name}"
472
+ end
473
+ end
474
+
475
+ end # class Explicit
476
+
477
+ #
478
+ # Specialization of Environment to work on files of a given folder.
479
+ #
480
+ # This kind of environment resolves datasets by simply looking at
481
+ # recognized files in a specific folder. "Recognized" files are simply
482
+ # those for which a Reader subclass has been previously registered.
483
+ # This environment then serves reader instances.
484
+ #
485
+ class Folder < Environment
486
+
487
+ #
488
+ # Creates an environment instance, wired to the specified folder.
489
+ #
490
+ # @param [String] folder path to the folder to use as dataset source
491
+ #
492
+ def initialize(folder)
493
+ @folder = folder
494
+ end
495
+
496
+ # (see Environment#dataset)
497
+ def dataset(name)
498
+ if file = find_file(name)
499
+ Reader.reader(file, self)
500
+ else
501
+ raise "No such dataset #{name} (#{@folder})"
502
+ end
503
+ end
504
+
505
+ protected
506
+
507
+ def find_file(name)
508
+ # TODO: refactor this, because it allows getting out of the folder
509
+ if File.exists?(name.to_s)
510
+ name.to_s
511
+ elsif File.exists?(explicit = File.join(@folder, name.to_s)) &&
512
+ File.file?(explicit)
513
+ explicit
514
+ else
515
+ Dir[File.join(@folder, "#{name}.*")].find do |f|
516
+ File.file?(f)
517
+ end
518
+ end
519
+ end
520
+
521
+ end # class Folder
522
+
523
+ #
524
+ # Factors a Folder environment on a specific path
525
+ #
526
+ def self.folder(path)
527
+ Folder.new(path)
528
+ end
529
+
530
+ #
531
+ # Returns the default environment
532
+ #
533
+ def self.default
534
+ examples
535
+ end
536
+
537
+ #
538
+ # Returns the examples environment
539
+ #
540
+ def self.examples
541
+ folder File.expand_path('../../examples', __FILE__)
542
+ end
543
+
544
+ end # class Environment
545
+
546
+ #
547
+ # Marker module for all elements implementing tuple iterators.
548
+ #
549
+ # At first glance, an iterator is nothing else than an Enumerable that serves
550
+ # tuples (represented by ruby hashes). However, this module helps Alf's internal
551
+ # classes to recognize enumerables that may safely be considered as tuple
552
+ # iterators from other enumerables. For this reason, all elements that would
553
+ # like to participate in an iteration chain (that is, a logical operator
554
+ # implementation) should be marked with this module. This is the case for
555
+ # all Readers and Operators defined in Alf.
556
+ #
557
+ # Moreover, an Iterator should always define a {#pipe} method, which is the
558
+ # natural way to define the input and execution environment of operators and
559
+ # readers.
560
+ #
561
+ module Iterator
562
+ include Enumerable
563
+
564
+ #
565
+ # Wire the iterator input and an optional execution environment.
566
+ #
567
+ # Iterators (typically Reader and Operator instances) work from input data
568
+ # that come from files, or other operators, and so on. This method wires
569
+ # this input data to the iterator. Wiring is required before any attempt
570
+ # to call each, unless autowiring occurs at construction. The exact kind of
571
+ # input object is left at discretion of Iterator implementations.
572
+ #
573
+ # @param [Object] input the iterator input, at discretion of the Iterator
574
+ # implementation.
575
+ # @param [Environment] environment an optional environment for resolving
576
+ # named datasets if needed.
577
+ #
578
+ def pipe(input, environment = nil)
579
+ end
580
+ undef :pipe
581
+
582
+
583
+ #
584
+ # Coerces something to an iterator
585
+ #
586
+ def self.coerce(arg, env)
587
+ case arg
588
+ when Iterator, Array
589
+ arg
590
+ else
591
+ Reader.coerce(arg, env)
592
+ end
593
+ end
594
+
595
+ end # module Iterator
596
+
597
+ #
598
+ # Implements an Iterator at the interface with the outside world.
599
+ #
600
+ # The contract of a Reader is simply to be an Iterator. Unlike operators,
601
+ # however, readers are not expected to take other iterators as input, but IO
602
+ # objects, database tables, or something similar instead. This base class
603
+ # provides a default behavior for readers that works with IO objects. It can
604
+ # be safely extended, overridden, or even mimicked (provided that you include
605
+ # and implement the Iterator contract).
606
+ #
607
+ # This class also provides a registration mechanism to help getting Reader
608
+ # instances for specific file extensions. A typical scenario for using this
609
+ # registration mechanism is as follows:
610
+ #
611
+ # # Registers a reader kind named :foo, associated with ".foo" file
612
+ # # extensions and the FooFileDecoder class (typically a subclass of
613
+ # # Reader)
614
+ # Reader.register(:foo, [".foo"], FooFileDecoder)
615
+ #
616
+ # # Later on, you can request a reader instance for a .foo file, as
617
+ # # illustrated below.
618
+ # r = Reader.reader('/a/path/to/a/file.foo')
619
+ #
620
+ # # Also, a factory method is automatically installed on the Reader class
621
+ # # itself. This factory method can be used with a String, or an IO object.
622
+ # r = Reader.foo([a path or a IO object])
623
+ #
624
+ class Reader
625
+ include Iterator
626
+
627
+ # Registered readers
628
+ @@readers = []
629
+
630
+ #
631
+ # Registers a reader class associated with specific file extensions
632
+ #
633
+ # Registered class must provide a constructor with the following signature
634
+ # <code>new(path_or_io, environment = nil)</code>. The name must be a symbol
635
+ # which can safely be used as a ruby method name. A factory class method of
636
+ # that name and same signature is automatically installed on the Reader
637
+ # class.
638
+ #
639
+ # @param [Symbol] name a name for the kind of data decoded
640
+ # @param [Array] extensions file extensions mapped to the registered reader
641
+ # class (should include the '.', e.g. '.foo')
642
+ # @param [Class] clazz Reader subclass used to decode this kind of file
643
+ #
644
+ def self.register(name, extensions, clazz)
645
+ @@readers << [name, extensions, clazz]
646
+ (class << self; self; end).
647
+ send(:define_method, name) do |*args|
648
+ clazz.new(*args)
649
+ end
650
+ end
651
+
652
+ #
653
+ # Returns a reader instance for a specific file whose path is given
654
+ # as argument.
655
+ #
656
+ # @param [String] filepath path to a file for which extension is recognized
657
+ # @param [Array] args optional additional arguments that must be passed at
658
+ # reader's class new method.
659
+ # @return [Reader] a reader instance
660
+ #
661
+ def self.reader(filepath, *args)
662
+ ext = File.extname(filepath)
663
+ if registered = @@readers.find{|r| r[1].include?(ext)}
664
+ registered[2].new(filepath, *args)
665
+ else
666
+ raise "No registered reader for #{ext} (#{filepath})"
667
+ end
668
+ end
669
+
670
+ #
671
+ # Coerces an argument to a reader, using an optional environment to convert
672
+ # named datasets.
673
+ #
674
+ # This method automatically provides readers for Strings and Symbols through
675
+ # passed environment (**not** through the reader factory) and for IO objects
676
+ # (through Rash reader). It is part of Alf's internals and should be used
677
+ # with care.
678
+ #
679
+ def self.coerce(arg, environment = nil)
680
+ case arg
681
+ when Reader
682
+ arg
683
+ when IO
684
+ rash(arg, environment)
685
+ when String, Symbol
686
+ if environment
687
+ environment.dataset(arg.to_sym)
688
+ else
689
+ raise "No environment set"
690
+ end
691
+ else
692
+ raise ArgumentError, "Unable to coerce #{arg.inspect} to a reader"
693
+ end
694
+ end
695
+
696
+ # @return [Environment] Wired environment
697
+ attr_accessor :environment
698
+
699
+ # @return [String or IO] Input IO, or file name
700
+ attr_accessor :input
701
+
702
+ #
703
+ # Creates a reader instance, with an optional input and environment wiring.
704
+ #
705
+ # @param [String or IO] input path to a file or IO object for input
706
+ # @param [Environment] environment wired environment, serving this reader
707
+ #
708
+ def initialize(input = nil, environment = nil)
709
+ @input = input
710
+ @environment = environment
711
+ end
712
+
713
+ #
714
+ # (see Iterator#pipe)
715
+ #
716
+ def pipe(input, env = environment)
717
+ @input = input
718
+ end
719
+
720
+ #
721
+ # (see Iterator#each)
722
+ #
723
+ # @private the default implementation reads lines of the input stream and
724
+ # yields the block with <code>line2tuple(line)</code> on each of them. This
725
+ # method may be overridden if this behavior does not fit the reader's needs.
726
+ #
727
+ def each
728
+ each_input_line do |line|
729
+ tuple = line2tuple(line)
730
+ yield tuple unless tuple.nil?
731
+ end
732
+ end
733
+
734
+ protected
735
+
736
+ #
737
+ # Coerces the input object to an IO and yields the block with it.
738
+ #
739
+ # StringIO and IO inputs are yielded directly while file paths are first
740
+ # opened in read mode and then yielded.
741
+ #
742
+ def with_input_io
743
+ case input
744
+ when IO, StringIO
745
+ yield input
746
+ when String
747
+ File.open(input, 'r'){|io| yield io}
748
+ else
749
+ raise "Unable to convert #{input} to an IO object"
750
+ end
751
+ end
752
+
753
+ #
754
+ # Returns the whole input text.
755
+ #
756
+ # This feature should only be used by subclasses on inputs that are
757
+ # small enough to fit in memory. Consider implementing readers without this
758
+ # feature on files that could be larger.
759
+ #
760
+ def input_text
761
+ with_input_io{|io| io.readlines.join}
762
+ end
763
+
764
+ #
765
+ # Yields the block with each line of the input text in turn.
766
+ #
767
+ # This method is a helper for files that capture one tuple on each input
768
+ # line. It should be used in those cases, as the resulting reader will not
769
+ # load all input in memory but serve tuples on demand.
770
+ #
771
+ def each_input_line
772
+ with_input_io{|io| io.each_line(&Proc.new)}
773
+ end
774
+
775
+ #
776
+ # Converts a line previously read from the input stream to a tuple.
777
+ #
778
+ # The line is simply ignored if this method returns nil. Errors should be
779
+ # properly handled by raising exceptions. This method MUST be implemented
780
+ # by subclasses unless each is overridden.
781
+ #
782
+ def line2tuple(line)
783
+ end
784
+ undef :line2tuple
785
+
786
+ #
787
+ # Specialization of the Reader contract for .rash files.
788
+ #
789
+ # A .rash file/stream contains one ruby hash literal on each line. This
790
+ # reader simply decodes each of them in turn with Kernel.eval, providing a
791
+ # state-less reader (that is, tuples are not all loaded in memory at once).
792
+ #
793
+ class Rash < Reader
794
+
795
+ # (see Reader#line2tuple)
796
+ def line2tuple(line)
797
+ begin
798
+ h = Kernel.eval(line)
799
+ raise "hash expected, got #{h}" unless h.is_a?(Hash)
800
+ rescue Exception => ex
801
+ $stderr << "Skipping #{line.strip}: #{ex.message}\n"
802
+ nil
803
+ else
804
+ return h
805
+ end
806
+ end
807
+
808
+ Reader.register(:rash, [".rash"], self)
809
+ end # class Rash
810
+
811
+ #
812
+ # Specialization of the Reader contract for .alf files.
813
+ #
814
+ # A .alf file simply contains a query expression in the Lispy DSL. This
815
+ # reader decodes and compiles the expression and delegates the enumeration
816
+ # to the obtained operator.
817
+ #
818
+ # Note that an Environment must be wired at creation or piping time.
819
+ # NoSuchDatasetError will certainly occur otherwise.
820
+ #
821
+ class AlfFile < Reader
822
+
823
+ # (see Reader#each)
824
+ def each
825
+ op = Alf.lispy(environment).compile(input_text)
826
+ op.each(&Proc.new)
827
+ end
828
+
829
+ Reader.register(:alf, [".alf"], self)
830
+ end # class AlfFile
831
+
832
+ end # class Reader
833
+
834
+ #
835
+ # Renders a relation (given by any Iterator) in a specific format.
836
+ #
837
+ # A renderer takes an Iterator instance as input and renders it on an output
838
+ # stream. Renderers are **not** iterators themselves, even if they mimic the
839
+ # {#pipe} method. Their usage is made via the {#execute} method.
840
+ #
841
+ # Similarly to the {Reader} class, this one provides a registration mechanism
842
+ # for specific output formats. The common scenario is as follows:
843
+ #
844
+ # # Register a new renderer for :foo format (automatically provides the
845
+ # # '--foo Render output as a foo stream' option of 'alf show') and with
846
+ # # the FooRenderer class for handling rendering.
847
+ # Renderer.register(:foo, "as a foo stream", FooRenderer)
848
+ #
849
+ # # Later on, you can request a renderer instance for a specific format
850
+ # # as follows (wiring input is optional)
851
+ # r = Renderer.renderer(:foo, [an Iterator])
852
+ #
853
+ # # Also, a factory method is automatically installed on the Renderer class
854
+ # # itself.
855
+ # r = Renderer.foo([an Iterator])
856
+ #
857
+ class Renderer
858
+
859
+ # Registered renderers
860
+ @@renderers = []
861
+
862
+ #
863
+ # Register a rendering class with a given name and description.
864
+ #
865
+ # Registered class must at least provide a constructor with an empty
866
+ # signature. The name must be a symbol which can safely be used as a ruby
867
+ # method name. A factory class method of that name and delegation signature
868
+ # is automatically installed on the Renderer class.
869
+ #
870
+ # @param [Symbol] name a name for the output format
871
+ # @param [String] description an output format description (for 'alf show')
872
+ # @param [Class] clazz Renderer subclass used to render in this format
873
+ #
874
+ def self.register(name, description, clazz)
875
+ @@renderers << [name, description, clazz]
876
+ (class << self; self; end).
877
+ send(:define_method, name) do |*args|
878
+ clazz.new(*args)
879
+ end
880
+ end
881
+
882
+ #
883
+ # Returns a Renderer instance for the given output format name.
884
+ #
885
+ # @param [Symbol] name name of an output format previously registered
886
+ # @param [...] args other arguments to pass to the renderer constructor
887
+ # @return [Renderer] a Renderer instance, already wired if args are
888
+ # provided
889
+ #
890
+ def self.renderer(name, *args)
891
+ if r = @@renderers.find{|triple| triple[0] == name}
892
+ r[2].new(*args)
893
+ else
894
+ raise "No renderer registered for #{name}"
895
+ end
896
+ end
897
+
898
+ #
899
+ # Yields each (name,description,clazz) previously registered in turn
900
+ #
901
# Yields each registered [name, description, clazz] triple in turn.
#
# The block is taken as an explicit parameter; the former block-less
# Proc.new raises ArgumentError on Ruby >= 3.0.
def self.each_renderer(&block)
  @@renderers.each(&block)
end
904
+
905
+ # Renderer input (typically an Iterator)
906
+ attr_accessor :input
907
+
908
+ # @return [Environment] Optional wired environment
909
+ attr_accessor :environment
910
+
911
+ #
912
+ # Creates a renderer instance, optionally wired to an input
913
+ #
914
+ def initialize(input = nil)
915
+ @input = input
916
+ end
917
+
918
+ #
919
+ # Sets the renderer input.
920
+ #
921
+ # This method mimics {Iterator#pipe} and has the same contract.
922
+ #
923
+ def pipe(input, env = environment)
924
+ self.environment = env
925
+ self.input = input
926
+ end
927
+
928
+ #
929
+ # Executes the rendering, outputting the resulting tuples on the provided
930
+ # output buffer.
931
+ #
932
+ # The default implementation simply coerces the input as an Iterator and
933
+ # delegates the call to {#render}.
934
+ #
935
+ def execute(output = $stdout)
936
+ render(Iterator.coerce(input, environment), output)
937
+ end
938
+
939
+ protected
940
+
941
+ #
942
+ # Renders tuples served by the iterator to the output buffer provided and
943
+ # returns the latter.
944
+ #
945
+ # This method must be implemented by subclasses unless {#execute} is
946
+ # overridden.
947
+ #
948
+ def render(iterator, output)
949
+ end
950
+ undef :render
951
+
952
+ #
953
+ # Implements the Renderer contract through inspect
954
+ #
955
+ class Rash < Renderer
956
+
957
+ # (see Renderer#render)
958
+ def render(input, output)
959
+ input.each do |tuple|
960
+ output << tuple.inspect << "\n"
961
+ end
962
+ output
963
+ end
964
+
965
+ Renderer.register(:rash, "as ruby hashes", self)
966
+ end # class Rash
967
+
968
+ require "alf/renderer/text"
969
+ require "alf/renderer/yaml"
970
+ end # module Renderer
971
+
972
+ #
973
+ # Provides a factory over Alf operators and handles the interface with
974
+ # Quickl for commandline support.
975
+ #
976
+ # This module is part of Alf's internal architecture and should not be used
977
+ # at all by third-party projects.
978
+ #
979
+ module Factory
980
+
981
+ # @see Quickl::Command
982
+ def Command(file, line)
983
+ Quickl::Command(file, line){|builder|
984
+ builder.command_parent = Alf::Command::Main
985
+ yield(builder) if block_given?
986
+ }
987
+ end
988
+
989
+ # @see Operator
990
+ def Operator(file, line)
991
+ Command(file, line) do |b|
992
+ b.instance_module Alf::Operator
993
+ end
994
+ end
995
+
996
+ extend Factory
997
+ end # module Factory
998
+
999
+ #
1000
+ # Marker module and namespace for Alf main commands, those that are **not**
1001
+ # operators at all.
1002
+ #
1003
+ module Command
1004
+
1005
+ #
1006
+ # alf - Classy data-manipulation dressed in a DSL (+ commandline)
1007
+ #
1008
+ # SYNOPSIS
1009
+ # alf [--version] [--help]
1010
+ # alf -e '(lispy command)'
1011
+ # alf [FILE.alf]
1012
+ # alf [alf opts] OPERATOR [operator opts] ARGS ...
1013
+ # alf help OPERATOR
1014
+ #
1015
+ # OPTIONS
1016
+ # #{summarized_options}
1017
+ #
1018
+ # RELATIONAL COMMANDS
1019
+ # #{summarized_subcommands subcommands.select{|cmd|
1020
+ # cmd.include?(Alf::Operator::Relational)
1021
+ # }}
1022
+ #
1023
+ # NON-RELATIONAL COMMANDS
1024
+ # #{summarized_subcommands subcommands.select{|cmd|
1025
+ # cmd.include?(Alf::Operator::NonRelational)
1026
+ # }}
1027
+ #
1028
+ # OTHER NON-RELATIONAL COMMANDS
1029
+ # #{summarized_subcommands subcommands.select{|cmd|
1030
+ # cmd.include?(Alf::Command)
1031
+ # }}
1032
+ #
1033
+ # See '#{program_name} help COMMAND' for details about a specific command.
1034
+ #
1035
+ class Main < Quickl::Delegator(__FILE__, __LINE__)
1036
+ include Command, Lispy
1037
+
1038
+ # Environment instance to use to get base iterators
1039
+ attr_accessor :environment
1040
+
1041
+ # Output renderer
1042
+ attr_accessor :renderer
1043
+
1044
+ # Creates a command instance
1045
+ def initialize(env = Environment.default)
1046
+ @environment = env
1047
+ end
1048
+
1049
+ # Install options
1050
+ options do |opt|
1051
+ @execute = false
1052
+ opt.on("-e", "--execute", "Execute one line of script (Lispy API)") do
1053
+ @execute = true
1054
+ end
1055
+
1056
+ @renderer = Renderer::Rash.new
1057
+ Renderer.each_renderer do |name,descr,clazz|
1058
+ opt.on("--#{name}", "Render output #{descr}"){
1059
+ @renderer = clazz.new
1060
+ }
1061
+ end
1062
+
1063
+ opt.on('--env=FOLDER',
1064
+ "Set the environment folder to use") do |value|
1065
+ @environment = Environment.folder(value)
1066
+ end
1067
+
1068
+ opt.on_tail('-h', "--help", "Show help") do
1069
+ raise Quickl::Help
1070
+ end
1071
+
1072
+ opt.on_tail('-v', "--version", "Show version") do
1073
+ raise Quickl::Exit, "#{program_name} #{Alf::VERSION}"\
1074
+ " (c) 2011, Bernard Lambeau"
1075
+ end
1076
+ end # Alf's options
1077
+
1078
+ #
1079
+ # Overridden because Quickl only keeps --options but modifying it there
1080
+ # should probably be considered a broken API.
1081
+ #
1082
+ def _run(argv = [])
1083
+
1084
+ # 1) Extract my options and parse them
1085
+ my_argv = []
1086
+ while argv.first =~ /^-/
1087
+ my_argv << argv.shift
1088
+ end
1089
+ parse_options(my_argv)
1090
+
1091
+ # 2) build the operator according to -e option
1092
+ operator = if @execute
1093
+ instance_eval(argv.first)
1094
+ else
1095
+ super
1096
+ end
1097
+
1098
+ # 3) if there is a requester, then we do the job (assuming bin/alf)
1099
+ # with the renderer to use. Otherwise, we simply return built operator
1100
+ if operator && requester
1101
+ chain(renderer, operator).execute($stdout)
1102
+ else
1103
+ operator
1104
+ end
1105
+ end
1106
+
1107
+ end
1108
+
1109
+ #
1110
+ # Output input tuples through a specific renderer (text, yaml, ...)
1111
+ #
1112
+ # SYNOPSIS
1113
+ # #{program_name} #{command_name} [DATASET...]
1114
+ #
1115
+ # OPTIONS
1116
+ # #{summarized_options}
1117
+ #
1118
+ # DESCRIPTION
1119
+ #
1120
+ # When dataset names are specified as commandline args, request the environment
1121
+ # to provide those datasets and print them. Otherwise, take what comes on standard
1122
+ # input.
1123
+ #
1124
+ # Note that this command is not an operator and should not be piped anymore.
1125
+ #
1126
+ class Show < Factory::Command(__FILE__, __LINE__)
1127
+ include Command
1128
+
1129
+ options do |opt|
1130
+ @renderer = Renderer::Text.new
1131
+ Renderer.each_renderer do |name,descr,clazz|
1132
+ opt.on("--#{name}", "Render output #{descr}"){
1133
+ @renderer = clazz.new
1134
+ }
1135
+ end
1136
+ end
1137
+
1138
+ def execute(args)
1139
+ requester.renderer = @renderer
1140
+ args = [ $stdin ] if args.empty?
1141
+ requester.chain(*args)
1142
+ end
1143
+
1144
+ end # class Show
1145
+
1146
+ #
1147
+ # Executes an .alf file on current environment
1148
+ #
1149
+ # SYNOPSIS
1150
+ # #{program_name} #{command_name} [FILE]
1151
+ #
1152
+ # OPTIONS
1153
+ # #{summarized_options}
1154
+ #
1155
+ # DESCRIPTION
1156
+ #
1157
+ # This command executes the .alf file passed as first argument (or what comes
1158
+ # on standard input) as a alf query to be executed on the current environment.
1159
+ #
1160
+ class Exec < Factory::Command(__FILE__, __LINE__)
1161
+ include Command
1162
+
1163
+ def execute(args)
1164
+ Reader.alf(args.first || $stdin, requester.environment)
1165
+ end
1166
+
1167
+ end # class Exec
1168
+
1169
+ #
1170
+ # Show help about a specific command
1171
+ #
1172
+ # SYNOPSIS
1173
+ # #{program_name} #{command_name} COMMAND
1174
+ #
1175
+ class Help < Factory::Command(__FILE__, __LINE__)
1176
+ include Command
1177
+
1178
+ # Let NoSuchCommandError be passed to higher stage
1179
+ no_react_to Quickl::NoSuchCommand
1180
+
1181
+ # Command execution
1182
+ def execute(args)
1183
+ if args.size != 1
1184
+ puts super_command.help
1185
+ else
1186
+ cmd = has_command!(args.first, super_command)
1187
+ puts cmd.help
1188
+ end
1189
+ nil
1190
+ end
1191
+
1192
+ end # class Help
1193
+
1194
+ end
1195
+
1196
+ #
1197
+ # Marker for all operators, relational and non-relational ones.
1198
+ #
1199
+ module Operator
1200
+ include Iterator, Tools
1201
+
1202
+ #
1203
+ # Encapsulates method definitions that convert operators to Quickl
1204
+ # commands
1205
+ #
1206
+ module CommandMethods
1207
+
1208
+ protected
1209
+
1210
+ #
1211
+ # Configures the operator from arguments taken from command line.
1212
+ #
1213
+ # This method is intended to be overridden by subclasses and must return the
1214
+ # operator itself.
1215
+ #
1216
+ def set_args(args)
1217
+ self
1218
+ end
1219
+
1220
+ #
1221
+ # Overrides Quickl::Command::Single#_run to handle the '--' separator
1222
+ # correctly.
1223
+ #
1224
+ # This is because parse_options tends to eat the '--' separator... This
1225
+ # could be handled in Quickl itself, but it should be considered a broken
1226
+ # API and will only be available in quickl >= 0.3.0 (probably)
1227
+ #
1228
+ def _run(argv = [])
1229
+ operands, args = split_command_args(argv).collect do |arr|
1230
+ parse_options(arr)
1231
+ end
1232
+ self.set_args(args)
1233
+ if operands = command_line_operands(operands)
1234
+ env = environment || (requester ? requester.environment : nil)
1235
+ self.pipe(operands, env)
1236
+ end
1237
+ self
1238
+ end
1239
+
1240
# Splits raw command-line arguments around the first '--' separator.
#
# @param [Array] args command-line arguments
# @return [Array] an [operands, args] pair:
#   * no '--' present: everything is an operand, no args
#   * leading '--': operands default to [$stdin], the rest are args
#   * otherwise: what precedes '--' are operands, what follows are args
def split_command_args(args)
  # The pair is returned directly; the former `operands, args = case ...`
  # assignment only shadowed the parameter and left unused locals behind.
  case i = args.index("--")
  when nil
    [args, []]
  when 0
    [[$stdin], args[1..-1]]
  else
    [args[0...i], args[(i + 1)..-1]]
  end
end
1250
+
1251
+ def command_line_operands(operands)
1252
+ operands
1253
+ end
1254
+
1255
+ end # module CommandMethods
1256
+ include CommandMethods
1257
+
1258
+ # Operators input datasets
1259
+ attr_accessor :datasets
1260
+
1261
+ # Optional environment
1262
+ attr_reader :environment
1263
+
1264
+ # Sets the environment on this operator and propagate on
1265
+ # datasets
1266
+ def environment=(env)
1267
+ # this is to avoid infinite loop (TODO: why is there infinite loops??)
1268
+ return if @environment == env
1269
+
1270
+ # set and propagate on children
1271
+ @environment = env
1272
+ datasets.each do |dataset|
1273
+ if dataset.respond_to?(:environment)
1274
+ dataset.environment = env
1275
+ end
1276
+ end if datasets
1277
+
1278
+ env
1279
+ end
1280
+
1281
+ #
1282
+ # Sets the operator input
1283
+ #
1284
+ def pipe(input, env = environment)
1285
+ raise NotImplementedError, "Operator#pipe should be overriden"
1286
+ end
1287
+
1288
+ #
1289
+ # Yields each tuple in turn
1290
+ #
1291
+ # This method is implemented in a way that ensures that all operators are
1292
+ # thread safe. It is not intended to be overridden, use _each instead.
1293
+ #
1294
# Iterates on a duplicate of the operator so that state installed by
# _prepare never leaks into the receiver.
#
# The block is captured explicitly; a block-less Proc.new raises
# ArgumentError on Ruby >= 3.0.
def each(&block)
  op = dup
  op._prepare
  op._each(&block)
end
1299
+
1300
+ protected
1301
+
1302
+ #
1303
+ # Prepares the iterator before subsequent call to _each.
1304
+ #
1305
+ # This method is intended to be overridden by subclasses to install what's
1305
+ # needed for successful iteration. The default implementation does nothing.
1307
+ #
1308
+ def _prepare
1309
+ end
1310
+
1311
+ # Internal implementation of the iterator.
1312
+ #
1313
+ # This method must be implemented by subclasses. It is safe to use instance
1314
+ # variables (typically initialized in _prepare) here.
1315
+ #
1316
+ def _each
1317
+ end
1318
+
1319
+ #
1320
+ # Specialization of Operator for operators that work on a unary input
1321
+ #
1322
+ module Unary
1323
+ include Operator
1324
+
1325
+ #
1326
+ # Sets the operator input
1327
+ #
1328
+ def pipe(input, env = environment)
1329
+ self.environment = env
1330
+ self.datasets = [ input ]
1331
+ end
1332
+
1333
+ protected
1334
+
1335
+ def command_line_operands(operands)
1336
+ operands.first
1337
+ end
1338
+
1339
+ #
1340
+ # Simply returns the first dataset
1341
+ #
1342
+ def input
1343
+ Iterator.coerce(datasets.first, environment)
1344
+ end
1345
+
1346
+ #
1347
+ # Yields the block with each input tuple.
1348
+ #
1349
+ # This method should be preferred to <code>input.each</code> when possible.
1350
+ #
1351
# Yields the given block with each tuple served by #input.
#
# The block is forwarded explicitly; a block-less Proc.new raises
# ArgumentError on Ruby >= 3.0.
def each_input_tuple(&block)
  input.each(&block)
end
1354
+
1355
+ end # module Unary
1356
+
1357
+ #
1358
+ # Specialization of Operator for operators that work on a binary input
1359
+ #
1360
+ module Binary
1361
+ include Operator
1362
+
1363
+ #
1364
+ # Sets the operator input
1365
+ #
1366
+ def pipe(input, env = environment)
1367
+ self.environment = env
1368
+ self.datasets = input
1369
+ end
1370
+
1371
+ protected
1372
+
1373
+ def command_line_operands(operands)
1374
+ (operands.size < 2) ? ([$stdin] + operands) : operands
1375
+ end
1376
+
1377
+ # Returns the left operand
1378
+ def left
1379
+ Iterator.coerce(datasets.first, environment)
1380
+ end
1381
+
1382
+ # Returns the right operand
1383
+ def right
1384
+ Iterator.coerce(datasets.last, environment)
1385
+ end
1386
+
1387
+ end # module Binary
1388
+
1389
+ #
1390
+ # Specialization of Operator for operators that simply convert single tuples
1391
+ # to single tuples.
1392
+ #
1393
+ module Transform
1394
+ include Unary
1395
+
1396
+ protected
1397
+
1398
+ # (see Operator#_each)
1399
+ def _each
1400
+ each_input_tuple do |tuple|
1401
+ yield _tuple2tuple(tuple)
1402
+ end
1403
+ end
1404
+
1405
+ #
1406
+ # Transforms an input tuple to an output tuple
1407
+ #
1408
+ def _tuple2tuple(tuple)
1409
+ end
1410
+
1411
+ end # module Transform
1412
+
1413
+ #
1414
+ # Specialization of Operator for implementing operators that rely on a
1415
+ # cesure algorithm.
1416
+ #
1417
+ module Cesure
1418
+ include Unary
1419
+
1420
+ protected
1421
+
1422
+ # (see Operator#_each)
1423
# (see Operator#_each)
#
# Walks input tuples and detects key changes ("cesures"): whenever the
# projection of a tuple on cesure_key differs from the previous one, the
# previous group is flushed and a new one is started. The last group is
# flushed once the input is exhausted.
#
# The caller's block is passed to the hooks as +receiver+. It is captured
# through an explicit block parameter because a block-less Proc.new raises
# ArgumentError on Ruby >= 3.0.
def _each(&block)
  receiver = block
  proj_key = cesure_key
  prev_key = nil
  each_input_tuple do |tuple|
    cur_key = proj_key.project(tuple)
    if cur_key != prev_key
      # nil is the "no group started yet" sentinel
      flush_cesure(prev_key, receiver) unless prev_key.nil?
      start_cesure(cur_key, receiver)
      prev_key = cur_key
    end
    accumulate_cesure(tuple, receiver)
  end
  flush_cesure(prev_key, receiver) unless prev_key.nil?
end
1436
+
1437
+ def cesure_key
1438
+ end
1439
+
1440
+ def start_cesure(key, receiver)
1441
+ end
1442
+
1443
+ def accumulate_cesure(tuple, receiver)
1444
+ end
1445
+
1446
+ def flush_cesure(key, receiver)
1447
+ end
1448
+
1449
+ end # module Cesure
1450
+
1451
+ #
1452
+ # Specialization of Operator for operators that are shortcuts for longer
1453
+ # expressions.
1454
+ #
1455
+ module Shortcut
1456
+ include Operator
1457
+
1458
+ #
1459
+ # Sets the operator input
1460
+ #
1461
+ def pipe(input, env = environment)
1462
+ self.environment = env
1463
+ self.datasets = input
1464
+ end
1465
+
1466
+ protected
1467
+
1468
+ # (see Operator#_each)
1469
# (see Operator#_each)
#
# Delegates the iteration to the expression returned by #longexpr. The
# block is forwarded explicitly; a block-less Proc.new raises ArgumentError
# on Ruby >= 3.0.
def _each(&block)
  longexpr.each(&block)
end
1472
+
1473
+ #
1474
+ # Compiles the longer expression and returns it.
1475
+ #
1476
+ # @return (Iterator) the compiled longer expression, typically another
1477
+ # Operator instance
1478
+ #
1479
+ def longexpr
1480
+ end
1481
+ undef :longexpr
1482
+
1483
+ #
1484
+ # This is an helper ala Lispy#chain for implementing (#longexpr).
1485
+ #
1486
+ # @param [Array] elements a list of Iterator-able
1487
+ # @return [Operator] the first element of the list, but piped with the
1488
+ # next one, and so on.
1489
+ #
1490
+ def chain(*elements)
1491
+ elements = elements.reverse
1492
+ elements[1..-1].inject(elements.first) do |c, elm|
1493
+ elm.pipe(c, environment)
1494
+ elm
1495
+ end
1496
+ end
1497
+
1498
+ end # module Shortcut
1499
+
1500
+ end # module Operator
1501
+
1502
+ #
1503
+ # Marker module and namespace for non relational operators
1504
+ #
1505
+ module Operator::NonRelational
1506
+
1507
+ #
1508
+ # Extend with an unique autonumber attribute
1509
+ #
1510
+ # SYNOPSIS
1511
+ # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1512
+ #
1513
+ # API & EXAMPLE
1514
+ #
1515
+ # # Autonumber suppliers (:autonum attribute name by default)
1516
+ # (autonum :suppliers)
1517
+ #
1518
+ # # You can specify the attribute name
1519
+ # (autonum :suppliers, :unique_id)
1520
+ #
1521
+ # DESCRIPTION
1522
+ #
1523
+ # This operator takes input tuples in any order they come and extends them
1524
+ # with an autonumber attribute ATTRNAME. This allows converting non-relational
1525
+ # tuple enumerators to relational ones by ensuring uniqueness of tuples in an
1526
+ # arbitrary manner.
1527
+ #
1528
+ # alf autonum suppliers
1529
+ # alf autonum suppliers -- unique_id
1530
+ #
1531
+ class Autonum < Factory::Operator(__FILE__, __LINE__)
1532
+ include Operator::NonRelational, Operator::Transform
1533
+
1534
+ # Names of the new attribute to add
1535
+ attr_accessor :attrname
1536
+
1537
+ def initialize(attrname = :autonum)
1538
+ @attrname = attrname
1539
+ end
1540
+
1541
+ protected
1542
+
1543
+ # (see Operator::CommandMethods#set_args)
1544
# (see Operator::CommandMethods#set_args)
#
# Takes the attribute name from the last command-line argument, if any, and
# keeps the current name otherwise. Returns the operator itself, as the
# set_args contract requires and as the other operators do.
def set_args(args)
  @attrname = args.last.to_sym unless args.empty?
  self
end
1547
+
1548
+ # (see Operator#_prepare)
1549
+ def _prepare
1550
+ @autonum = -1
1551
+ end
1552
+
1553
+ # (see Operator::Transform#_tuple2tuple)
1554
+ def _tuple2tuple(tuple)
1555
+ tuple.merge(@attrname => (@autonum += 1))
1556
+ end
1557
+
1558
+ end # class Autonum
1559
+
1560
+ #
1561
+ # Force default values on missing/nil attributes
1562
+ #
1563
+ # SYNOPSIS
1564
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
1565
+ #
1566
+ # OPTIONS
1567
+ # #{summarized_options}
1568
+ #
1569
+ # API & EXAMPLE
1570
+ #
1571
+ # # Non strict mode
1572
+ # (defaults :suppliers, :country => 'Belgium')
1573
+ #
1574
+ # # Strict mode (--strict)
1575
+ # (defaults :suppliers, {:country => 'Belgium'}, true)
1576
+ #
1577
+ # DESCRIPTION
1578
+ #
1579
+ # This operator rewrites tuples so as to ensure that all values for specified
1580
+ # attributes ATTRx are defined and not nil. Missing or nil attributes are
1581
+ # replaced by the associated default value VALx.
1582
+ #
1583
+ # When used in shell, the hash of default values is built from commandline
1584
+ # arguments ala Hash[...]. However, to keep type safety VALx are interpreted
1585
+ # as ruby literals and built with Kernel.eval. This means that strings must
1586
+ # be doubly quoted. For the example of the API section:
1587
+ #
1588
+ # alf defaults suppliers -- country "'Belgium'"
1589
+ #
1590
+ # When used in --strict mode, the operator simply project resulting tuples on
1591
+ # attributes for which a default value has been specified. Using the strict
1592
+ # mode guarantess that the heading of all tuples is the same, and that no nil
1593
+ # value ever remains. However, this operator never remove duplicates.
1594
+ #
1595
+ class Defaults < Factory::Operator(__FILE__, __LINE__)
1596
+ include Operator::NonRelational, Operator::Transform
1597
+
1598
+ # Default values as a ATTR -> VAL hash
1599
+ attr_accessor :defaults
1600
+
1601
+ # Strict mode?
1602
+ attr_accessor :strict
1603
+
1604
+ # Builds a Defaults operator instance
1605
+ def initialize(defaults = {}, strict = false)
1606
+ @defaults = defaults
1607
+ @strict = strict
1608
+ end
1609
+
1610
+ options do |opt|
1611
+ opt.on('-s', '--strict', 'Strictly restrict to default attributes'){
1612
+ self.strict = true
1613
+ }
1614
+ end
1615
+
1616
+ protected
1617
+
1618
+ # (see Operator::CommandMethods#set_args)
1619
+ def set_args(args)
1620
+ @defaults = tuple_collect(args.each_slice(2)) do |k,v|
1621
+ [k.to_sym, Kernel.eval(v)]
1622
+ end
1623
+ self
1624
+ end
1625
+
1626
+ # (see Operator::Transform#_tuple2tuple)
1627
+ def _tuple2tuple(tuple)
1628
+ if strict
1629
+ tuple_collect(@defaults){|k,v|
1630
+ [k, coalesce(tuple[k], v)]
1631
+ }
1632
+ else
1633
+ @defaults.merge tuple_collect(tuple){|k,v|
1634
+ [k, coalesce(v, @defaults[k])]
1635
+ }
1636
+ end
1637
+ end
1638
+
1639
+ end # class Defaults
1640
+
1641
+ #
1642
+ # Remove tuple duplicates
1643
+ #
1644
+ # SYNOPSIS
1645
+ # #{program_name} #{command_name} [OPERAND]
1646
+ #
1647
+ # API & EXAMPLE
1648
+ #
1649
+ # # clip, unlike project, typically leave duplicates
1650
+ # (compact (clip :suppliers, [ :city ]))
1651
+ #
1652
+ # DESCRIPTION
1653
+ #
1654
+ # This operator remove duplicates from input tuples. As defaults, it is a non
1655
+ # relational operator that helps normalizing input for implementing relational
1656
+ # operators. This one is centric in converting bags of tuples to sets of
1657
+ # tuples, as required by true relations.
1658
+ #
1659
+ # alf compact ...
1660
+ #
1661
+ class Compact < Factory::Operator(__FILE__, __LINE__)
1662
+ include Operator::NonRelational, Operator::Shortcut, Operator::Unary
1663
+
1664
+ # Removes duplicates according to a complete order
1665
+ class SortBased
1666
+ include Operator::Cesure
1667
+
1668
+ def cesure_key
1669
+ @cesure_key ||= ProjectionKey.new([],true)
1670
+ end
1671
+
1672
+ def accumulate_cesure(tuple, receiver)
1673
+ @tuple = tuple
1674
+ end
1675
+
1676
+ def flush_cesure(key, receiver)
1677
+ receiver.call(@tuple)
1678
+ end
1679
+
1680
+ end # class SortBased
1681
+
1682
+ # Removes duplicates by loading all in memory and filtering
1683
+ # them there
1684
+ class BufferBased
1685
+ include Operator::Unary
1686
+
1687
+ def _prepare
1688
+ @tuples = input.to_a.uniq
1689
+ end
1690
+
1691
# Yields the tuples buffered in @tuples by _prepare. The block is forwarded
# explicitly; a block-less Proc.new raises ArgumentError on Ruby >= 3.0.
def _each(&block)
  @tuples.each(&block)
end
1694
+
1695
+ end # class BufferBased
1696
+
1697
+ protected
1698
+
1699
+ def longexpr
1700
+ chain BufferBased.new,
1701
+ datasets
1702
+ end
1703
+
1704
+ end # class Compact
1705
+
1706
+ #
1707
+ # Sort input tuples according to an order relation
1708
+ #
1709
+ # SYNOPSIS
1710
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ORDER1 ATTR2 ORDER2...
1711
+ #
1712
+ # API & EXAMPLE
1713
+ #
1714
+ # # sort on supplier name in ascending order
1715
+ # (sort :suppliers, [:name])
1716
+ #
1717
+ # # sort on city then on name
1718
+ # (sort :suppliers, [:city, :name])
1719
+ #
1720
+ # # sort on city DESC then on name ASC
1721
+ # (sort :suppliers, [[:city, :desc], [:name, :asc]])
1722
+ #
1723
+ # => See OrderingKey about specifying orderings
1724
+ #
1725
+ # DESCRIPTION
1726
+ #
1727
+ # This operator sorts input tuples on ATTR1 then ATTR2, etc. and outputs
1728
+ # them sorted after that. This is, of course, a non relational operator as
1729
+ # relations are unordered sets. It is provided to implement operators that
1730
+ # need tuples to be sorted to work correctly. When used in shell, the key
1731
+ # ordering must be specified in its longest form:
1732
+ #
1733
+ # alf sort suppliers -- name asc
1734
+ # alf sort suppliers -- city desc name asc
1735
+ #
1736
+ # LIMITATIONS
1737
+ #
1738
+ # The fact that the ordering must be completely specified with commandline
1739
+ # arguments is a limitation, shortcuts could be provided in the future.
1740
+ #
1741
+ class Sort < Factory::Operator(__FILE__, __LINE__)
1742
+ include Operator::NonRelational, Operator::Unary
1743
+
1744
+ def initialize(ordering_key = [])
1745
+ @ordering_key = OrderingKey.coerce(ordering_key)
1746
+ yield self if block_given?
1747
+ end
1748
+
1749
+ def ordering=(ordering)
1750
+ @ordering_key = OrderingKey.coerce(ordering)
1751
+ end
1752
+
1753
+ protected
1754
+
1755
+ def set_args(args)
1756
+ self.ordering = args.collect{|c| c.to_sym}.each_slice(2).to_a
1757
+ self
1758
+ end
1759
+
1760
+ def _prepare
1761
+ @buffer = Buffer::Sorted.new(@ordering_key)
1762
+ @buffer.add_all(input)
1763
+ end
1764
+
1765
# Yields the tuples held by @buffer (filled by _prepare). The block is
# forwarded explicitly; a block-less Proc.new raises ArgumentError on
# Ruby >= 3.0.
def _each(&block)
  @buffer.each(&block)
end
1768
+
1769
+ end # class Sort
1770
+
1771
+ #
1772
+ # Clip input tuples to a subset of attributes
1773
+ #
1774
+ # SYNOPSIS
1775
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
1776
+ #
1777
+ # OPTIONS
1778
+ # #{summarized_options}
1779
+ #
1780
+ # API & EXAMPLE
1781
+ #
1782
+ # # Keep only name and city attributes
1783
+ # (clip :suppliers, [:name, :city])
1784
+ #
1785
+ # # Keep all but name and city attributes
1786
+ # (clip :suppliers, [:name, :city], true)
1787
+ #
1788
+ # DESCRIPTION
1789
+ #
1790
+ # This operator clips tuples on attributes whose names are specified as
1791
+ # arguments. This is similar to the relational PROJECT operator, expect
1792
+ # that this one does not removed duplicates that can occur from clipping.
1793
+ # In other words, clipping may lead to bags of tuples instead of sets.
1794
+ #
1795
+ # When used in shell, the clipping/projection key is simply taken from
1796
+ # commandline arguments:
1797
+ #
1798
+ # alf clip suppliers -- name city
1799
+ # alf clip suppliers --allbut -- name city
1800
+ #
1801
+ class Clip < Factory::Operator(__FILE__, __LINE__)
1802
+ include Operator::NonRelational, Operator::Transform
1803
+
1804
+ # Builds a Clip operator instance
1805
+ def initialize(attributes = [], allbut = false)
1806
+ @projection_key = ProjectionKey.new(attributes, allbut)
1807
+ yield self if block_given?
1808
+ end
1809
+
1810
+ def attributes=(attrs)
1811
+ @projection_key.attributes = attrs
1812
+ end
1813
+
1814
+ def allbut=(allbut)
1815
+ @projection_key.allbut = allbut
1816
+ end
1817
+
1818
+ # Installs the options
1819
+ options do |opt|
1820
+ opt.on('-a', '--allbut', 'Apply a ALLBUT clipping') do
1821
+ self.allbut = true
1822
+ end
1823
+ end
1824
+
1825
+ protected
1826
+
1827
+ # (see Operator::CommandMethods#set_args)
1828
+ def set_args(args)
1829
+ self.attributes = args.collect{|a| a.to_sym}
1830
+ self
1831
+ end
1832
+
1833
+ # (see Operator::Transform#_tuple2tuple)
1834
+ def _tuple2tuple(tuple)
1835
+ @projection_key.project(tuple)
1836
+ end
1837
+
1838
+ end # class Clip
1839
+
1840
+ end # Operator::NonRelational
1841
+
1842
+ #
1843
+ # Marker module and namespace for relational operators
1844
+ #
1845
+ module Operator::Relational
1846
+
1847
+ #
1848
+ # Relational projection (clip + compact)
1849
+ #
1850
+ # SYNOPSIS
1851
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
1852
+ #
1853
+ # OPTIONS
1854
+ # #{summarized_options}
1855
+ #
1856
+ # API & EXAMPLE
1857
+ #
1858
+ # # Project on name and city attributes
1859
+ # (project :suppliers, [:name, :city])
1860
+ #
1861
+ # # Project on all but name and city attributes
1862
+ # (allbut :suppliers, [:name, :city])
1863
+ #
1864
+ # DESCRIPTION
1865
+ #
1866
+ # This operator projects tuples on attributes whose names are specified as
1867
+ # arguments. This is similar to clip, except that this ones is a truly
1868
+ # relational one, that is, it also removes duplicates tuples.
1869
+ #
1870
+ # When used in shell, the clipping/projection key is simply taken from
1871
+ # commandline arguments:
1872
+ #
1873
+ # alf project suppliers -- name city
1874
+ # alf project --allbut suppliers -- name city
1875
+ #
1876
+ class Project < Factory::Operator(__FILE__, __LINE__)
1877
+ include Operator::Relational, Operator::Shortcut, Operator::Unary
1878
+
1879
+ # Builds a Project operator instance
1880
+ def initialize(attributes = [], allbut = false)
1881
+ @projection_key = ProjectionKey.new(attributes, allbut)
1882
+ yield self if block_given?
1883
+ end
1884
+
1885
+ def attributes=(attrs)
1886
+ @projection_key.attributes = attrs
1887
+ end
1888
+
1889
+ def allbut=(allbut)
1890
+ @projection_key.allbut = allbut
1891
+ end
1892
+
1893
+ # Installs the options
1894
+ options do |opt|
1895
+ opt.on('-a', '--allbut', 'Apply a ALLBUT projection') do
1896
+ self.allbut = true
1897
+ end
1898
+ end
1899
+
1900
+ protected
1901
+
1902
+ # (see Operator::CommandMethods#set_args)
1903
+ def set_args(args)
1904
+ self.attributes = args.collect{|a| a.to_sym}
1905
+ self
1906
+ end
1907
+
1908
+ # (see Operator::Shortcut#longexpr)
1909
+ def longexpr
1910
+ chain Operator::NonRelational::Compact.new,
1911
+ Operator::NonRelational::Clip.new(@projection_key.attributes,
1912
+ @projection_key.allbut),
1913
+ datasets
1914
+ end
1915
+
1916
+ end # class Project
1917
+
1918
+ #
1919
+ # Relational extension (additional, computed attributes)
1920
+ #
1921
+ # SYNOPSIS
1922
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 EXPR1 ATTR2 EXPR2...
1923
+ #
1924
+ # API & EXAMPLE
1925
+ #
1926
+ # (extend :supplies, :sp => lambda{ sid + "/" + pid },
1927
+ # :big => lambda{ qty > 100 ? true : false })
1928
+ #
1929
+ # DESCRIPTION
1930
+ #
1931
+ # This command extend input tuples with new attributes (named ATTR1, ...)
1932
+ # whose value is the result of evaluating tuple expressions (i.e. EXPR1, ...).
1933
+ # See main documentation about the semantics of tuple expressions. When used
1934
+ # in shell, the hash of extensions is built from commandline arguments ala
1935
+ # Hash[...]. Tuple expressions must be specified as code literals there:
1936
+ #
1937
+ # alf extend supplies -- sp 'sid + "/" + pid' big "qty > 100 ? true : false"
1938
+ #
1939
+ # Attributes ATTRx should not already exist, no behavior is guaranteed if
1940
+ # this precondition is not respected.
1941
+ #
1942
+ class Extend < Factory::Operator(__FILE__, __LINE__)
1943
+ include Operator::Relational, Operator::Transform
1944
+
1945
+ # Extensions as a Hash attr => lambda{...}
1946
+ attr_accessor :extensions
1947
+
1948
+ # Builds an Extend operator instance
1949
+ def initialize(extensions = {})
1950
+ @extensions = extensions
1951
+ end
1952
+
1953
+ protected
1954
+
1955
+ # (see Operator::CommandMethods#set_args)
1956
+ def set_args(args)
1957
+ @extensions = tuple_collect(args.each_slice(2)){|k,v|
1958
+ [k.to_sym, TupleHandle.compile(v)]
1959
+ }
1960
+ self
1961
+ end
1962
+
1963
+ # (see Operator#_prepare)
1964
+ def _prepare
1965
+ @handle = TupleHandle.new
1966
+ end
1967
+
1968
+ # (see Operator::Transform#_tuple2tuple)
1969
+ def _tuple2tuple(tuple)
1970
+ tuple.merge tuple_collect(@extensions){|k,v|
1971
+ [k, @handle.set(tuple).evaluate(v)]
1972
+ }
1973
+ end
1974
+
1975
+ end # class Extend
1976
+
1977
+ #
1978
+ # Relational renaming (rename some attributes)
1979
+ #
1980
+ # SYNOPSIS
1981
+ # #{program_name} #{command_name} [OPERAND] -- OLD1 NEW1 ...
1982
+ #
1983
+ # OPTIONS
1984
+ # #{summarized_options}
1985
+ #
1986
+ # API & EXAMPLE
1987
+ #
1988
+ # (rename :suppliers, :name => :supplier_name, :city => :supplier_city)
1989
+ #
1990
+ # DESCRIPTION
1991
+ #
1992
+ # This command renames OLD attributes as NEW as specified by arguments.
1993
+ # Attributes OLD should exist in source tuples while attributes NEW should
1994
+ # not. When used in shell, renaming attributes are built ala Hash[...] from
1995
+ # commandline arguments:
1996
+ #
1997
+ # alf rename suppliers -- name supplier_name city supplier_city
1998
+ #
1999
class Rename < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Renaming table: old attribute name => new attribute name
  attr_accessor :renaming

  # Creates a Rename operator from a renaming Hash (empty by default)
  def initialize(renaming = {})
    @renaming = renaming
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Commandline arguments are symbolized and read as OLD1 NEW1 OLD2 NEW2...
  def set_args(args)
    names = args.map(&:to_sym)
    @renaming = Hash[*names]
    self
  end

  # (see Operator::Transform#_tuple2tuple)
  #
  # Attributes without an entry in the renaming table keep their name.
  def _tuple2tuple(tuple)
    tuple_collect(tuple) do |name, value|
      target = @renaming[name] || name
      [target, value]
    end
  end

end # class Rename
2024
+
2025
+ #
2026
+ # Relational restriction (aka where, predicate filtering)
2027
+ #
2028
+ # SYNOPSIS
2029
+ # #{program_name} #{command_name} [OPERAND] -- EXPR
2030
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
2031
+ #
2032
+ # API & EXAMPLE
2033
+ #
2034
+ # # Restrict to suppliers with status greater than 20
2035
+ # (restrict :suppliers, lambda{ status > 20 })
2036
+ #
2037
+ # # Restrict to suppliers that live in London
2038
+ # (restrict :suppliers, lambda{ city == 'London' })
2039
+ #
2040
+ # DESCRIPTION
2041
+ #
2042
+ # This command restricts tuples to those for which EXPR evaluates to true.
2043
+ # EXPR must be a valid tuple expression that should return a truth-value.
2044
+ # When used in shell, the predicate is taken as a string and compiled with
2045
+ # TupleHandle.compile. We also provide a shortcut for equality expressions.
2046
+ # Note that, in that case, values are expected to be ruby code literals,
2047
+ # evaluated with Kernel.eval. Therefore, strings must be doubly quoted.
2048
+ #
2049
+ # alf restrict suppliers -- "status > 20"
2050
+ # alf restrict suppliers -- city "'London'"
2051
+ #
2052
class Restrict < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Compiled predicate that decides which tuples are kept
  attr_accessor :predicate

  # Creates a Restrict operator. The predicate is compiled through
  # TupleHandle.compile; the new instance is yielded when a block is given.
  def initialize(predicate = "true")
    @predicate = TupleHandle.compile(predicate)
    yield self if block_given?
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # A single argument is compiled as the predicate expression. Several
  # arguments are read as ATTR VAL pairs, each VAL being a ruby literal
  # evaluated with Kernel.eval, as stated in the command documentation.
  def set_args(args)
    @predicate = if args.size <= 1
      TupleHandle.compile(args.first)
    else
      TupleHandle.compile(
        tuple_collect(args.each_slice(2)){|a,expr| [a, Kernel.eval(expr)] }
      )
    end
    self
  end

  # (see Operator#_each)
  #
  # Yields only the input tuples on which the predicate evaluates to a
  # truthy value.
  def _each
    handle = TupleHandle.new
    each_input_tuple do |tuple|
      next unless handle.set(tuple).evaluate(@predicate)
      yield(tuple)
    end
  end

end # class Restrict
2085
+
2086
+ #
2087
+ # Relational join (and cross-join)
2088
+ #
2089
+ # SYNOPSIS
2090
+ # #{program_name} #{command_name} [LEFT] RIGHT
2091
+ #
2092
+ # API & EXAMPLE
2093
+ #
2094
+ # (join :suppliers, :parts)
2095
+ #
2096
+ # DESCRIPTION
2097
+ #
2098
+ # This operator computes the (natural) join of two input iterators. Natural
2099
+ # join means that, unlike what is commonly used in SQL, the default behavior
2100
+ # is to join on common attributes. You can use the rename operator if this
2101
+ # behavior does not fit your needs.
2102
+ #
2103
+ # alf join suppliers supplies
2104
+ #
2105
class Join < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  # Hash-based join: buffers the right operand indexed on the attributes
  # it shares with the left operand, then probes it for every left tuple.
  class HashBased
    include Operator::Binary

    # Lazily built index of the buffered operand. The join key is only
    # known once a tuple of each side has been seen, hence the lazy
    # initialization on the first call to split.
    class JoinBuffer

      # enum is the operand to buffer (the right one, see HashBased#_each)
      def initialize(enum)
        @buffer = nil
        @key = nil
        @enum = enum
      end

      # Splits a probing tuple on the join key, building the index on first
      # use. When the buffered operand is empty no key can be computed; a
      # nil key is then returned, which matches nothing in #each, so the
      # join is empty instead of failing on a nil key.
      def split(tuple)
        _init(tuple) if @buffer.nil?
        @key ? @key.split(tuple) : [nil, tuple]
      end

      # Passes every buffered tuple registered under key to the block.
      # The block is captured explicitly: Proc.new without a block raises
      # ArgumentError since ruby 3.0.
      def each(key, &block)
        @buffer[key].each(&block) if @buffer.has_key?(key)
      end

      private

      # Indexes the buffered operand on the attributes it has in common
      # with the probing tuple.
      def _init(probe)
        @buffer = Hash.new{|h,k| h[k] = []}
        @enum.each do |buffered|
          @key ||= Tools::ProjectionKey.coerce(buffered.keys & probe.keys)
          @buffer[@key.project(buffered)] << buffered
        end
      end

    end

    protected

    # (see Operator#_each)
    #
    # Yields the merge of each left tuple with every right tuple that
    # agrees with it on the common attributes.
    def _each
      buffer = JoinBuffer.new(right)
      left.each do |left_tuple|
        key, _ = buffer.split(left_tuple)
        buffer.each(key) do |right_tuple|
          yield(left_tuple.merge(right_tuple))
        end
      end
    end

  end

  protected

  # (see Shortcut#longexpr)
  def longexpr
    chain HashBased.new,
          datasets
  end

end # class Join
2163
+
2164
+ #
2165
+ # Relational intersection (aka a logical and)
2166
+ #
2167
+ # SYNOPSIS
2168
+ # #{program_name} #{command_name} [LEFT] RIGHT
2169
+ #
2170
+ # API & EXAMPLE
2171
+ #
2172
+ # # Give suppliers that live in Paris and have status >= 20
2173
+ # (intersect \\
2174
+ # (restrict :suppliers, lambda{ status >= 20 }),
2175
+ # (restrict :suppliers, lambda{ city == 'Paris' }))
2176
+ #
2177
+ # DESCRIPTION
2178
+ #
2179
+ # This operator computes the intersection between its two operands. The
2180
+ # intersection is simply the set of common tuples between them. Both operands
2181
+ # must have the same heading.
2182
+ #
2183
+ # alf intersect ... ...
2184
+ #
2185
class Intersect < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  # Hash-based intersection: indexes the right operand, then keeps the
  # left tuples found in that index.
  class HashBased
    include Operator::Binary

    protected

    # (see Operator#_prepare)
    #
    # Registers every right tuple as a key of the lookup index.
    def _prepare
      @index = {}
      right.each { |tuple| @index[tuple] = true }
    end

    # (see Operator#_each)
    #
    # Yields the left tuples that also appear in the right operand.
    def _each
      left.each do |candidate|
        next unless @index.has_key?(candidate)
        yield(candidate)
      end
    end

  end

  protected

  # (see Shortcut#longexpr)
  def longexpr
    chain HashBased.new, datasets
  end

end # class Intersect
2215
+
2216
+ #
2217
+ # Relational minus (aka difference)
2218
+ #
2219
+ # SYNOPSIS
2220
+ # #{program_name} #{command_name} [LEFT] RIGHT
2221
+ #
2222
+ # API & EXAMPLE
2223
+ #
2224
+ # # Give all suppliers but those living in Paris
2225
+ # (minus :suppliers,
2226
+ # (restrict :suppliers, lambda{ city == 'Paris' }))
2227
+ #
2228
+ # DESCRIPTION
2229
+ #
2230
+ # This operator computes the difference between its two operands. The
2231
+ # difference is simply the set of tuples in the left operand not shared by
2232
+ # the right one.
2233
+ #
2234
+ # alf minus ... ...
2235
+ #
2236
class Minus < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  # Hash-based difference: indexes the right operand, then keeps the left
  # tuples that are absent from that index.
  class HashBased
    include Operator::Binary

    protected

    # (see Operator#_prepare)
    #
    # Registers every right tuple as a key of the lookup index.
    def _prepare
      @index = {}
      right.each { |tuple| @index[tuple] = true }
    end

    # (see Operator#_each)
    #
    # Yields the left tuples that do not appear in the right operand.
    def _each
      left.each do |candidate|
        next if @index.has_key?(candidate)
        yield(candidate)
      end
    end

  end

  protected

  # (see Shortcut#longexpr)
  def longexpr
    chain HashBased.new, datasets
  end

end # class Minus
2266
+
2267
+ #
2268
+ # Relational union
2269
+ #
2270
+ # SYNOPSIS
2271
+ # #{program_name} #{command_name} [LEFT] RIGHT
2272
+ #
2273
+ # API & EXAMPLE
2274
+ #
2275
+ # (union (project :suppliers, [:city]),
2276
+ # (project :parts, [:city]))
2277
+ #
2278
+ # DESCRIPTION
2279
+ #
2280
+ # This operator computes the union join of two input iterators. Input
2281
+ # iterators should have the same heading. The result never contains duplicates.
2282
+ #
2283
+ # alf union ... ...
2284
+ #
2285
class Union < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  # Plain concatenation of both operands. Duplicates are not removed here;
  # Union#longexpr chains a Compact operator on top of this one.
  class DisjointBased
    include Operator::Binary

    protected

    # (see Operator#_each)
    #
    # Yields all left tuples, then all right tuples. The block is captured
    # explicitly because Proc.new without a block raises ArgumentError
    # since ruby 3.0.
    def _each(&block)
      left.each(&block)
      right.each(&block)
    end

  end

  protected

  # (see Shortcut#longexpr)
  def longexpr
    chain Operator::NonRelational::Compact.new,
          DisjointBased.new,
          datasets
  end

end # class Union
2310
+
2311
+ #
2312
+ # Relational nesting (tuple-valued attributes)
2313
+ #
2314
+ # SYNOPSIS
2315
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2316
+ #
2317
+ # API & EXAMPLE
2318
+ #
2319
+ # (nest :suppliers, [:city, :status], :loc_and_status)
2320
+ #
2321
+ # DESCRIPTION
2322
+ #
2323
+ # This operator nests attributes ATTR1 to ATTRN as a new, tuple-based
2324
+ # attribute whose name is NEWNAME. When used in shell, names of nested
2325
+ # attributes are taken from commandline arguments, except the last one
2326
+ # which defines the new name to use:
2327
+ #
2328
+ # alf nest suppliers -- city status loc_and_status
2329
+ #
2330
class Nest < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Names of the attributes moved into the nested tuple
  attr_accessor :attributes

  # Name of the attribute receiving the nested tuple
  attr_accessor :as

  # Creates a Nest operator for the given attributes and target name
  def initialize(attributes = [], as = :nested)
    @attributes = attributes
    @as = as
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The last commandline argument is the new attribute name (it is popped
  # from args); the remaining ones are the attributes to nest.
  def set_args(args)
    @as = args.pop.to_sym
    @attributes = args.map(&:to_sym)
    self
  end

  # (see Operator::Transform#_tuple2tuple)
  #
  # Keeps the attributes that are not nested and adds the nested ones as
  # a single tuple-valued attribute.
  def _tuple2tuple(tuple)
    kept = tuple.keys - @attributes
    result = tuple_collect(kept) { |name| [name, tuple[name]] }
    result[as] = tuple_collect(attributes) { |name| [name, tuple[name]] }
    result
  end

end # class Nest
2362
+
2363
+ #
2364
+ # Relational un-nesting (inverse of nest)
2365
+ #
2366
+ # SYNOPSIS
2367
+ # #{program_name} #{command_name} [OPERAND] -- ATTR
2368
+ #
2369
+ # API & EXAMPLE
2370
+ #
2371
+ # # Assuming nested = (nest :suppliers, [:city, :status], :loc_and_status)
2372
+ # (unnest nested, :loc_and_status)
2373
+ #
2374
+ # DESCRIPTION
2375
+ #
2376
+ # This operator unnests the tuple-valued attribute named ATTR so as to
2377
+ # flatten its pairs with 'upstream' tuple. The latter should be such so that
2378
+ # no name collision occurs. When used in shell, the name of the attribute to
2379
+ # unnest is taken as the first commandline argument:
2380
+ #
2381
+ # alf unnest nest -- loc_and_status
2382
+ #
2383
class Unnest < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Name of the tuple-valued attribute to flatten
  attr_accessor :attribute

  # Creates an Unnest operator for the given attribute
  def initialize(attribute = :nested)
    @attribute = attribute
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The attribute name is the first commandline argument.
  def set_args(args)
    @attribute = args.first.to_sym
    self
  end

  # (see Operator::Transform#_tuple2tuple)
  #
  # Works on a copy of the tuple: removes the nested attribute and merges
  # its pairs back at top level. A missing or nil attribute is treated as
  # an empty tuple.
  def _tuple2tuple(tuple)
    flat = tuple.dup
    inner = flat.delete(@attribute) || {}
    flat.merge(inner)
  end

end # class Unnest
2410
+
2411
+ #
2412
+ # Relational grouping (relation-valued attributes)
2413
+ #
2414
+ # SYNOPSIS
2415
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2416
+ #
2417
+ # API & EXAMPLE
2418
+ #
2419
+ # (group :supplies, [:pid, :qty], :supplying)
2420
+ # (group :supplies, [:sid], :supplying, true)
2421
+ #
2422
+ # DESCRIPTION
2423
+ #
2424
+ # This operator groups attributes ATTR1 to ATTRN as a new, relation-valued
2425
+ # attribute whose name is NEWNAME. When used in shell, names of grouped
2426
+ # attributes are taken from commandline arguments, except the last one
2427
+ # which defines the new name to use:
2428
+ #
2429
+ # alf group supplies -- pid qty supplying
2430
+ # alf group supplies --allbut -- sid supplying
2431
+ #
2432
class Group < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Names of the attributes the grouping is expressed on
  attr_accessor :attributes

  # Name of the relation-valued attribute that is introduced
  attr_accessor :as

  # When true, every attribute but the listed ones is grouped
  attr_accessor :allbut

  # Creates a Group operator
  def initialize(attributes = [], as = :group, allbut = false)
    @attributes = attributes
    @as = as
    @allbut = allbut
  end

  options do |opt|
    opt.on('--allbut', "Group all but specified attributes"){ @allbut = true }
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The last commandline argument is the new attribute name (it is popped
  # from args); the remaining ones are the grouping attributes.
  def set_args(args)
    @as = args.pop.to_sym
    @attributes = args.map(&:to_sym)
    self
  end

  # See Operator#_prepare
  #
  # Reads the whole input and indexes the second part of each split tuple
  # under the first part returned by the projection key.
  def _prepare
    pkey = ProjectionKey.new(attributes, !allbut)
    @index = Hash.new { |hash, key| hash[key] = [] }
    each_input_tuple do |tuple|
      key, rest = pkey.split(tuple)
      @index[key] << rest
    end
  end

  # See Operator#_each
  #
  # Yields one tuple per index entry, carrying the collected tuples under
  # the new attribute.
  def _each
    @index.each_pair do |key, tuples|
      yield(key.merge(@as => tuples))
    end
  end

end # class Group
2482
+
2483
+ #
2484
+ # Relational un-grouping (inverse of group)
2485
+ #
2486
+ # SYNOPSIS
2487
+ # #{program_name} #{command_name} [OPERAND] -- ATTR
2488
+ #
2489
+ # API & EXAMPLE
2490
+ #
2491
+ # # Assuming grouped = (group enum, [:pid, :qty], :supplying)
2492
+ # (ungroup grouped, :supplying)
2493
+ #
2494
+ # DESCRIPTION
2495
+ #
2496
+ # This operator ungroups the relation-valued attribute named ATTR and outputs
2497
+ # tuples as the flattening of each of its tuples merged with the upstream
2498
+ # one. Sub relation should be such so that no name collision occurs. When
2499
+ # used in shell, the name of the attribute to ungroup is taken as the first
2500
+ # commandline argument:
2501
+ #
2502
+ # alf ungroup group -- supplying
2503
+ #
2504
class Ungroup < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Name of the relation-valued attribute to flatten
  attr_accessor :attribute

  # Creates an Ungroup operator for the given attribute
  def initialize(attribute = :grouped)
    @attribute = attribute
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The attribute name is the last commandline argument (popped from args).
  def set_args(args)
    @attribute = args.pop.to_sym
    self
  end

  # See Operator#_each
  #
  # For every input tuple, yields one tuple per member of its sub-relation:
  # the input tuple without the grouped attribute, merged with the member.
  def _each
    each_input_tuple do |input|
      outer = input.dup
      members = outer.delete(@attribute)
      members.each do |member|
        yield(outer.merge(member))
      end
    end
  end

end # class Ungroup
2535
+
2536
+ #
2537
+ # Relational summarization (group-by + aggregate ops)
2538
+ #
2539
+ # SYNOPSIS
2540
+ # #{program_name} #{command_name} [OPERAND] --by=KEY1,KEY2... -- AGG1 EXPR1...
2541
+ #
2542
+ # OPTIONS
2543
+ # #{summarized_options}
2544
+ #
2545
+ # API & EXAMPLE
2546
+ #
2547
+ # (summarize :supplies, [:sid],
2548
+ # :total_qty => Aggregator.sum(:qty))
2549
+ #
2550
+ # DESCRIPTION
2551
+ #
2552
+ # This operator summarizes input tuples on the projection on KEY1,KEY2,...
2553
+ # attributes and applies aggregate operators on sets of matching tuples.
2554
+ # Introduced names AGG should be disjoint from KEY attributes.
2555
+ #
2556
+ # When used in shell, the aggregations are taken from commandline arguments
2557
+ # AGG and EXPR, where AGG is the name of a new attribute and EXPR is an
2558
+ # aggregation expression evaluated on Aggregator:
2559
+ #
2560
+ # alf summarize supplies --by=sid -- total_qty "sum(:qty)"
2561
+ #
2562
class Summarize < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Unary

  # Names of the attributes the summarization is grouped by
  attr_accessor :by

  # Hash of aggregations: new attribute name => Aggregator
  attr_accessor :aggregators

  # Creates a Summarize operator
  def initialize(by = [], aggregators = {})
    @by = by
    @aggregators = aggregators
  end

  # Commandline options
  options do |opt|
    opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
      @by = args.map(&:to_sym)
    end
  end

  # Summarization over an input sorted on the by-key: aggregates are
  # started, fed and flushed through the Cesure callbacks below.
  class SortBased
    include Alf::Operator::Cesure

    attr_reader :cesure_key
    attr_reader :aggregators

    def initialize(by_key, aggregators)
      @cesure_key = by_key
      @aggregators = aggregators
    end

    protected

    # Resets every aggregation to its least value
    def start_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg| [name, agg.least] }
    end

    # Feeds the tuple to every aggregation
    def accumulate_cesure(tuple, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.happens(@aggs[name], tuple)]
      }
    end

    # Finalizes the aggregations and sends the key merged with the
    # results to the receiver
    def flush_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.finalize(@aggs[name])]
      }
      receiver.call key.merge(@aggs)
    end

  end # class SortBased

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Commandline arguments are read as AGG EXPR pairs; each EXPR is
  # compiled with Aggregator.compile.
  def set_args(args)
    pairs = args.each_slice(2)
    @aggregators = tuple_collect(pairs) { |name, expr|
      [name.to_sym, Aggregator.compile(expr)]
    }
    self
  end

  # (see Shortcut#longexpr)
  #
  # Chains a sort on the by-key with the SortBased summarization.
  def longexpr
    by_key = Tools::ProjectionKey.new(@by, false)
    summarizer = SortBased.new(by_key, @aggregators)
    sorter = Operator::NonRelational::Sort.new(by_key.to_ordering_key)
    chain summarizer, sorter, datasets
  end

end # class Summarize
2635
+
2636
+ #
2637
+ # Relational quota-queries (position, sum progression, etc.)
2638
+ #
2639
+ # SYNOPSIS
2640
+ # #{program_name} #{command_name} [OPERAND] --by=KEY1,... --order=OR1... AGG1 EXPR1...
2641
+ #
2642
+ # OPTIONS
2643
+ # #{summarized_options}
2644
+ #
2645
+ # API & EXAMPLE
2646
+ #
2647
+ # (quota :supplies, [:sid], [:qty],
2648
+ # :position => Aggregator.count,
2649
+ # :sum_qty => Aggregator.sum(:qty))
2650
+ #
2651
+ # DESCRIPTION
2652
+ #
2653
+ # This operator computes quota values on input tuples.
2654
+ #
2655
+ # alf quota supplies --by=sid --order=qty -- position count sum_qty "sum(:qty)"
2656
+ #
2657
class Quota < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Unary

  # Names of the attributes quotas are partitioned by
  attr_accessor :by

  # Names of the attributes ordering tuples inside each partition
  attr_accessor :order

  # Hash of aggregations: new attribute name => Aggregator
  attr_accessor :aggregators

  # Creates a Quota operator
  def initialize(by = [], order = [], aggregators = {})
    @by = by
    @order = order
    @aggregators = aggregators
  end

  options do |opt|
    opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
      @by = args.map(&:to_sym)
    end
    opt.on('--order=x,y,z', 'Specify order attributes', Array) do |args|
      @order = args.map(&:to_sym)
    end
  end

  # Quota computation over a sorted input: every tuple is emitted as soon
  # as it is accumulated, extended with the aggregated values so far.
  class SortBased
    include Operator::Cesure

    def initialize(by, order, aggregators)
      @by = by
      @order = order
      @aggregators = aggregators
    end

    # Projection key coerced from the by attributes
    def cesure_key
      ProjectionKey.coerce @by
    end

    # Ordering key coerced from the order attributes
    def ordering_key
      OrderingKey.coerce @order
    end

    # Resets every aggregation to its least value
    def start_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg| [name, agg.least] }
    end

    # Feeds the tuple to every aggregation, then sends the tuple merged
    # with the finalized running values to the receiver. The running
    # state itself (@aggs) is left un-finalized.
    def accumulate_cesure(tuple, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.happens(@aggs[name], tuple)]
      }
      snapshot = tuple_collect(@aggregators) { |name, agg|
        [name, agg.finalize(@aggs[name])]
      }
      receiver.call tuple.merge(snapshot)
    end

  end # class SortBased

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Commandline arguments are read as AGG EXPR pairs; each EXPR is
  # compiled with Aggregator.compile.
  def set_args(args)
    pairs = args.each_slice(2)
    @aggregators = tuple_collect(pairs) { |name, expr|
      [name.to_sym, Aggregator.compile(expr)]
    }
    self
  end

  # Projection key coerced from the by attributes
  def cesure_key
    ProjectionKey.coerce @by
  end

  # Ordering key coerced from the order attributes
  def ordering_key
    OrderingKey.coerce @order
  end

  # (see Shortcut#longexpr)
  #
  # Chains a sort on the by attributes followed by the order attributes
  # with the SortBased quota computation.
  def longexpr
    sort_key = cesure_key.to_ordering_key + ordering_key
    computer = SortBased.new(@by, @order, @aggregators)
    sorter = Operator::NonRelational::Sort.new(sort_key)
    chain computer, sorter, datasets
  end

end # class Quota
2741
+
2742
+ end
2743
+
2744
+ #
2745
+ # Aggregation operator.
2746
+ #
2747
class Aggregator

  # Options of this aggregator (default ones merged with those received)
  attr_reader :options

  #
  # Installs a factory method on subclass creation, named after the
  # subclass.
  #
  # Example:
  #   class Sum < Aggregator   # will give a method Aggregator.sum
  #     ...
  #   end
  #   Aggregator.sum(:size)    # factors a Sum aggregator on tuple[:size]
  #   Aggregator.sum{ size }   # idem but works on any tuple expression
  #
  def self.inherited(clazz)
    basename = Tools.ruby_case(Tools.class_name(clazz))
    factory = "def #{basename}(*args, &block)\n" \
              "  #{clazz}.new(*args, &block)\n" \
              "end\n"
    instance_eval factory
  end

  #
  # Evaluates an aggregation expression (e.g. "sum(:qty)") on this class,
  # so that the installed factory methods are in scope.
  #
  def self.compile(expr, &block)
    instance_eval(expr, &block)
  end

  #
  # Creates an Aggregator instance.
  #
  # The value to aggregate is given either as an attribute argument or
  # as a block; both go through Tools::TupleHandle.compile. A Hash passed
  # as first argument is taken as the options.
  #
  #   Aggregator.new(:size)           # will aggregate on tuple[:size]
  #   Aggregator.new{ size * price }  # ... on tuple[:size] * tuple[:price]
  #
  def initialize(attribute = nil, options = {}, &block)
    if attribute.is_a?(Hash)
      options = attribute
      attribute = nil
    end
    @handle = Tools::TupleHandle.new
    @options = default_options.merge(options)
    @functor = Tools::TupleHandle.compile(attribute || block)
  end

  #
  # Returns the default options; an empty Hash unless overridden.
  #
  def default_options
    {}
  end

  #
  # Returns the least value, i.e. the starting memo and the result on an
  # empty set. Meant to be overridden; nil by default.
  #
  def least
    nil
  end

  #
  # Called on each aggregated tuple; returns the updated _memo_, like the
  # block given to Enumerable.inject.
  #
  # The default implementation evaluates the functor on the tuple and
  # delegates to _happens.
  #
  def happens(memo, tuple)
    value = @handle.set(tuple).evaluate(@functor)
    _happens(memo, value)
  end

  #
  # Finalizes a computation.
  #
  # _memo_ is either _least_ or the result of aggregating through
  # _happens_. Returned unchanged by default; overridden by aggregations
  # keeping intermediate state (see Avg).
  #
  def finalize(memo)
    memo
  end

  #
  # Aggregates over an enumeration of tuples.
  #
  def aggregate(enum)
    memo = enum.reduce(least) { |acc, tuple| happens(acc, tuple) }
    finalize(memo)
  end

  protected

  #
  # @see happens.
  #
  # Meant to be overridden; returns _value_ by default, which makes the
  # base class behave as a "Last" aggregator.
  #
  def _happens(memo, value)
    value
  end

  #
  # Defines a COUNT aggregation operator
  #
  class Count < Aggregator
    def least
      0
    end

    # Counts tuples; the functor is never evaluated
    def happens(memo, tuple)
      memo + 1
    end
  end # class Count

  #
  # Defines a SUM aggregation operator
  #
  class Sum < Aggregator
    def least
      0
    end

    def _happens(memo, val)
      memo + val
    end
  end # class Sum

  #
  # Defines an AVG aggregation operator
  #
  class Avg < Aggregator
    # Memo is a [running total, number of values] pair
    def least
      [0.0, 0.0]
    end

    def _happens(memo, val)
      total, count = memo
      [total + val, count + 1]
    end

    def finalize(memo)
      total, count = memo
      total / count
    end
  end # class Avg

  #
  # Defines a MIN aggregation operator
  #
  class Min < Aggregator
    def least
      nil
    end

    def _happens(memo, val)
      return val if memo.nil?
      memo < val ? memo : val
    end
  end # class Min

  #
  # Defines a MAX aggregation operator
  #
  class Max < Aggregator
    def least
      nil
    end

    def _happens(memo, val)
      return val if memo.nil?
      memo > val ? memo : val
    end
  end # class Max

  #
  # Defines a GROUP aggregation operator: collects, without duplicates,
  # the projection of each tuple on the given attributes
  #
  class Group < Aggregator
    def initialize(*attrs)
      super(nil, {}) do
        Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
      end
    end

    def least
      []
    end

    def _happens(memo, val)
      memo << val
    end

    def finalize(memo)
      memo.uniq
    end
  end

  #
  # Defines a COLLECT aggregation operator
  #
  class Collect < Aggregator
    def least
      []
    end

    def _happens(memo, val)
      memo << val
    end
  end

  #
  # Defines a CONCAT aggregation operator
  #
  class Concat < Aggregator
    def least
      ""
    end

    def default_options
      {:before => "", :after => "", :between => ""}
    end

    # Appends the separator (except before the first value), then the
    # value itself
    def _happens(memo, val)
      memo << options[:between].to_s unless memo.empty?
      memo << val.to_s
    end

    def finalize(memo)
      options[:before].to_s + memo + options[:after].to_s
    end
  end

  Lispy::Agg = Aggregator
end # class Aggregator
2949
+
2950
+ #
2951
+ # Base class for implementing buffers.
2952
+ #
2953
class Buffer

  #
  # Keeps tuples ordered on a specific key
  #
  class Sorted < Buffer

    # ordering_key must respond to #sorter, whose result is used as the
    # comparison block given to Array#sort.
    def initialize(ordering_key)
      @ordering_key = ordering_key
      @buffer = []
    end

    # Adds all tuples of enum to the buffer, keeping the buffer sorted.
    def add_all(enum)
      sorter = @ordering_key.sorter
      @buffer = merge_sort(@buffer, enum.to_a.sort(&sorter), sorter)
    end

    # Yields the buffered tuples in order. The block is captured
    # explicitly because Proc.new without a block raises ArgumentError
    # since ruby 3.0.
    def each(&block)
      @buffer.each(&block)
    end

    private

    # Returns a sorted array made of the elements of s1 and s2.
    def merge_sort(s1, s2, sorter)
      (s1 + s2).sort(&sorter)
    end

  end # class Buffer::Sorted

end # class Buffer
2983
+
2984
+ end # module Alf