alf 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. data/CHANGELOG.md +5 -0
  2. data/Gemfile +2 -0
  3. data/Gemfile.lock +42 -0
  4. data/LICENCE.md +22 -0
  5. data/Manifest.txt +15 -0
  6. data/README.md +769 -0
  7. data/Rakefile +23 -0
  8. data/TODO.md +26 -0
  9. data/alf.gemspec +191 -0
  10. data/alf.noespec +30 -0
  11. data/bin/alf +31 -0
  12. data/examples/autonum.alf +6 -0
  13. data/examples/cities.rash +4 -0
  14. data/examples/clip.alf +3 -0
  15. data/examples/compact.alf +2 -0
  16. data/examples/database.alf +6 -0
  17. data/examples/defaults.alf +3 -0
  18. data/examples/extend.alf +3 -0
  19. data/examples/group.alf +3 -0
  20. data/examples/intersect.alf +4 -0
  21. data/examples/join.alf +2 -0
  22. data/examples/minus.alf +8 -0
  23. data/examples/nest.alf +2 -0
  24. data/examples/nulls.rash +3 -0
  25. data/examples/parts.rash +6 -0
  26. data/examples/project.alf +2 -0
  27. data/examples/quota.alf +4 -0
  28. data/examples/rename.alf +3 -0
  29. data/examples/restrict.alf +2 -0
  30. data/examples/runall.sh +26 -0
  31. data/examples/schema.yaml +28 -0
  32. data/examples/sort.alf +4 -0
  33. data/examples/summarize.alf +16 -0
  34. data/examples/suppliers.rash +5 -0
  35. data/examples/supplies.rash +12 -0
  36. data/examples/ungroup.alf +4 -0
  37. data/examples/union.alf +3 -0
  38. data/examples/unnest.alf +4 -0
  39. data/examples/with.alf +23 -0
  40. data/lib/alf.rb +2984 -0
  41. data/lib/alf/loader.rb +1 -0
  42. data/lib/alf/renderer/text.rb +153 -0
  43. data/lib/alf/renderer/yaml.rb +22 -0
  44. data/lib/alf/version.rb +14 -0
  45. data/spec/aggregator_spec.rb +62 -0
  46. data/spec/alf_spec.rb +47 -0
  47. data/spec/assumptions_spec.rb +15 -0
  48. data/spec/environment/explicit_spec.rb +15 -0
  49. data/spec/environment/folder_spec.rb +30 -0
  50. data/spec/examples_spec.rb +26 -0
  51. data/spec/lispy_spec.rb +23 -0
  52. data/spec/operator/command_methods_spec.rb +38 -0
  53. data/spec/operator/non_relational/autonum_spec.rb +61 -0
  54. data/spec/operator/non_relational/clip_spec.rb +49 -0
  55. data/spec/operator/non_relational/compact/buffer_based.rb +30 -0
  56. data/spec/operator/non_relational/compact/sort_based_spec.rb +30 -0
  57. data/spec/operator/non_relational/compact_spec.rb +38 -0
  58. data/spec/operator/non_relational/defaults_spec.rb +55 -0
  59. data/spec/operator/non_relational/sort_spec.rb +66 -0
  60. data/spec/operator/relational/extend_spec.rb +34 -0
  61. data/spec/operator/relational/group_spec.rb +54 -0
  62. data/spec/operator/relational/intersect_spec.rb +58 -0
  63. data/spec/operator/relational/join/hash_based_spec.rb +63 -0
  64. data/spec/operator/relational/minus_spec.rb +56 -0
  65. data/spec/operator/relational/nest_spec.rb +32 -0
  66. data/spec/operator/relational/project_spec.rb +65 -0
  67. data/spec/operator/relational/quota_spec.rb +44 -0
  68. data/spec/operator/relational/rename_spec.rb +32 -0
  69. data/spec/operator/relational/restrict_spec.rb +56 -0
  70. data/spec/operator/relational/summarize/sort_based_spec.rb +31 -0
  71. data/spec/operator/relational/summarize_spec.rb +41 -0
  72. data/spec/operator/relational/ungroup_spec.rb +35 -0
  73. data/spec/operator/relational/union_spec.rb +35 -0
  74. data/spec/operator/relational/unnest_spec.rb +32 -0
  75. data/spec/reader/alf_file_spec.rb +15 -0
  76. data/spec/reader/input.rb +2 -0
  77. data/spec/reader/rash_spec.rb +31 -0
  78. data/spec/reader_spec.rb +27 -0
  79. data/spec/renderer/text/cell_spec.rb +34 -0
  80. data/spec/renderer/text/row_spec.rb +30 -0
  81. data/spec/renderer/text/table_spec.rb +39 -0
  82. data/spec/renderer_spec.rb +42 -0
  83. data/spec/spec_helper.rb +26 -0
  84. data/spec/tools/ordering_key_spec.rb +81 -0
  85. data/spec/tools/projection_key_spec.rb +83 -0
  86. data/spec/tools/tools_spec.rb +25 -0
  87. data/spec/tools/tuple_handle_spec.rb +78 -0
  88. data/tasks/debug_mail.rake +78 -0
  89. data/tasks/debug_mail.txt +13 -0
  90. data/tasks/gem.rake +68 -0
  91. data/tasks/spec_test.rake +79 -0
  92. data/tasks/unit_test.rake +77 -0
  93. data/tasks/yard.rake +51 -0
  94. metadata +282 -0
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env alf
2
+ (sort :suppliers, [:name])
3
+ (sort :suppliers, [:city, :name])
4
+ (sort :suppliers, [[:city, :desc], [:name, :asc]])
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env alf
2
+
3
+ # What is the sum of supplied quantities by supplier?
4
+ (summarize :supplies,
5
+ [:sid],
6
+ :total_qty => Agg::sum(:qty))
7
+
8
# Give the maximal supplied quantity by country, taking only into account
# suppliers that have a status greater than 10.
#
# NOTE(review): the aggregation used to be Agg::sum, which contradicts both
# this comment and the :maxqty attribute name. Agg::max is assumed to be
# available in the Aggregator factory -- confirm against Alf::Aggregator.
(summarize \
  (join \
    (join (restrict :suppliers, lambda{ status > 10 }),
          :supplies),
    :cities),
  [:country],
  :maxqty => Agg::max{ qty })
@@ -0,0 +1,5 @@
1
+ {:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'}
2
+ {:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'}
3
+ {:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'}
4
+ {:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'}
5
+ {:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'}
@@ -0,0 +1,12 @@
1
+ {:sid => 'S1', :pid => 'P1', :qty => 300}
2
+ {:sid => 'S1', :pid => 'P2', :qty => 200}
3
+ {:sid => 'S1', :pid => 'P3', :qty => 400}
4
+ {:sid => 'S1', :pid => 'P4', :qty => 200}
5
+ {:sid => 'S1', :pid => 'P5', :qty => 100}
6
+ {:sid => 'S1', :pid => 'P6', :qty => 100}
7
+ {:sid => 'S2', :pid => 'P1', :qty => 300}
8
+ {:sid => 'S2', :pid => 'P2', :qty => 400}
9
+ {:sid => 'S3', :pid => 'P2', :qty => 200}
10
+ {:sid => 'S4', :pid => 'P2', :qty => 200}
11
+ {:sid => 'S4', :pid => 'P4', :qty => 300}
12
+ {:sid => 'S4', :pid => 'P5', :qty => 400}
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env alf
2
+ (ungroup \
3
+ (group :supplies, [:pid, :qty], :supplying),
4
+ :supplying)
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env alf
2
+ (union (project :suppliers, [:city]),
3
+ (project :parts, [:city]))
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env alf
2
+ (unnest \
3
+ (nest :suppliers, [:city, :status], :loc_and_status),
4
+ :loc_and_status)
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env alf
2
+
3
+ # Compute the total qty supplied in each country together with the subset
4
+ # of products shipped there. Only consider suppliers that have a status
5
+ # greater than 10, however.
6
+ (summarize \
7
+ (join \
8
+ (join (restrict :suppliers, lambda{ status > 10 }),
9
+ :supplies),
10
+ :cities),
11
+ [:country],
12
+ :which => Agg::group(:pid),
13
+ :total => Agg::sum{ qty })
14
+
15
+ # Another equivalent way to write it
16
+ with( :kept_suppliers => (restrict :suppliers, lambda{ status > 10 }),
17
+ :with_countries => (join :kept_suppliers, :cities),
18
+ :supplying => (join :with_countries, :supplies) ) do
19
+ (summarize :supplying,
20
+ [:country],
21
+ :which => Agg::group(:pid),
22
+ :total => Agg::sum{ qty })
23
+ end
@@ -0,0 +1,2984 @@
1
+ require "enumerator"
2
+ require "stringio"
3
+ require "alf/version"
4
+ require "alf/loader"
5
+
6
+ #
7
+ # Classy data-manipulation dressed in a DSL (+ commandline)
8
+ #
9
+ module Alf
10
+
11
+ #
12
+ # Provides tooling methods that are used here and there in Alf.
13
+ #
14
+ module Tools
15
+
16
#
# Extracts the last segment of a class or module name, that is, the name
# without its enclosing namespaces.
#
# Example
#
#   class_name(Alf::Tools) -> :Tools
#
# @param [Module] clazz a named class or module
# @return [Symbol] the unqualified name of clazz
#
def class_name(clazz)
  qualified = clazz.name.to_s
  qualified[/([A-Za-z0-9_]+)$/, 1].to_sym
end
27
+
28
#
# Converts an unqualified class or module name to a ruby case method name.
#
# Every uppercase letter is replaced by an underscore followed by its
# lowercase version; a single leading underscore (as produced by a leading
# uppercase letter) is then removed. Names that do not start with an
# uppercase letter therefore keep their first character.
#
# Example
#
#   ruby_case(:Alf)         -> "alf"
#   ruby_case(:HelloWorld)  -> "hello_world"
#   ruby_case("helloWorld") -> "hello_world"
#
# @param [#to_s] s a class or module name
# @return [String] the ruby case version of the name
#
def ruby_case(s)
  s.to_s.gsub(/[A-Z]/) { |upper| "_#{upper.downcase}" }.sub(/\A_/, "")
end
39
+
40
#
# Returns the first argument that is not nil (false counts as a value).
# Returns nil when there is no such argument.
#
# Example
#
#   coalesce(nil, 1, "abc") -> 1
#
def coalesce(*args)
  args.each { |candidate| return candidate unless candidate.nil? }
  nil
end
50
+
51
#
# Builds a Hash out of an enumeration.
#
# The block is yielded with each element of enum and is expected to
# return a [key, value] pair; all pairs are gathered in the returned Hash
# (a later pair overrides an earlier one with the same key).
#
def tuple_collect(enum)
  {}.tap do |collected|
    enum.each do |item|
      key, value = yield(item)
      collected.store(key, value)
    end
  end
end
64
+
65
#
# Flyweight handle on tuples: a single handle instance is reused for many
# tuples and exposes the attributes of the current one as methods, so that
# expressions can be evaluated in its context.
#
class TupleHandle

  # Creates a handle that is not bound to any tuple yet.
  def initialize
    @tuple = nil
  end

  #
  # Makes `tuple` the current tuple and returns the handle itself.
  #
  # On the first call only, accessor methods are installed on the handle
  # for each key of the tuple.
  #
  def set(tuple)
    build(tuple) if @tuple.nil?
    @tuple = tuple
    self
  end

  #
  # Compiles a tuple expression to a Proc that can later be passed to
  # evaluate.
  #
  # * a Proc is returned unchanged
  # * nil and empty hashes compile to the 'true' expression
  # * a Hash compiles to a conjunction of (key == value) tests
  # * an Array is first converted with Hash[*expr]
  # * a String or Symbol is wrapped in a lambda through eval
  #
  # Anything else raises an ArgumentError.
  #
  def self.compile(expr)
    case expr
    when Proc     then expr
    when NilClass then compile('true')
    when Hash
      return compile(nil) if expr.empty?
      # TODO: replace inspect by to_ruby
      compile(expr.each_pair.map { |name, value|
        "(#{name} == #{value.inspect})"
      }.join(" && "))
    when Array
      compile(Hash[*expr])
    when String, Symbol
      eval("lambda{ #{expr} }")
    else
      raise ArgumentError, "Unable to compile #{expr} to a TupleHandle"
    end
  end

  #
  # Evaluates an expression in the context of the handle, hence on the
  # current tuple. The expression is compiled on the fly, see compile for
  # supported kinds.
  #
  def evaluate(expr)
    compiled = TupleHandle.compile(expr)
    if RUBY_VERSION < "1.9"
      instance_eval(&compiled)
    else
      instance_exec(&compiled)
    end
  end

  private

  #
  # Installs one singleton reader method per key of `tuple`. Each reader
  # looks its key up in the current tuple at call time.
  #
  # Called once, by the first invocation of set.
  #
  def build(tuple)
    singleton = (class << self; self; end)
    tuple.keys.each do |key|
      singleton.send(:define_method, key) { @tuple[key] }
    end
  end

end # class TupleHandle
145
+
146
#
# A projection key: a list of attribute names together with an `allbut`
# flag telling whether the projection keeps or removes those attributes.
#
class ProjectionKey
  include Tools

  # Projection attributes
  attr_accessor :attributes

  # Allbut projection?
  attr_accessor :allbut

  # Creates a key on `attributes`; `allbut` defaults to false.
  def initialize(attributes, allbut = false)
    @attributes, @allbut = attributes, allbut
  end

  #
  # Coerces `arg` to a projection key.
  #
  # Arrays and ordering keys lead to a new, non-allbut key; a projection
  # key is returned as is. Anything else raises an ArgumentError.
  #
  def self.coerce(arg)
    case arg
    when Array         then ProjectionKey.new(arg, false)
    when OrderingKey   then ProjectionKey.new(arg.attributes, false)
    when ProjectionKey then arg
    else
      raise ArgumentError, "Unable to coerce #{arg} to a projection key"
    end
  end

  # Converts to an ordering key, every attribute being ordered ascending.
  def to_ordering_key
    OrderingKey.new(attributes.collect { |name| [name, :asc] })
  end

  # Returns the first part of split(tuple).
  def project(tuple)
    split(tuple).first
  end

  #
  # Splits `tuple` (left untouched) in two hashes: one with the key
  # attributes, one with all the others. Returns [attributes, others], or
  # [others, attributes] when the key is an allbut one.
  #
  def split(tuple)
    kept      = {}
    remainder = tuple.dup
    attributes.each do |name|
      kept[name] = tuple[name]
      remainder.delete(name)
    end
    return [remainder, kept] if @allbut
    [kept, remainder]
  end

end # class ProjectionKey
196
+
197
#
# Encapsulates tools for computing orders on tuples.
#
# An ordering is an array of [attribute, direction] pairs, where the
# direction is :asc or :desc.
#
class OrderingKey

  # The ordering, as an array of [attribute, direction] pairs
  attr_reader :ordering

  # Creates a key on an ordering (empty by default).
  def initialize(ordering = [])
    @ordering = ordering
    @sorter = nil
  end

  #
  # Coerces `arg` to an ordering key.
  #
  # An Array made of symbols only is taken as a list of attributes, all
  # ordered ascending; any other Array is used as the ordering itself.
  # Projection keys are converted, ordering keys returned as is. Anything
  # else raises an ArgumentError.
  #
  def self.coerce(arg)
    case arg
    when Array
      if arg.all?{|a| a.is_a?(Symbol)}
        arg = arg.collect{|a| [a, :asc]}
      end
      OrderingKey.new(arg)
    when ProjectionKey
      arg.to_ordering_key
    when OrderingKey
      arg
    else
      raise ArgumentError, "Unable to coerce #{arg} to an ordering key"
    end
  end

  # Returns the ordered attributes, in ordering order.
  def attributes
    @ordering.collect{|arg| arg.first}
  end

  #
  # Appends an attribute to the ordering (in place) and returns the key
  # itself, so that calls can be chained.
  #
  def order_by(attr, order = :asc)
    @ordering << [attr, order]
    @sorter = nil
    self
  end

  #
  # Returns the direction (:asc or :desc) associated with `attr`, or nil
  # when the attribute is not part of this key.
  #
  def order_of(attr)
    pair = @ordering.find{|arg| arg.first == attr}
    # find returns nil for unknown attributes; do not call #last on it
    pair && pair.last
  end

  #
  # Compares two tuples according to the ordering. Returns the result of
  # <=> on the first attribute for which the tuples differ (negated for
  # :desc attributes), or 0 when no attribute distinguishes them.
  #
  def compare(t1,t2)
    @ordering.each do |attr,order|
      comp = (t1[attr] <=> t2[attr])
      comp *= -1 if order == :desc
      return comp unless comp == 0
    end
    return 0
  end

  # Returns a (memoized) lambda comparing two tuples through compare.
  def sorter
    @sorter ||= lambda{|t1,t2| compare(t1, t2)}
  end

  #
  # Returns a new key whose ordering is this one followed by the ordering
  # of `other`, which is first coerced to an ordering key.
  #
  def +(other)
    other = OrderingKey.coerce(other)
    OrderingKey.new(@ordering + other.ordering)
  end

end # class OrderingKey
258
+
259
+ extend Tools
260
+ end # module Tools
261
+
262
#
# Factors a lispy engine bound to an environment.
#
# Example(s):
#
#   # A lispy engine on the default environment
#   lispy = Alf.lispy
#
#   # A lispy engine on the examples' environment
#   lispy = Alf.lispy(Alf::Environment.examples)
#
#   # A lispy engine on a folder environment of your choice
#   lispy = Alf.lispy(Alf::Environment.folder('path/to/a/folder'))
#
# @param [Environment] env the environment to use, the default one if
#        omitted
# @see Alf::Environment about available environments and their contract
#
def self.lispy(env = Alf::Environment.default)
  engine_class = Command::Main
  engine_class.new(env)
end
281
+
282
#
# Implements a small LISP-like DSL on top of Alf.
#
# The lispy dialect is the functional one used in .alf files and in compiled
# expressions as below:
#
#   Alf.lispy.compile do
#     (restrict :suppliers, lambda{ city == 'London' })
#   end
#
# The DSL this module provides is part of Alf's public API and won't be broken
# without a major version change. The module itself and its inclusion pre-
# conditions are not part of the DSL itself, thus not considered as part of
# the API, and may therefore evolve at any time. In other words, this module
# is not intended to be directly included by third-party classes.
#
module Lispy

  # The environment used to resolve named datasets
  attr_accessor :environment

  #
  # Compiles a query expression given by a String or a block and returns
  # the result (typically a tuple iterator).
  #
  # NOTE(review): String expressions are evaluated as ruby code in the
  # context of this object (instance_eval); only compile trusted sources.
  #
  def compile(expr = nil, &block)
    expr.nil? ? instance_eval(&block) : instance_eval(expr)
  end

  #
  # Returns the dataset called `name`, as resolved by the environment.
  # Raises a RuntimeError when no environment has been set.
  #
  def dataset(name)
    raise "Environment not set" unless @environment
    @environment.dataset(name)
  end

  #
  # Compiles the subexpression given by the block in the context of
  # additional temporary expressions given by definitions.
  #
  # The environment is branched with the definitions for the duration of
  # the compilation and restored afterwards, even when the compilation
  # raises an error.
  #
  # @param [Hash] definitions (name, operator) pairs
  # @return the result of compiling the block
  # @raise [ArgumentError] when no block is given
  #
  def with(definitions, &block)
    # The block is captured explicitly: Proc.new without a block is not
    # supported anymore by recent rubies.
    raise ArgumentError, "a block is required" unless block

    # We branch with the definitions for compilation
    self.environment = environment.branch(definitions)
    begin
      # this is to ensure that sub definitions can reuse other ones
      definitions.each_value do |defn|
        defn.environment = environment
      end

      # compile now
      compile(&block)
    ensure
      # We now unbranch for next expression
      self.environment = environment.unbranch
    end
  end

  #
  # Chains some elements as a new operator: starting from the last
  # element, each one is piped as the input of the element that precedes
  # it. Returns the first element.
  #
  def chain(*elements)
    elements = elements.reverse
    elements[1..-1].inject(elements.first) do |c, elm|
      elm.pipe(c, environment)
      elm
    end
  end

  # Installs one DSL method (e.g. autonum) per non relational operator;
  # the first argument is the operand, others go to the operator.
  [ :Autonum, :Clip, :Compact, :Defaults, :Sort ].each do |op_name|
    meth_name = Tools.ruby_case(op_name).to_sym
    define_method(meth_name) do |child, *args|
      chain(Operator::NonRelational.const_get(op_name).new(*args), child)
    end
  end

  # Same thing for relational operators taking a single operand
  [ :Project,
    :Extend,
    :Rename,
    :Restrict,
    :Nest,
    :Unnest,
    :Group,
    :Ungroup,
    :Summarize,
    :Quota ].each do |op_name|
    meth_name = Tools.ruby_case(op_name).to_sym
    define_method(meth_name) do |child, *args|
      chain(Operator::Relational.const_get(op_name).new(*args), child)
    end
  end

  # Projection on all attributes but the specified ones
  def allbut(child, attributes)
    chain(Operator::Relational::Project.new(attributes, true), child)
  end

  # Relational operators taking two operands, piped as a [left, right] pair
  [ :Join,
    :Union,
    :Intersect,
    :Minus ].each do |op_name|
    meth_name = Tools.ruby_case(op_name).to_sym
    define_method(meth_name) do |left, right, *args|
      chain(Operator::Relational.const_get(op_name).new(*args), [left, right])
    end
  end

end # module Lispy
389
+
390
#
# Encapsulates the interface with the outside world, providing base iterators
# for named datasets, among others.
#
# An environment is typically obtained through the factory defined by this
# class:
#
#   # Returns the default environment (examples, for now)
#   Alf::Environment.default
#
#   # Returns an environment on Alf's examples
#   Alf::Environment.examples
#
#   # Returns an environment on a specific folder, automatically
#   # resolving datasources via Readers' recognized file extensions
#   Alf::Environment.folder('path/to/a/folder')
#
# You can implement your own environment by subclassing this class and
# implementing the {#dataset} method. As additional support is implemented
# in the base class, Environment should never be mimicked.
#
class Environment

  #
  # Returns a dataset whose name is provided.
  #
  # This method resolves named datasets to tuple enumerables. When the
  # dataset exists, this method must return an Iterator, typically a
  # Reader instance. Otherwise, it must raise an error (the subclasses
  # defined here raise a RuntimeError).
  #
  # The method is declared for documentation purposes and immediately
  # undefined: subclasses have to provide it.
  #
  # @param [Symbol] name the name of a dataset
  # @return [Iterator] an iterator, typically a Reader instance
  #
  def dataset(name)
  end
  undef :dataset

  #
  # Branches this environment and puts some additional explicit
  # definitions.
  #
  # This method is provided for (with ...) expressions and should not
  # be overridden by subclasses.
  #
  # @param [Hash] defs a set of (name, Iterator) pairs.
  # @return [Environment] an environment instance with new definitions set
  #
  def branch(defs)
    Explicit.new(defs, self)
  end

  #
  # Specialization of Environment that works with explicitly defined
  # datasources and allows branching and unbranching.
  #
  class Explicit < Environment

    #
    # Creates a new environment instance with initial definitions
    # and optional child environment.
    #
    def initialize(defs = {}, child = nil)
      @defs = defs
      @child = child
    end

    #
    # Unbranches this environment and returns its child
    #
    def unbranch
      @child
    end

    #
    # Returns the dataset defined under `name`, delegating to the child
    # environment (if any) when this one has no such definition.
    #
    # @raise [RuntimeError] when the dataset cannot be found
    #
    def dataset(name)
      if @defs.has_key?(name)
        @defs[name]
      elsif @child
        @child.dataset(name)
      else
        raise "No such dataset #{name}"
      end
    end

  end # class Explicit

  #
  # Specialization of Environment to work on files of a given folder.
  #
  # This kind of environment resolves datasets by simply looking at
  # recognized files in a specific folder. "Recognized" files are simply
  # those for which a Reader subclass has been previously registered.
  # This environment then serves reader instances.
  #
  class Folder < Environment

    #
    # Creates an environment instance, wired to the specified folder.
    #
    # @param [String] folder path to the folder to use as dataset source
    #
    def initialize(folder)
      @folder = folder
    end

    #
    # Returns a reader on the file found for `name`.
    #
    # @raise [RuntimeError] when no file can be found for the dataset
    #
    def dataset(name)
      if file = find_file(name)
        Reader.reader(file, self)
      else
        raise "No such dataset #{name} (#{@folder})"
      end
    end

    protected

    #
    # Finds the file to use for the dataset `name`. In order: `name` taken
    # as a path of its own, a file with that exact name in the folder, the
    # first file of the folder matching "name.*". Returns nil when nothing
    # is found.
    #
    def find_file(name)
      # TODO: refactor this, because it allows getting out of the folder
      # File.exist? is used because File.exists? has been removed from
      # recent rubies
      if File.exist?(name.to_s)
        name.to_s
      elsif File.exist?(explicit = File.join(@folder, name.to_s)) &&
            File.file?(explicit)
        explicit
      else
        Dir[File.join(@folder, "#{name}.*")].find do |f|
          File.file?(f)
        end
      end
    end

  end # class Folder

  #
  # Factors a Folder environment on a specific path
  #
  def self.folder(path)
    Folder.new(path)
  end

  #
  # Returns the default environment
  #
  def self.default
    examples
  end

  #
  # Returns the examples environment
  #
  def self.examples
    folder File.expand_path('../../examples', __FILE__)
  end

end # class Environment
545
+
546
#
# Marker module for all elements implementing tuple iterators.
#
# An iterator is basically an Enumerable that serves tuples (represented by
# ruby hashes). The marker lets Alf's internal classes tell the enumerables
# that may safely be taken as tuple iterators apart from the other ones.
# Every element willing to take part in an iteration chain (that is, a
# logical operator implementation) should therefore be marked with this
# module, as is the case for all Readers and Operators defined in Alf.
#
# An Iterator should also always define a {#pipe} method, which is the
# natural way to set the input and execution environment of operators and
# readers.
#
module Iterator
  include Enumerable

  #
  # Wires the iterator input and an optional execution environment.
  #
  # Iterators (typically Reader and Operator instances) work on input data
  # coming from files, other operators, and so on. This method wires that
  # input to the iterator and has to be called before any attempt to call
  # each, unless wiring occurs at construction. The exact kind of input
  # object is left to Iterator implementations.
  #
  # The method is declared for documentation purposes and immediately
  # undefined: including classes have to provide it.
  #
  # @param [Object] input the iterator input, at discretion of the Iterator
  #        implementation.
  # @param [Environment] environment an optional environment for resolving
  #        named datasets if needed.
  #
  def pipe(input, environment = nil)
  end
  undef :pipe

  #
  # Coerces something to an iterator: Iterator instances and Arrays are
  # returned as is, anything else is handed over to Reader.coerce together
  # with the environment.
  #
  def self.coerce(arg, env)
    return arg if Iterator === arg || Array === arg
    Reader.coerce(arg, env)
  end

end # module Iterator
596
+
597
#
# Implements an Iterator at the interface with the outside world.
#
# The contract of a Reader is simply to be an Iterator. Unlike operators,
# however, readers are not expected to take other iterators as input, but IO
# objects, database tables, or something similar instead. This base class
# provides a default behavior for readers that works with IO objects. It can
# be safely extended, overridden, or even mimicked (provided that you include
# and implement the Iterator contract).
#
# This class also provides a registration mechanism to help getting Reader
# instances for specific file extensions. A typical scenario for using this
# registration mechanism is as follows:
#
#   # Registers a reader kind named :foo, associated with ".foo" file
#   # extensions and the FooFileDecoder class (typically a subclass of
#   # Reader)
#   Reader.register(:foo, [".foo"], FooFileDecoder)
#
#   # Later on, you can request a reader instance for a .foo file, as
#   # illustrated below.
#   r = Reader.reader('/a/path/to/a/file.foo')
#
#   # Also, a factory method is automatically installed on the Reader class
#   # itself. This factory method can be used with a String, or an IO object.
#   r = Reader.foo([a path or a IO object])
#
class Reader
  include Iterator

  # Registered readers, as [name, extensions, class] triples
  @@readers = []

  #
  # Registers a reader class associated with specific file extensions
  #
  # Registered class must provide a constructor with the following signature
  # <code>new(path_or_io, environment = nil)</code>. The name must be a symbol
  # which can safely be used as a ruby method name. A factory class method of
  # that name and same signature is automatically installed on the Reader
  # class.
  #
  # @param [Symbol] name a name for the kind of data decoded
  # @param [Array] extensions file extensions mapped to the registered reader
  #        class (should include the '.', e.g. '.foo')
  # @param [Class] clazz Reader subclass used to decode this kind of files
  #
  def self.register(name, extensions, clazz)
    @@readers << [name, extensions, clazz]
    (class << self; self; end).
      send(:define_method, name) do |*args|
        clazz.new(*args)
      end
  end

  #
  # Returns a reader instance for a specific file whose path is given
  # as argument.
  #
  # @param [String] filepath path to a file for which extension is recognized
  # @param [Array] args optional additional arguments that must be passed at
  #        reader's class new method.
  # @return [Reader] a reader instance
  # @raise [RuntimeError] when no reader is registered for the file extension
  #
  def self.reader(filepath, *args)
    ext = File.extname(filepath)
    if registered = @@readers.find{|r| r[1].include?(ext)}
      registered[2].new(filepath, *args)
    else
      raise "No registered reader for #{ext} (#{filepath})"
    end
  end

  #
  # Coerces an argument to a reader, using an optional environment to convert
  # named datasets.
  #
  # This method automatically provides readers for Strings and Symbols through
  # passed environment (**not** through the reader factory) and for IO and
  # StringIO objects (through Rash reader). It is part of Alf's internals and
  # should be used with care.
  #
  # @raise [RuntimeError] for a String or Symbol when no environment is given
  # @raise [ArgumentError] when the argument cannot be coerced
  #
  def self.coerce(arg, environment = nil)
    case arg
    when Reader
      arg
    when IO, StringIO
      # StringIO is accepted as well, consistently with with_input_io
      rash(arg, environment)
    when String, Symbol
      if environment
        environment.dataset(arg.to_sym)
      else
        raise "No environment set"
      end
    else
      raise ArgumentError, "Unable to coerce #{arg.inspect} to a reader"
    end
  end

  # @return [Environment] Wired environment
  attr_accessor :environment

  # @return [String or IO] Input IO, or file name
  attr_accessor :input

  #
  # Creates a reader instance, with an optional input and environment wiring.
  #
  # @param [String or IO] input path to a file or IO object for input
  # @param [Environment] environment wired environment, serving this reader
  #
  def initialize(input = nil, environment = nil)
    @input = input
    @environment = environment
  end

  #
  # Wires the input and, when provided, the environment of this reader
  # (the current environment is kept when `env` is omitted). Returns the
  # wired input.
  #
  def pipe(input, env = environment)
    # the environment used to be silently ignored here, leaving readers
    # piped with an environment (e.g. through Lispy#chain) unwired
    @environment = env
    @input = input
  end

  #
  # Yields the block with each tuple in turn.
  #
  # The default implementation reads lines of the input stream and yields
  # the block with <code>line2tuple(line)</code> on each of them, lines
  # for which nil is returned being skipped. This method may be overridden
  # if this behavior does not fit reader's needs.
  #
  def each
    each_input_line do |line|
      tuple = line2tuple(line)
      yield tuple unless tuple.nil?
    end
  end

  protected

  #
  # Coerces the input object to an IO and yields the block with it.
  #
  # StringIO and IO inputs are yielded directly, while file paths are first
  # opened in read mode (and closed once the block returns).
  #
  # @raise [RuntimeError] when the input is of another kind
  #
  def with_input_io
    case input
    when IO, StringIO
      yield input
    when String
      File.open(input, 'r'){|io| yield io}
    else
      raise "Unable to convert #{input} to an IO object"
    end
  end

  #
  # Returns the whole input text.
  #
  # This feature should only be used by subclasses on inputs that are
  # small enough to fit in memory. Consider implementing readers without this
  # feature on files that could be larger.
  #
  def input_text
    with_input_io{|io| io.readlines.join}
  end

  #
  # Yields the block with each line of the input text in turn.
  #
  # This method is a helper for files that capture one tuple on each input
  # line. It should be used in those cases, as the resulting reader will not
  # load all input in memory but serve tuples on demand.
  #
  def each_input_line(&block)
    # explicit block capture: Proc.new without a block is not supported
    # anymore by recent rubies
    with_input_io{|io| io.each_line(&block)}
  end

  #
  # Converts a line previously read from the input stream to a tuple.
  #
  # The line is simply ignored if this method returns nil. Errors should be
  # properly handled by raising exceptions. This method MUST be implemented
  # by subclasses unless each is overridden (it is declared for
  # documentation purposes and immediately undefined).
  #
  def line2tuple(line)
  end
  undef :line2tuple

  #
  # Specialization of the Reader contract for .rash files.
  #
  # A .rash file/stream contains one ruby hash literal on each line. This
  # reader simply decodes each of them in turn with Kernel.eval, providing a
  # state-less reader (that is, tuples are not all loaded in memory at once).
  #
  # NOTE(review): lines are evaluated as ruby code; only read trusted
  # sources.
  #
  class Rash < Reader

    #
    # Decodes a line to a tuple. Lines that cannot be evaluated, or that
    # do not evaluate to a Hash, are reported on $stderr and skipped
    # (nil is returned).
    #
    def line2tuple(line)
      begin
        h = Kernel.eval(line)
        raise "hash expected, got #{h}" unless h.is_a?(Hash)
      rescue StandardError, ScriptError => ex
        # ScriptError covers syntax errors of malformed lines; signals and
        # exit requests are no longer swallowed
        $stderr << "Skipping #{line.strip}: #{ex.message}\n"
        nil
      else
        return h
      end
    end

    Reader.register(:rash, [".rash"], self)
  end # class Rash

  #
  # Specialization of the Reader contract for .alf files.
  #
  # A .alf file simply contains a query expression in the Lispy DSL. This
  # reader decodes and compiles the expression and delegates the enumeration
  # to the obtained operator.
  #
  # Note that an Environment must be wired at creation or piping time,
  # as named datasets are resolved through it.
  #
  class AlfFile < Reader

    # Compiles the input text and yields the tuples of the resulting operator
    def each(&block)
      op = Alf.lispy(environment).compile(input_text)
      op.each(&block)
    end

    Reader.register(:alf, [".alf"], self)
  end # class AlfFile

end # class Reader
833
+
834
+ #
835
+ # Renders a relation (given by any Iterator) in a specific format.
836
+ #
837
+ # A renderer takes an Iterator instance as input and renders it on an output
838
+ # stream. Renderers are **not** iterators themselves, even if they mimic the
839
+ # {#pipe} method. Their usage is made via the {#execute} method.
840
+ #
841
+ # Similarly to the {Reader} class, this one provides a registration mechanism
842
+ # for specific output formats. The common scenario is as follows:
843
+ #
844
+ # # Register a new renderer for :foo format (automatically provides the
845
+ # # '--foo Render output as a foo stream' option of 'alf show') and with
846
+ # # the FooRenderer class for handling rendering.
847
+ # Renderer.register(:foo, "as a foo stream", FooRenderer)
848
+ #
849
+ # # Later on, you can request a renderer instance for a specific format
850
+ # # as follows (wiring input is optional)
851
+ # r = Renderer.renderer(:foo, [an Iterator])
852
+ #
853
+ # # Also, a factory method is automatically installed on the Renderer class
854
+ # # itself.
855
+ # r = Renderer.foo([an Iterator])
856
+ #
857
class Renderer

  # Registered renderers, as [name, description, class] triples
  @@renderers = []

  #
  # Registers a rendering class with a given name and description.
  #
  # Registered class must at least provide a constructor with an empty
  # signature. The name must be a symbol which can safely be used as a ruby
  # method name. A factory class method of that name and delegation signature
  # is automatically installed on the Renderer class.
  #
  # @param [Symbol] name a name for the output format
  # @param [String] description an output format description (for 'alf show')
  # @param [Class] clazz Renderer subclass used to render in this format
  #
  def self.register(name, description, clazz)
    @@renderers << [name, description, clazz]
    (class << self; self; end).
      send(:define_method, name) do |*args|
        clazz.new(*args)
      end
  end

  #
  # Returns a Renderer instance for the given output format name.
  #
  # @param [Symbol] name name of an output format previously registered
  # @param [...] args other arguments to pass to the renderer constructor
  # @return [Renderer] a Renderer instance, already wired if args are
  #         provided
  # @raise [RuntimeError] if no renderer is registered under that name
  #
  def self.renderer(name, *args)
    if r = @@renderers.find{|triple| triple[0] == name}
      r[2].new(*args)
    else
      raise "No renderer registered for #{name}"
    end
  end

  #
  # Yields each (name,description,clazz) previously registered in turn
  #
  # The block is captured explicitly because a block-less Proc.new raises an
  # ArgumentError since Ruby 3.0.
  #
  def self.each_renderer(&block)
    @@renderers.each(&block)
  end

  # Renderer input (typically an Iterator)
  attr_accessor :input

  # @return [Environment] Optional wired environment
  attr_accessor :environment

  #
  # Creates a renderer instance, optionally wired to an input
  #
  def initialize(input = nil)
    @input = input
  end

  #
  # Sets the renderer input.
  #
  # This method mimics {Iterator#pipe} and has the same contract.
  #
  def pipe(input, env = environment)
    self.environment = env
    self.input = input
  end

  #
  # Executes the rendering, outputting the resulting tuples on the provided
  # output buffer.
  #
  # The default implementation simply coerces the input as an Iterator and
  # delegates the call to {#render}.
  #
  def execute(output = $stdout)
    render(Iterator.coerce(input, environment), output)
  end

  protected

  #
  # Renders tuples served by the iterator to the output buffer provided and
  # returns the latter.
  #
  # This method must be implemented by subclasses unless {#execute} is
  # overridden. It is declared here for documentation purposes only and
  # immediately undefined.
  #
  def render(iterator, output)
  end
  undef :render

  #
  # Implements the Renderer contract through inspect
  #
  class Rash < Renderer

    # (see Renderer#render)
    def render(input, output)
      input.each do |tuple|
        output << tuple.inspect << "\n"
      end
      output
    end

    Renderer.register(:rash, "as ruby hashes", self)
  end # class Rash

  require "alf/renderer/text"
  require "alf/renderer/yaml"
end # class Renderer
971
+
972
+ #
973
+ # Provides a factory over Alf operators and handles the interface with
974
+ # Quickl for commandline support.
975
+ #
976
+ # This module is part of Alf's internal architecture and should not be used
977
+ # at all by third-party projects.
978
+ #
979
module Factory

  # Builds a Quickl command whose parent is Alf::Command::Main. When a block
  # is given, it receives the command builder for further configuration.
  #
  # @see Quickl::Command
  def Command(file, line, &config)
    Quickl::Command(file, line) do |builder|
      builder.command_parent = Alf::Command::Main
      config.call(builder) if config
    end
  end

  # Builds a command (see #Command) that also gets Alf::Operator installed
  # as instance module.
  #
  # @see Operator
  def Operator(file, line)
    Command(file, line) { |builder| builder.instance_module Alf::Operator }
  end

  extend Factory
end # module Factory
998
+
999
+ #
1000
+ # Marker module and namespace for Alf main commands, those that are **not**
1001
+ # operators at all.
1002
+ #
1003
+ module Command
1004
+
1005
+ #
1006
+ # alf - Classy data-manipulation dressed in a DSL (+ commandline)
1007
+ #
1008
+ # SYNOPSIS
1009
+ # alf [--version] [--help]
1010
+ # alf -e '(lispy command)'
1011
+ # alf [FILE.alf]
1012
+ # alf [alf opts] OPERATOR [operator opts] ARGS ...
1013
+ # alf help OPERATOR
1014
+ #
1015
+ # OPTIONS
1016
+ # #{summarized_options}
1017
+ #
1018
+ # RELATIONAL COMMANDS
1019
+ # #{summarized_subcommands subcommands.select{|cmd|
1020
+ # cmd.include?(Alf::Operator::Relational)
1021
+ # }}
1022
+ #
1023
+ # NON-RELATIONAL COMMANDS
1024
+ # #{summarized_subcommands subcommands.select{|cmd|
1025
+ # cmd.include?(Alf::Operator::NonRelational)
1026
+ # }}
1027
+ #
1028
+ # OTHER NON-RELATIONAL COMMANDS
1029
+ # #{summarized_subcommands subcommands.select{|cmd|
1030
+ # cmd.include?(Alf::Command)
1031
+ # }}
1032
+ #
1033
+ # See '#{program_name} help COMMAND' for details about a specific command.
1034
+ #
1035
class Main < Quickl::Delegator(__FILE__, __LINE__)
  include Command, Lispy

  # Environment instance to use to get base iterators, and output renderer
  attr_accessor :environment, :renderer

  # Creates a command instance on a given environment
  def initialize(env = Environment.default)
    @environment = env
  end

  # Install options
  options do |opt|
    @execute = false
    opt.on("-e", "--execute", "Execute one line of script (Lispy API)") {
      @execute = true
    }

    # Rash is the default renderer; one --NAME switch is installed per
    # registered renderer to select another one.
    @renderer = Renderer::Rash.new
    Renderer.each_renderer do |name, descr, clazz|
      opt.on("--#{name}", "Render output #{descr}") do
        @renderer = clazz.new
      end
    end

    opt.on('--env=FOLDER', "Set the environment folder to use") { |value|
      @environment = Environment.folder(value)
    }

    opt.on_tail('-h', "--help", "Show help") {
      raise Quickl::Help
    }

    opt.on_tail('-v', "--version", "Show version") {
      raise Quickl::Exit,
            "#{program_name} #{Alf::VERSION} (c) 2011, Bernard Lambeau"
    }
  end # Alf's options

  #
  # Overridden because Quickl only keep --options but modifying it there
  # should probably be considered a broken API.
  #
  def _run(argv = [])
    # 1) Shift leading dash-prefixed arguments off argv and parse them as
    #    my own options
    my_argv = []
    my_argv << argv.shift while argv.first =~ /^-/
    parse_options(my_argv)

    # 2) With -e, the next argument is evaluated as a Lispy expression on
    #    this instance; otherwise the default delegation applies
    operator = @execute ? instance_eval(argv.first) : super

    # 3) Without a requester (or an operator) the result is returned as is;
    #    with one (assuming bin/alf) it is rendered on standard output
    return operator unless operator && requester
    chain(renderer, operator).execute($stdout)
  end

end
1108
+
1109
+ #
1110
+ # Output input tuples through a specific renderer (text, yaml, ...)
1111
+ #
1112
+ # SYNOPSIS
1113
+ # #{program_name} #{command_name} [DATASET...]
1114
+ #
1115
+ # OPTIONS
1116
+ # #{summarized_options}
1117
+ #
1118
+ # DESCRIPTION
1119
+ #
1120
+ # When dataset names are specified as commandline args, request the environment
1121
+ # to provide those datasets and print them. Otherwise, take what comes on standard
1122
+ # input.
1123
+ #
1124
+ # Note that this command is not an operator and should not be piped anymore.
1125
+ #
1126
class Show < Factory::Command(__FILE__, __LINE__)
  include Command

  # Text is the default renderer; one --NAME switch is installed per
  # registered renderer to select another one.
  options do |opt|
    @renderer = Renderer::Text.new
    Renderer.each_renderer do |name, descr, clazz|
      opt.on("--#{name}", "Render output #{descr}") do
        @renderer = clazz.new
      end
    end
  end

  # Installs the selected renderer on the requester and chains the given
  # arguments on it, falling back to standard input when none is given.
  def execute(args)
    requester.renderer = @renderer
    sources = args.empty? ? [ $stdin ] : args
    requester.chain(*sources)
  end

end # class Show
1145
+
1146
+ #
1147
+ # Executes an .alf file on current environment
1148
+ #
1149
+ # SYNOPSIS
1150
+ # #{program_name} #{command_name} [FILE]
1151
+ #
1152
+ # OPTIONS
1153
+ # #{summarized_options}
1154
+ #
1155
+ # DESCRIPTION
1156
+ #
1157
+ # This command executes the .alf file passed as first argument (or what comes
1158
+ # on standard input) as a alf query to be executed on the current environment.
1159
+ #
1160
class Exec < Factory::Command(__FILE__, __LINE__)
  include Command

  # Builds an .alf reader on the first argument (standard input when
  # absent), wired to the requester's environment.
  def execute(args)
    source = args.first || $stdin
    Reader.alf(source, requester.environment)
  end

end # class Exec
1168
+
1169
+ #
1170
+ # Show help about a specific command
1171
+ #
1172
+ # SYNOPSIS
1173
+ # #{program_name} #{command_name} COMMAND
1174
+ #
1175
class Help < Factory::Command(__FILE__, __LINE__)
  include Command

  # Let NoSuchCommandError be passed to higher stage
  no_react_to Quickl::NoSuchCommand

  # Command execution: prints the help of the command named by the single
  # argument, or the help of the super command for any other argument count.
  # Always returns nil.
  def execute(args)
    documented = if args.size == 1
      has_command!(args.first, super_command)
    else
      super_command
    end
    puts documented.help
    nil
  end

end # class Help
1193
+
1194
+ end
1195
+
1196
+ #
1197
+ # Marker for all operators, relational and non-relational ones.
1198
+ #
1199
+ module Operator
1200
+ include Iterator, Tools
1201
+
1202
+ #
1203
+ # Encapsulates method definitions that convert operators to Quickl
1204
+ # commands
1205
+ #
1206
module CommandMethods

  protected

  #
  # Configures the operator from arguments taken from command line.
  #
  # This method is intended to be overridden by subclasses and must return
  # the operator itself.
  #
  def set_args(args)
    self
  end

  #
  # Overrides Quickl::Command::Single#_run to handle the '--' separator
  # correctly.
  #
  # This is because parse_options tend to eat the '--' separator... This
  # could be handled in Quickl itself, but it should be considered a broken
  # API and will only be available in quickl >= 0.3.0 (probably)
  #
  def _run(argv = [])
    # Both sides of the '--' separator go through option parsing
    operands, args = split_command_args(argv).map { |part| parse_options(part) }
    set_args(args)

    # Pipe the operands, if any, using own environment or the requester's
    operands = command_line_operands(operands)
    if operands
      env = environment || (requester ? requester.environment : nil)
      pipe(operands, env)
    end
    self
  end

  #
  # Splits commandline arguments around the first '--' separator.
  #
  # @param [Array] args commandline arguments
  # @return [Array] an [operands, args] pair; operands default to standard
  #         input when the separator comes first
  #
  def split_command_args(args)
    separator = args.index("--")
    if separator.nil?
      [args, []]
    elsif separator == 0
      [[ $stdin ], args[1..-1]]
    else
      [args[0...separator], args[(separator + 1)..-1]]
    end
  end

  # Returns the operands to pipe; the default implementation keeps them as is.
  def command_line_operands(operands)
    operands
  end

end # module CommandMethods
1256
+ include CommandMethods
1257
+
1258
+ # Operators input datasets
1259
+ attr_accessor :datasets
1260
+
1261
+ # Optional environment
1262
+ attr_reader :environment
1263
+
1264
# Sets the environment on this operator and propagates it to every dataset
# that responds to :environment.
#
# Returns env, or nil when env is already the current environment.
def environment=(env)
  # this is to avoid infinite loop (TODO: why is there infinite loops??)
  return if @environment == env

  # set and propagate on children
  @environment = env
  (datasets || []).each do |dataset|
    dataset.environment = env if dataset.respond_to?(:environment)
  end

  env
end
1280
+
1281
#
# Sets the operator input.
#
# This default implementation always raises a NotImplementedError with a
# message asking for an override.
#
def pipe(input, env = environment)
  raise(NotImplementedError, "Operator#pipe should be overriden")
end
1287
+
1288
#
# Yields each tuple in turn
#
# The iteration runs on a duplicate of the operator (prepared through
# _prepare), so that instance variables set while iterating never land on
# the receiver itself. This is how operators are kept thread safe. This
# method is not intended to be overridden, use _each instead.
#
# The block is captured explicitly because a block-less Proc.new raises an
# ArgumentError since Ruby 3.0.
#
def each(&block)
  op = dup
  op._prepare
  op._each(&block)
end
1299
+
1300
+ protected
1301
+
1302
#
# Prepares the iterator before subsequent call to _each.
#
# This method is intended to be overridden by subclasses to install what's
# needed for successful iteration. The default implementation does nothing.
#
def _prepare
  nil
end
1310
+
1311
# Internal implementation of the iterator.
#
# This method must be implemented by subclasses. It is safe to use instance
# variables (typically initialized in _prepare) here. The default
# implementation yields nothing.
#
def _each
  nil
end
1318
+
1319
+ #
1320
+ # Specialization of Operator for operators that work on a unary input
1321
+ #
1322
module Unary
  include Operator

  #
  # Sets the operator input
  #
  # The input is stored as the single element of the datasets array.
  #
  def pipe(input, env = environment)
    self.environment = env
    self.datasets = [ input ]
  end

  protected

  # Only the first commandline operand is used
  def command_line_operands(operands)
    operands.first
  end

  #
  # Simply returns the first dataset, coerced to an Iterator
  #
  def input
    Iterator.coerce(datasets.first, environment)
  end

  #
  # Yields the block with each input tuple.
  #
  # This method should be preferred to <code>input.each</code> when possible.
  #
  # The block is captured explicitly because a block-less Proc.new raises an
  # ArgumentError since Ruby 3.0.
  #
  def each_input_tuple(&block)
    input.each(&block)
  end

end # module Unary
1356
+
1357
+ #
1358
+ # Specialization of Operator for operators that work on a binary input
1359
+ #
1360
module Binary
  include Operator

  #
  # Sets the operator input; the input is stored as the datasets as is.
  #
  def pipe(input, env = environment)
    self.environment = env
    self.datasets = input
  end

  protected

  # With fewer than two operands, standard input is prepended to them
  def command_line_operands(operands)
    return operands unless operands.size < 2
    [$stdin] + operands
  end

  # Returns the left operand (first dataset, coerced to an Iterator)
  def left
    first_dataset = datasets.first
    Iterator.coerce(first_dataset, environment)
  end

  # Returns the right operand (last dataset, coerced to an Iterator)
  def right
    last_dataset = datasets.last
    Iterator.coerce(last_dataset, environment)
  end

end # module Binary
1388
+
1389
+ #
1390
+ # Specialization of Operator for operators that simply convert single tuples
1391
+ # to single tuples.
1392
+ #
1393
module Transform
  include Unary

  protected

  # (see Operator#_each)
  #
  # Yields the image of each input tuple through _tuple2tuple.
  def _each
    each_input_tuple { |input_tuple| yield _tuple2tuple(input_tuple) }
  end

  #
  # Transforms an input tuple to an output tuple. The default
  # implementation returns nil.
  #
  def _tuple2tuple(tuple)
    nil
  end

end # module Transform
1412
+
1413
+ #
1414
+ # Specialization of Operator for implementing operators that rely on a
1415
+ # cesure algorithm.
1416
+ #
1417
module Cesure
  include Unary

  protected

  # (see Operator#_each)
  #
  # Input tuples are projected on the cesure key. Each time the projection
  # changes, the previous group (if any) is flushed and a new one is
  # started. Every tuple is accumulated, and the last group is flushed once
  # the input is exhausted.
  #
  # The receiver block is captured explicitly because a block-less Proc.new
  # raises an ArgumentError since Ruby 3.0.
  def _each(&receiver)
    proj_key, prev_key = cesure_key, nil
    each_input_tuple do |tuple|
      cur_key = proj_key.project(tuple)
      if cur_key != prev_key
        flush_cesure(prev_key, receiver) unless prev_key.nil?
        start_cesure(cur_key, receiver)
        prev_key = cur_key
      end
      accumulate_cesure(tuple, receiver)
    end
    flush_cesure(prev_key, receiver) unless prev_key.nil?
  end

  # Returns the key on which input tuples are projected to detect group
  # changes (must respond to #project). To be provided by including classes.
  def cesure_key
  end

  # Hook called when a new group starts, with its key
  def start_cesure(key, receiver)
  end

  # Hook called for every input tuple
  def accumulate_cesure(tuple, receiver)
  end

  # Hook called when a group ends, with its key
  def flush_cesure(key, receiver)
  end

end # module Cesure
1450
+
1451
+ #
1452
+ # Specialization of Operator for operators that are shortcuts for longer
1453
+ # expressions.
1454
+ #
1455
module Shortcut
  include Operator

  #
  # Sets the operator input; the input is stored as the datasets as is.
  #
  def pipe(input, env = environment)
    self.environment = env
    self.datasets = input
  end

  protected

  # (see Operator#_each)
  #
  # Delegates the iteration to the longer expression. The block is captured
  # explicitly because a block-less Proc.new raises an ArgumentError since
  # Ruby 3.0.
  def _each(&block)
    longexpr.each(&block)
  end

  #
  # Compiles the longer expression and returns it.
  #
  # Declared for documentation purposes only and immediately undefined:
  # including classes must provide it.
  #
  # @return (Iterator) the compiled longer expression, typically another
  #         Operator instance
  #
  def longexpr
  end
  undef :longexpr

  #
  # This is an helper ala Lispy#chain for implementing (#longexpr).
  #
  # @param [Array] elements a list of Iterator-able
  # @return [Operator] the first element of the list, but piped with the
  #         next one, and so on. nil when the list is empty.
  #
  def chain(*elements)
    elements = elements.reverse
    # drop(1) yields [] on an empty list where [1..-1] would yield nil
    elements.drop(1).inject(elements.first) do |c, elm|
      elm.pipe(c, environment)
      elm
    end
  end

end # module Shortcut
1499
+
1500
+ end # module Operator
1501
+
1502
+ #
1503
+ # Marker module and namespace for non relational operators
1504
+ #
1505
+ module Operator::NonRelational
1506
+
1507
+ #
1508
+ # Extend with an unique autonumber attribute
1509
+ #
1510
+ # SYNOPSIS
1511
+ # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1512
+ #
1513
+ # API & EXAMPLE
1514
+ #
1515
+ # # Autonumber suppliers (:autonum attribute name by default)
1516
+ # (autonum :suppliers)
1517
+ #
1518
+ # # You can specify the attribute name
1519
+ # (autonum :suppliers, :unique_id)
1520
+ #
1521
+ # DESCRIPTION
1522
+ #
1523
+ # This operator takes input tuples in any order they come and extends them
1524
+ # with an autonumber attribute ATTRNAME. This allows converting non-relational
1525
+ # tuple enumerators to relational ones by ensuring uniqueness of tuples in an
1526
+ # arbitrary manner.
1527
+ #
1528
+ # alf autonum suppliers
1529
+ # alf autonum suppliers -- unique_id
1530
+ #
1531
class Autonum < Factory::Operator(__FILE__, __LINE__)
  include Operator::NonRelational, Operator::Transform

  # Names of the new attribute to add
  attr_accessor :attrname

  # Builds an Autonum operator instance
  def initialize(attrname = :autonum)
    @attrname = attrname
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The last commandline argument, if any, names the attribute. Returns the
  # operator itself, like the other operators' set_args do.
  def set_args(args)
    @attrname = args.last.to_sym unless args.empty?
    self
  end

  # (see Operator#_prepare)
  def _prepare
    # starts at -1 so that the first tuple is numbered 0
    @autonum = -1
  end

  # (see Operator::Transform#_tuple2tuple)
  def _tuple2tuple(tuple)
    tuple.merge(@attrname => (@autonum += 1))
  end

end # class Autonum
1559
+
1560
+ #
1561
+ # Force default values on missing/nil attributes
1562
+ #
1563
+ # SYNOPSIS
1564
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
1565
+ #
1566
+ # OPTIONS
1567
+ # #{summarized_options}
1568
+ #
1569
+ # API & EXAMPLE
1570
+ #
1571
+ # # Non strict mode
1572
+ # (defaults :suppliers, :country => 'Belgium')
1573
+ #
1574
+ # # Strict mode (--strict)
1575
+ # (defaults :suppliers, {:country => 'Belgium'}, true)
1576
+ #
1577
+ # DESCRIPTION
1578
+ #
1579
+ # This operator rewrites tuples so as to ensure that all values for specified
1580
+ # attributes ATTRx are defined and not nil. Missing or nil attributes are
1581
+ # replaced by the associated default value VALx.
1582
+ #
1583
+ # When used in shell, the hash of default values is built from commandline
1584
+ # arguments ala Hash[...]. However, to keep type safety VALx are interpreted
1585
+ # as ruby literals and built with Kernel.eval. This means that strings must
1586
+ # be doubly quoted. For the example of the API section:
1587
+ #
1588
+ # alf defaults suppliers -- country "'Belgium'"
1589
+ #
1590
+ # When used in --strict mode, the operator simply project resulting tuples on
1591
+ # attributes for which a default value has been specified. Using the strict
1592
+ # mode guarantess that the heading of all tuples is the same, and that no nil
1593
+ # value ever remains. However, this operator never remove duplicates.
1594
+ #
1595
class Defaults < Factory::Operator(__FILE__, __LINE__)
  include Operator::NonRelational, Operator::Transform

  # Default values as a ATTR -> VAL hash
  attr_accessor :defaults

  # Strict mode?
  attr_accessor :strict

  # Builds a Defaults operator instance
  def initialize(defaults = {}, strict = false)
    @defaults = defaults
    @strict = strict
  end

  options do |opt|
    opt.on('-s', '--strict', 'Strictly restrict to default attributes') do
      self.strict = true
    end
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Arguments are taken two by two as ATTR VAL pairs; VAL is evaluated as
  # ruby code through Kernel.eval.
  def set_args(args)
    @defaults = tuple_collect(args.each_slice(2)) { |name, literal|
      [name.to_sym, Kernel.eval(literal)]
    }
    self
  end

  # (see Operator::Transform#_tuple2tuple)
  def _tuple2tuple(tuple)
    if strict
      # strict mode: only attributes having a default value are kept
      return tuple_collect(@defaults) { |name, default|
        [name, coalesce(tuple[name], default)]
      }
    end

    # otherwise: all tuple attributes are kept, completed with the defaults
    completed = tuple_collect(tuple) { |name, value|
      [name, coalesce(value, @defaults[name])]
    }
    @defaults.merge(completed)
  end

end # class Defaults
1640
+
1641
+ #
1642
+ # Remove tuple duplicates
1643
+ #
1644
+ # SYNOPSIS
1645
+ # #{program_name} #{command_name} [OPERAND]
1646
+ #
1647
+ # API & EXAMPLE
1648
+ #
1649
+ # # clip, unlike project, typically leave duplicates
1650
+ # (compact (clip :suppliers, [ :city ]))
1651
+ #
1652
+ # DESCRIPTION
1653
+ #
1654
+ # This operator remove duplicates from input tuples. As defaults, it is a non
1655
+ # relational operator that helps normalizing input for implementing relational
1656
+ # operators. This one is centric in converting bags of tuples to sets of
1657
+ # tuples, as required by true relations.
1658
+ #
1659
+ # alf compact ...
1660
+ #
1661
class Compact < Factory::Operator(__FILE__, __LINE__)
  include Operator::NonRelational, Operator::Shortcut, Operator::Unary

  # Removes duplicates according to a complete order
  class SortBased
    include Operator::Cesure

    # Projection key built as ProjectionKey.new([], true)
    def cesure_key
      @cesure_key ||= ProjectionKey.new([],true)
    end

    # Remembers the last tuple seen for the current key
    def accumulate_cesure(tuple, receiver)
      @tuple = tuple
    end

    # Emits the remembered tuple when the key changes
    def flush_cesure(key, receiver)
      receiver.call(@tuple)
    end

  end # class SortBased

  # Removes duplicates by loading all in memory and filtering
  # them there
  class BufferBased
    include Operator::Unary

    def _prepare
      @tuples = input.to_a.uniq
    end

    # The block is captured explicitly because a block-less Proc.new raises
    # an ArgumentError since Ruby 3.0.
    def _each(&block)
      @tuples.each(&block)
    end

  end # class BufferBased

  protected

  # (see Operator::Shortcut#longexpr)
  def longexpr
    chain BufferBased.new,
          datasets
  end

end # class Compact
1705
+
1706
+ #
1707
+ # Sort input tuples according to an order relation
1708
+ #
1709
+ # SYNOPSIS
1710
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ORDER1 ATTR2 ORDER2...
1711
+ #
1712
+ # API & EXAMPLE
1713
+ #
1714
+ # # sort on supplier name in ascending order
1715
+ # (sort :suppliers, [:name])
1716
+ #
1717
+ # # sort on city then on name
1718
+ # (sort :suppliers, [:city, :name])
1719
+ #
1720
+ # # sort on city DESC then on name ASC
1721
+ # (sort :suppliers, [[:city, :desc], [:name, :asc]])
1722
+ #
1723
+ # => See OrderingKey about specifying orderings
1724
+ #
1725
+ # DESCRIPTION
1726
+ #
1727
+ # This operator sorts input tuples on ATTR1 then ATTR2, etc. and outputs
1728
+ # them sorted after that. This is, of course, a non relational operator as
1729
+ # relations are unordered sets. It is provided to implement operators that
1730
+ # need tuples to be sorted to work correctly. When used in shell, the key
1731
+ # ordering must be specified in its longest form:
1732
+ #
1733
+ # alf sort suppliers -- name asc
1734
+ # alf sort suppliers -- city desc name asc
1735
+ #
1736
+ # LIMITATIONS
1737
+ #
1738
+ # The fact that the ordering must be completely specified with commandline
1739
+ # arguments is a limitation, shortcuts could be provided in the future.
1740
+ #
1741
class Sort < Factory::Operator(__FILE__, __LINE__)
  include Operator::NonRelational, Operator::Unary

  # Builds a Sort operator instance; the ordering is coerced through
  # OrderingKey.coerce. Yields the instance if a block is given.
  def initialize(ordering_key = [])
    @ordering_key = OrderingKey.coerce(ordering_key)
    yield self if block_given?
  end

  # Sets the ordering (coerced through OrderingKey.coerce)
  def ordering=(ordering)
    @ordering_key = OrderingKey.coerce(ordering)
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Arguments are converted to symbols and taken two by two as
  # [ATTR, ORDER] pairs.
  def set_args(args)
    self.ordering = args.collect{|c| c.to_sym}.each_slice(2).to_a
    self
  end

  # (see Operator#_prepare)
  def _prepare
    @buffer = Buffer::Sorted.new(@ordering_key)
    @buffer.add_all(input)
  end

  # (see Operator#_each)
  #
  # The block is captured explicitly because a block-less Proc.new raises an
  # ArgumentError since Ruby 3.0.
  def _each(&block)
    @buffer.each(&block)
  end

end # class Sort
1770
+
1771
+ #
1772
+ # Clip input tuples to a subset of attributes
1773
+ #
1774
+ # SYNOPSIS
1775
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
1776
+ #
1777
+ # OPTIONS
1778
+ # #{summarized_options}
1779
+ #
1780
+ # API & EXAMPLE
1781
+ #
1782
+ # # Keep only name and city attributes
1783
+ # (clip :suppliers, [:name, :city])
1784
+ #
1785
+ # # Keep all but name and city attributes
1786
+ # (clip :suppliers, [:name, :city], true)
1787
+ #
1788
+ # DESCRIPTION
1789
+ #
1790
+ # This operator clips tuples on attributes whose names are specified as
1791
+ # arguments. This is similar to the relational PROJECT operator, expect
1792
+ # that this one does not removed duplicates that can occur from clipping.
1793
+ # In other words, clipping may lead to bags of tuples instead of sets.
1794
+ #
1795
+ # When used in shell, the clipping/projection key is simply taken from
1796
+ # commandline arguments:
1797
+ #
1798
+ # alf clip suppliers -- name city
1799
+ # alf clip suppliers --allbut -- name city
1800
+ #
1801
class Clip < Factory::Operator(__FILE__, __LINE__)
  include Operator::NonRelational, Operator::Transform

  # Builds a Clip operator instance; yields it if a block is given
  def initialize(attributes = [], allbut = false)
    @projection_key = ProjectionKey.new(attributes, allbut)
    yield(self) if block_given?
  end

  # Sets the attributes of the underlying projection key
  def attributes=(attrs)
    @projection_key.attributes = attrs
  end

  # Sets the allbut flag of the underlying projection key
  def allbut=(allbut)
    @projection_key.allbut = allbut
  end

  # Installs the options
  options do |opt|
    opt.on('-a', '--allbut', 'Apply a ALLBUT clipping') {
      self.allbut = true
    }
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Commandline arguments are the attribute names, converted to symbols.
  def set_args(args)
    names = args.map { |arg| arg.to_sym }
    self.attributes = names
    self
  end

  # (see Operator::Transform#_tuple2tuple)
  def _tuple2tuple(tuple)
    @projection_key.project(tuple)
  end

end # class Clip
1839
+
1840
+ end # Operator::NonRelational
1841
+
1842
+ #
1843
+ # Marker module and namespace for relational operators
1844
+ #
1845
+ module Operator::Relational
1846
+
1847
+ #
1848
+ # Relational projection (clip + compact)
1849
+ #
1850
+ # SYNOPSIS
1851
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
1852
+ #
1853
+ # OPTIONS
1854
+ # #{summarized_options}
1855
+ #
1856
+ # API & EXAMPLE
1857
+ #
1858
+ # # Project on name and city attributes
1859
+ # (project :suppliers, [:name, :city])
1860
+ #
1861
+ # # Project on all but name and city attributes
1862
+ # (allbut :suppliers, [:name, :city])
1863
+ #
1864
+ # DESCRIPTION
1865
+ #
1866
+ # This operator projects tuples on attributes whose names are specified as
1867
+ # arguments. This is similar to clip, except that this ones is a truly
1868
+ # relational one, that is, it also removes duplicates tuples.
1869
+ #
1870
+ # When used in shell, the clipping/projection key is simply taken from
1871
+ # commandline arguments:
1872
+ #
1873
+ # alf project suppliers -- name city
1874
+ # alf project --allbut suppliers -- name city
1875
+ #
1876
class Project < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Unary

  # Builds a Project operator instance; yields it if a block is given
  def initialize(attributes = [], allbut = false)
    @projection_key = ProjectionKey.new(attributes, allbut)
    yield(self) if block_given?
  end

  # Sets the attributes of the underlying projection key
  def attributes=(attrs)
    @projection_key.attributes = attrs
  end

  # Sets the allbut flag of the underlying projection key
  def allbut=(allbut)
    @projection_key.allbut = allbut
  end

  # Installs the options
  options do |opt|
    opt.on('-a', '--allbut', 'Apply a ALLBUT projection') {
      self.allbut = true
    }
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Commandline arguments are the attribute names, converted to symbols.
  def set_args(args)
    names = args.map { |arg| arg.to_sym }
    self.attributes = names
    self
  end

  # (see Operator::Shortcut#longexpr)
  #
  # Chains a Compact operator, a Clip operator configured like this
  # projection, and the datasets.
  def longexpr
    compact = Operator::NonRelational::Compact.new
    clip = Operator::NonRelational::Clip.new(@projection_key.attributes,
                                             @projection_key.allbut)
    chain(compact, clip, datasets)
  end

end # class Project
1917
+
1918
+ #
1919
+ # Relational extension (additional, computed attributes)
1920
+ #
1921
+ # SYNOPSIS
1922
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 EXPR1 ATTR2 EXPR2...
1923
+ #
1924
+ # API & EXAMPLE
1925
+ #
1926
+ # (extend :supplies, :sp => lambda{ sid + "/" + pid },
1927
+ # :big => lambda{ qty > 100 ? true : false })
1928
+ #
1929
+ # DESCRIPTION
1930
+ #
1931
+ # This command extend input tuples with new attributes (named ATTR1, ...)
1932
+ # whose value is the result of evaluating tuple expressions (i.e. EXPR1, ...).
1933
+ # See main documentation about the semantics of tuple expressions. When used
1934
+ # in shell, the hash of extensions is built from commandline arguments ala
1935
+ # Hash[...]. Tuple expressions must be specified as code literals there:
1936
+ #
1937
+ # alf extend supplies -- sp 'sid + "/" + pid' big "qty > 100 ? true : false"
1938
+ #
1939
+ # Attributes ATTRx should not already exist, no behavior is guaranteed if
1940
+ # this precondition is not respected.
1941
+ #
1942
class Extend < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Extensions as a Hash attr => lambda{...}
  attr_accessor :extensions

  # Builds an Extend operator instance
  def initialize(extensions = {})
    @extensions = extensions
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Arguments are taken two by two as ATTR EXPR pairs; EXPR is compiled
  # through TupleHandle.compile.
  def set_args(args)
    @extensions = tuple_collect(args.each_slice(2)) do |name, expr|
      [name.to_sym, TupleHandle.compile(expr)]
    end
    self
  end

  # (see Operator#_prepare)
  def _prepare
    @handle = TupleHandle.new
  end

  # (see Operator::Transform#_tuple2tuple)
  #
  # Each extension is evaluated on the handle set to the input tuple; the
  # results are merged into the tuple.
  def _tuple2tuple(tuple)
    computed = tuple_collect(@extensions) do |name, expr|
      [name, @handle.set(tuple).evaluate(expr)]
    end
    tuple.merge(computed)
  end

end # class Extend
1976
+
1977
+ #
1978
+ # Relational renaming (rename some attributes)
1979
+ #
1980
+ # SYNOPSIS
1981
+ # #{program_name} #{command_name} [OPERAND] -- OLD1 NEW1 ...
1982
+ #
1983
+ # OPTIONS
1984
+ # #{summarized_options}
1985
+ #
1986
+ # API & EXAMPLE
1987
+ #
1988
+ # (rename :suppliers, :name => :supplier_name, :city => :supplier_city)
1989
+ #
1990
+ # DESCRIPTION
1991
+ #
1992
+ # This command renames OLD attributes as NEW as specified by arguments.
1993
+ # Attributes OLD should exist in source tuples while attributes NEW should
1994
+ # not. When used in shell, renaming attributes are built ala Hash[...] from
1995
+ # commandline arguments:
1996
+ #
1997
+ # alf rename suppliers -- name supplier_name city supplier_city
1998
+ #
1999
class Rename < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Renamings, as a Hash mapping each source attribute to its new name
  attr_accessor :renaming

  # Creates a Rename operator from a renaming hash (empty by default)
  def initialize(renaming = {})
    @renaming = renaming
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Symbolizes the commandline arguments and pairs them up as OLD NEW.
  def set_args(args)
    names = args.map { |arg| arg.to_sym }
    @renaming = Hash[*names]
    self
  end

  # (see Operator::Transform#_tuple2tuple)
  #
  # Attributes without an entry in the renaming hash keep their name.
  def _tuple2tuple(tuple)
    tuple_collect(tuple) do |name, value|
      [@renaming[name] || name, value]
    end
  end

end # class Rename
2024
+
2025
+ #
2026
+ # Relational restriction (aka where, predicate filtering)
2027
+ #
2028
+ # SYNOPSIS
2029
+ # #{program_name} #{command_name} [OPERAND] -- EXPR
2030
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
2031
+ #
2032
+ # API & EXAMPLE
2033
+ #
2034
+ # # Restrict to suppliers with status greater than 20
2035
+ # (restrict :suppliers, lambda{ status > 20 })
2036
+ #
2037
+ # # Restrict to suppliers that live in London
2038
+ # (restrict :suppliers, lambda{ city == 'London' })
2039
+ #
2040
+ # DESCRIPTION
2041
+ #
2042
+ # This command restricts tuples to those for which EXPR evaluates to true.
2043
+ # EXPR must be a valid tuple expression that should return a truth-value.
2044
+ # When used in shell, the predicate is taken as a string and compiled with
2045
+ # TupleHandle.compile. We also provide a shortcut for equality expressions.
2046
+ # Note that, in that case, values are expected to be ruby code literals,
2047
+ # evaluated with Kernel.eval. Therefore, strings must be doubly quoted.
2048
+ #
2049
+ # alf restrict suppliers -- "status > 20"
2050
+ # alf restrict suppliers -- city "'London'"
2051
+ #
2052
class Restrict < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Compiled restriction predicate
  attr_accessor :predicate

  # Creates a Restrict operator. The predicate (always true by default) is
  # compiled with TupleHandle.compile; the new instance is yielded when a
  # block is given.
  def initialize(predicate = "true")
    @predicate = TupleHandle.compile(predicate)
    yield self if block_given?
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # A single argument is taken as the predicate expression. Several
  # arguments are read pairwise as ATTR VAL, each VAL being evaluated as
  # ruby code.
  #
  # NOTE(review): VAL goes through Kernel.eval, so commandline values are
  # executed as arbitrary ruby code. Only use with trusted input.
  def set_args(args)
    source = if args.size > 1
      tuple_collect(args.each_slice(2)) do |attr, literal|
        [attr, Kernel.eval(literal)]
      end
    else
      args.first
    end
    @predicate = TupleHandle.compile(source)
    self
  end

  # (see Operator#_each)
  #
  # Yields the input tuples for which the predicate evaluates to a true
  # value.
  def _each
    handle = TupleHandle.new
    each_input_tuple do |tuple|
      yield(tuple) if handle.set(tuple).evaluate(@predicate)
    end
  end

end # class Restrict
2085
+
2086
+ #
2087
+ # Relational join (and cross-join)
2088
+ #
2089
+ # SYNOPSIS
2090
+ # #{program_name} #{command_name} [LEFT] RIGHT
2091
+ #
2092
+ # API & EXAMPLE
2093
+ #
2094
+ # (join :suppliers, :parts)
2095
+ #
2096
+ # DESCRIPTION
2097
+ #
2098
+ # This operator computes the (natural) join of two input iterators. Natural
2099
+ # join means that, unlike what is commonly used in SQL, the default behavior
2100
+ # is to join on common attributes. You can use the rename operator if this
2101
+ # behavior does not fit your needs.
2102
+ #
2103
+ # alf join suppliers supplies
2104
+ #
2105
class Join < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  #
  # Joins by buffering the right operand in a hash indexed on the
  # attributes it shares with the left operand, then probing that hash with
  # each left tuple.
  #
  class HashBased
    include Operator::Binary

    #
    # Lazily loads an enumeration of tuples into a hash of arrays, keyed by
    # their projection on the attributes shared with the first probing
    # tuple.
    #
    class JoinBuffer

      # Creates a buffer over `enum`; nothing is read until the first call
      # to #split.
      def initialize(enum)
        @buffer = nil
        @key = nil
        @enum = enum
      end

      # Splits `tuple` according to the join key and returns the result of
      # the key's `split` (used by callers as a [key, rest] pair). The
      # buffer is loaded on the first call.
      #
      # When the buffered enumeration is empty no join key can be inferred;
      # [nil, tuple] is returned in that case, and #each finds no match for
      # a nil key.
      def split(tuple)
        _init(tuple) unless @buffer
        @key ? @key.split(tuple) : [nil, tuple]
      end

      # Yields each buffered tuple stored under `key`, if any. #split must
      # have been called beforehand so that the buffer is loaded.
      def each(key, &block)
        # The block is captured explicitly: Proc.new without a block is no
        # longer supported as of Ruby 3.0.
        @buffer[key].each(&block) if @buffer.has_key?(key)
      end

      private

      # Loads the buffer. The join key is computed once, from the attributes
      # common to the first buffered tuple and the probing tuple `probe`.
      def _init(probe)
        @buffer = Hash.new{|h,k| h[k] = []}
        @enum.each do |buffered|
          @key ||= Tools::ProjectionKey.coerce(buffered.keys & probe.keys)
          @buffer[@key.project(buffered)] << buffered
        end
      end

    end

    protected

    # (see Operator#_each)
    #
    # Yields the merge of each left tuple with every matching right tuple.
    def _each
      buffer = JoinBuffer.new(right)
      left.each do |left_tuple|
        key, _rest = buffer.split(left_tuple)
        buffer.each(key) do |right_tuple|
          yield(left_tuple.merge(right_tuple))
        end
      end
    end

  end

  protected

  # (see Shortcut#longexpr)
  def longexpr
    chain HashBased.new,
          datasets
  end

end # class Join
2163
+
2164
+ #
2165
+ # Relational intersection (aka a logical and)
2166
+ #
2167
+ # SYNOPSIS
2168
+ # #{program_name} #{command_name} [LEFT] RIGHT
2169
+ #
2170
+ # API & EXAMPLE
2171
+ #
2172
+ # # Give suppliers that live in Paris and have status >= 20
2173
+ # (intersect \\
2174
+ # (restrict :suppliers, lambda{ status >= 20 }),
2175
+ # (restrict :suppliers, lambda{ city == 'Paris' }))
2176
+ #
2177
+ # DESCRIPTION
2178
+ #
2179
+ # This operator computes the intersection between its two operands. The
2180
+ # intersection is simply the set of common tuples between them. Both operands
2181
+ # must have the same heading.
2182
+ #
2183
+ # alf intersect ... ...
2184
+ #
2185
class Intersect < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  #
  # Intersects by indexing the right operand in a hash, then keeping the
  # left tuples found in that index.
  #
  class HashBased
    include Operator::Binary

    protected

    # (see Operator#_prepare)
    #
    # Indexes every tuple of the right operand.
    def _prepare
      @index = {}
      right.each do |tuple|
        @index[tuple] = true
      end
    end

    # (see Operator#_each)
    #
    # Yields the left tuples that are present in the index.
    def _each
      left.each do |candidate|
        next unless @index.key?(candidate)
        yield(candidate)
      end
    end

  end

  protected

  # (see Shortcut#longexpr)
  def longexpr
    chain(HashBased.new, datasets)
  end

end # class Intersect
2215
+
2216
+ #
2217
+ # Relational minus (aka difference)
2218
+ #
2219
+ # SYNOPSIS
2220
+ # #{program_name} #{command_name} [LEFT] RIGHT
2221
+ #
2222
+ # API & EXAMPLE
2223
+ #
2224
+ # # Give all suppliers but those living in Paris
2225
+ # (minus :suppliers,
2226
+ # (restrict :suppliers, lambda{ city == 'Paris' }))
2227
+ #
2228
+ # DESCRIPTION
2229
+ #
2230
+ # This operator computes the difference between its two operands. The
2231
+ # difference is simply the set of tuples in the left operand that are not
2231
+ # shared by the right one.
2233
+ #
2234
+ # alf minus ... ...
2235
+ #
2236
class Minus < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  #
  # Computes the difference by indexing the right operand in a hash, then
  # keeping the left tuples that are absent from that index.
  #
  class HashBased
    include Operator::Binary

    protected

    # (see Operator#_prepare)
    #
    # Indexes every tuple of the right operand.
    def _prepare
      @index = {}
      right.each do |tuple|
        @index[tuple] = true
      end
    end

    # (see Operator#_each)
    #
    # Yields the left tuples that are not present in the index.
    def _each
      left.each do |candidate|
        next if @index.key?(candidate)
        yield(candidate)
      end
    end

  end

  protected

  # (see Shortcut#longexpr)
  def longexpr
    chain(HashBased.new, datasets)
  end

end # class Minus
2266
+
2267
+ #
2268
+ # Relational union
2269
+ #
2270
+ # SYNOPSIS
2271
+ # #{program_name} #{command_name} [LEFT] RIGHT
2272
+ #
2273
+ # API & EXAMPLE
2274
+ #
2275
+ # (union (project :suppliers, [:city]),
2276
+ # (project :parts, [:city]))
2277
+ #
2278
+ # DESCRIPTION
2279
+ #
2280
+ # This operator computes the union of two input iterators. Input
2280
+ # iterators should have the same heading. The result never contains duplicates.
2282
+ #
2283
+ # alf union ... ...
2284
+ #
2285
class Union < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Binary

  #
  # Concatenates both operands: all left tuples first, then all right
  # ones. Duplicates are not removed at this stage.
  #
  class DisjointBased
    include Operator::Binary

    protected

    # (see Operator#_each)
    #
    # The block is captured explicitly and forwarded to both operands:
    # Proc.new without a block is no longer supported as of Ruby 3.0.
    def _each(&block)
      left.each(&block)
      right.each(&block)
    end

  end

  protected

  # (see Shortcut#longexpr)
  #
  # Chains a Compact operator on top of the concatenation of the operands.
  def longexpr
    chain Operator::NonRelational::Compact.new,
          DisjointBased.new,
          datasets
  end

end # class Union
2310
+
2311
+ #
2312
+ # Relational nesting (tuple-valued attributes)
2313
+ #
2314
+ # SYNOPSIS
2315
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2316
+ #
2317
+ # API & EXAMPLE
2318
+ #
2319
+ # (nest :suppliers, [:city, :status], :loc_and_status)
2320
+ #
2321
+ # DESCRIPTION
2322
+ #
2323
+ # This operator nests attributes ATTR1 to ATTRN as a new, tuple-based
2324
+ # attribute whose name is NEWNAME. When used in shell, names of nested
2325
+ # attributes are taken from commandline arguments, except the last one
2326
+ # which defines the new name to use:
2327
+ #
2328
+ # alf nest suppliers -- city status loc_and_status
2329
+ #
2330
class Nest < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Names of the attributes to nest
  attr_accessor :attributes

  # Name of the resulting tuple-valued attribute
  attr_accessor :as

  # Creates a Nest operator for `attributes`, nested under the name `as`
  def initialize(attributes = [], as = :nested)
    @attributes, @as = attributes, as
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The last argument (removed from `args`) is the new attribute name; the
  # remaining ones are the attributes to nest.
  def set_args(args)
    @as = args.pop.to_sym
    @attributes = args.map { |name| name.to_sym }
    self
  end

  # (see Operator::Transform#_tuple2tuple)
  #
  # Keeps the attributes that are not nested and adds the nested ones as a
  # single tuple under the new name.
  def _tuple2tuple(tuple)
    kept = tuple_collect(tuple.keys - @attributes) do |name|
      [name, tuple[name]]
    end
    kept[as] = tuple_collect(attributes) do |name|
      [name, tuple[name]]
    end
    kept
  end

end # class Nest
2362
+
2363
+ #
2364
+ # Relational un-nesting (inverse of nest)
2365
+ #
2366
+ # SYNOPSIS
2367
+ # #{program_name} #{command_name} [OPERAND] -- ATTR
2368
+ #
2369
+ # API & EXAMPLE
2370
+ #
2371
+ # # Assuming nested = (nest :suppliers, [:city, :status], :loc_and_status)
2372
+ # (unnest nested, :loc_and_status)
2373
+ #
2374
+ # DESCRIPTION
2375
+ #
2376
+ # This operator unnests the tuple-valued attribute named ATTR so as to
2377
+ # flatten its pairs with 'upstream' tuple. The latter should be such so that
2378
+ # no name collision occurs. When used in shell, the name of the attribute to
2379
+ # unnest is taken as the first commandline argument:
2380
+ #
2381
+ # alf unnest nest -- loc_and_status
2382
+ #
2383
class Unnest < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Transform

  # Name of the tuple-valued attribute to unnest
  attr_accessor :attribute

  # Creates an Unnest operator for `attribute`
  def initialize(attribute = :nested)
    @attribute = attribute
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The attribute name is the first commandline argument.
  def set_args(args)
    @attribute = args.first.to_sym
    self
  end

  # (see Operator::Transform#_tuple2tuple)
  #
  # Works on a copy of the tuple: removes the nested attribute and merges
  # its pairs in. A missing (or nil) nested attribute contributes nothing.
  def _tuple2tuple(tuple)
    flat = tuple.dup
    inner = flat.delete(@attribute) || {}
    flat.merge(inner)
  end

end # class Unnest
2410
+
2411
+ #
2412
+ # Relational grouping (relation-valued attributes)
2413
+ #
2414
+ # SYNOPSIS
2415
+ # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2416
+ #
2417
+ # API & EXAMPLE
2418
+ #
2419
+ # (group :supplies, [:pid, :qty], :supplying)
2420
+ # (group :supplies, [:sid], :supplying, true)
2421
+ #
2422
+ # DESCRIPTION
2423
+ #
2424
+ # This operator groups attributes ATTR1 to ATTRN as a new, relation-valued
2425
+ # attribute whose name is NEWNAME. When used in shell, names of grouped
2426
+ # attributes are taken from commandline arguments, except the last one
2427
+ # which defines the new name to use:
2428
+ #
2429
+ # alf group supplies -- pid qty supplying
2430
+ # alf group supplies --allbut -- sid supplying
2431
+ #
2432
class Group < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Names of the attributes the grouping is specified on
  attr_accessor :attributes

  # Name of the resulting relation-valued attribute
  attr_accessor :as

  # Whether to group all attributes but the specified ones
  attr_accessor :allbut

  # Creates a Group operator
  def initialize(attributes = [], as = :group, allbut = false)
    @attributes, @as, @allbut = attributes, as, allbut
  end

  options do |opt|
    opt.on('--allbut', "Group all but specified attributes") do
      @allbut = true
    end
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The last argument (removed from `args`) is the new attribute name; the
  # remaining ones are the grouping attributes.
  def set_args(args)
    @as = args.pop.to_sym
    @attributes = args.map { |name| name.to_sym }
    self
  end

  # See Operator#_prepare
  #
  # Reads all input tuples and indexes them: each tuple is split with a
  # projection key and its rest part is appended under its key part.
  def _prepare
    splitter = ProjectionKey.new(attributes, !allbut)
    @index = Hash.new { |index, key| index[key] = [] }
    each_input_tuple do |tuple|
      key_part, rest_part = splitter.split(tuple)
      @index[key_part] << rest_part
    end
  end

  # See Operator#_each
  #
  # Yields one tuple per index entry: the key part extended with the
  # collected rest parts under the new attribute name.
  def _each
    @index.each_pair do |key_part, members|
      yield(key_part.merge(@as => members))
    end
  end

end # class Group
2482
+
2483
+ #
2484
+ # Relational un-grouping (inverse of group)
2485
+ #
2486
+ # SYNOPSIS
2487
+ # #{program_name} #{command_name} [OPERAND] -- ATTR
2488
+ #
2489
+ # API & EXAMPLE
2490
+ #
2491
+ # # Assuming grouped = (group enum, [:pid, :qty], :supplying)
2492
+ # (ungroup grouped, :supplying)
2493
+ #
2494
+ # DESCRIPTION
2495
+ #
2496
+ # This operator ungroups the relation-valued attribute named ATTR and outputs
2497
+ # tuples as the flattening of each of its tuples merged with the upstream
2498
+ # one. Sub relation should be such so that no name collision occurs. When
2499
+ # used in shell, the name of the attribute to ungroup is taken as the first
2500
+ # commandline argument:
2501
+ #
2502
+ # alf ungroup group -- supplying
2503
+ #
2504
class Ungroup < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Unary

  # Name of the relation-valued attribute to ungroup
  attr_accessor :attribute

  # Creates an Ungroup operator for `attribute`
  def initialize(attribute = :grouped)
    @attribute = attribute
  end

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # The attribute name is the first commandline argument, as documented
  # for this command and as done by Unnest. `args` is left untouched.
  def set_args(args)
    @attribute = args.first.to_sym
    self
  end

  # See Operator#_each
  #
  # For each input tuple, yields the merge of the tuple (without the
  # ungrouped attribute) with every tuple of its sub-relation. A tuple
  # whose ungrouped attribute is missing or nil is handled as having an
  # empty sub-relation, hence yields nothing, instead of raising.
  def _each
    each_input_tuple do |tuple|
      upstream = tuple.dup
      subrel = upstream.delete(@attribute) || []
      subrel.each do |subtuple|
        yield(upstream.merge(subtuple))
      end
    end
  end

end # class Ungroup
2535
+
2536
+ #
2537
+ # Relational summarization (group-by + aggregate ops)
2538
+ #
2539
+ # SYNOPSIS
2540
+ # #{program_name} #{command_name} [OPERAND] --by=KEY1,KEY2... -- AGG1 EXPR1...
2541
+ #
2542
+ # OPTIONS
2543
+ # #{summarized_options}
2544
+ #
2545
+ # API & EXAMPLE
2546
+ #
2547
+ # (summarize :supplies, [:sid],
2548
+ # :total_qty => Aggregator.sum(:qty))
2549
+ #
2550
+ # DESCRIPTION
2551
+ #
2552
+ # This operator summarizes input tuples on the projection on KEY1,KEY2,...
2553
+ # attributes and applies aggregate operators on sets of matching tuples.
2554
+ # Introduced names AGG should be disjoint from KEY attributes.
2555
+ #
2556
+ # When used in shell, the aggregations are taken from commandline arguments
2557
+ # AGG and EXPR, where AGG is the name of a new attribute and EXPR is an
2558
+ # aggregation expression evaluated on Aggregator:
2559
+ #
2560
+ # alf summarize supplies --by=sid -- total_qty "sum(:qty)"
2561
+ #
2562
class Summarize < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Unary

  # Names of the attributes to summarize by
  attr_accessor :by

  # Aggregations, as a Hash mapping each new attribute to its aggregator
  attr_accessor :aggregators

  # Creates a Summarize operator
  def initialize(by = [], aggregators = {})
    @by, @aggregators = by, aggregators
  end

  # Installs the options
  options do |opt|
    opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
      @by = args.map { |name| name.to_sym }
    end
  end

  #
  # Summarizes through the Cesure protocol, keeping one memo per
  # aggregator between start_cesure and flush_cesure.
  #
  class SortBased
    include Alf::Operator::Cesure

    attr_reader :cesure_key
    attr_reader :aggregators

    # Creates the operator from a cesure key and a hash of aggregators
    def initialize(by_key, aggregators)
      @cesure_key = by_key
      @aggregators = aggregators
    end

    protected

    # Resets every memo to its aggregator's least value.
    def start_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.least]
      }
    end

    # Lets every aggregator update its memo with the tuple.
    def accumulate_cesure(tuple, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.happens(@aggs[name], tuple)]
      }
    end

    # Finalizes every memo and sends the key merged with the results to
    # the receiver.
    def flush_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.finalize(@aggs[name])]
      }
      receiver.call key.merge(@aggs)
    end

  end # class SortBased

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Commandline arguments are read pairwise as AGG EXPR: each name is
  # symbolized and each expression is compiled with Aggregator.compile.
  def set_args(args)
    pairs = args.each_slice(2)
    @aggregators = tuple_collect(pairs) { |name, expr|
      [name.to_sym, Aggregator.compile(expr)]
    }
    self
  end

  # (see Shortcut#longexpr)
  #
  # Chains the summarization on top of a sort of the input on the by key.
  def longexpr
    by_key = Tools::ProjectionKey.new(@by, false)
    summarizer = SortBased.new(by_key, @aggregators)
    sorter = Operator::NonRelational::Sort.new(by_key.to_ordering_key)
    chain(summarizer, sorter, datasets)
  end

end # class Summarize
2635
+
2636
+ #
2637
+ # Relational quota-queries (position, sum progression, etc.)
2638
+ #
2639
+ # SYNOPSIS
2640
+ # #{program_name} #{command_name} [OPERAND] --by=KEY1,... --order=OR1... AGG1 EXPR1...
2641
+ #
2642
+ # OPTIONS
2643
+ # #{summarized_options}
2644
+ #
2645
+ # API & EXAMPLE
2646
+ #
2647
+ # (quota :supplies, [:sid], [:qty],
2648
+ # :position => Aggregator.count,
2649
+ # :sum_qty => Aggregator.sum(:qty))
2650
+ #
2651
+ # DESCRIPTION
2652
+ #
2653
+ # This operator computes quota values on input tuples.
2654
+ #
2655
+ # alf quota supplies --by=sid --order=qty -- position count sum_qty "sum(:qty)"
2656
+ #
2657
class Quota < Factory::Operator(__FILE__, __LINE__)
  include Operator::Relational, Operator::Shortcut, Operator::Unary

  # Names of the attributes the quota is computed by
  attr_accessor :by

  # Names of the attributes giving the order inside each by-group
  attr_accessor :order

  # Aggregations, as a Hash mapping each new attribute to its aggregator
  attr_accessor :aggregators

  # Creates a Quota operator
  def initialize(by = [], order = [], aggregators = {})
    @by = by
    @order = order
    @aggregators = aggregators
  end

  options do |opt|
    opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
      @by = args.map { |name| name.to_sym }
    end
    opt.on('--order=x,y,z', 'Specify order attributes', Array) do |args|
      @order = args.map { |name| name.to_sym }
    end
  end

  #
  # Computes quota values through the Cesure protocol: memos are reset at
  # the start of each cesure and one output tuple is emitted per
  # accumulated tuple.
  #
  class SortBased
    include Operator::Cesure

    # Creates the operator from by/order attributes and aggregators
    def initialize(by, order, aggregators)
      @by = by
      @order = order
      @aggregators = aggregators
    end

    # Projection key coerced from the by attributes
    def cesure_key
      ProjectionKey.coerce(@by)
    end

    # Ordering key coerced from the order attributes
    def ordering_key
      OrderingKey.coerce(@order)
    end

    # Resets every memo to its aggregator's least value.
    def start_cesure(key, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.least]
      }
    end

    # Updates every memo with the tuple, then sends the tuple merged with
    # the finalized current values to the receiver.
    def accumulate_cesure(tuple, receiver)
      @aggs = tuple_collect(@aggregators) { |name, agg|
        [name, agg.happens(@aggs[name], tuple)]
      }
      snapshot = tuple_collect(@aggregators) { |name, agg|
        [name, agg.finalize(@aggs[name])]
      }
      receiver.call tuple.merge(snapshot)
    end

  end # class SortBased

  protected

  # (see Operator::CommandMethods#set_args)
  #
  # Commandline arguments are read pairwise as AGG EXPR: each name is
  # symbolized and each expression is compiled with Aggregator.compile.
  def set_args(args)
    pairs = args.each_slice(2)
    @aggregators = tuple_collect(pairs) { |name, expr|
      [name.to_sym, Aggregator.compile(expr)]
    }
    self
  end

  # Projection key coerced from the by attributes
  def cesure_key
    ProjectionKey.coerce(@by)
  end

  # Ordering key coerced from the order attributes
  def ordering_key
    OrderingKey.coerce(@order)
  end

  # (see Shortcut#longexpr)
  #
  # Chains the quota computation on top of a sort of the input on the by
  # attributes followed by the order attributes.
  def longexpr
    sort_key = cesure_key.to_ordering_key + ordering_key
    quota = SortBased.new(@by, @order, @aggregators)
    sorter = Operator::NonRelational::Sort.new(sort_key)
    chain(quota, sorter, datasets)
  end

end # class Quota
2741
+
2742
+ end
2743
+
2744
+ #
2745
+ # Aggregation operator.
2746
+ #
2747
class Aggregator

  # Aggregate options
  attr_reader :options

  #
  # Automatically installs factory methods for inherited classes.
  #
  # Example:
  #   class Sum < Aggregator   # will give a method Aggregator.sum
  #     ...
  #   end
  #   Aggregator.sum(:size)    # factors a Sum aggregator on tuple[:size]
  #   Aggregator.sum{ size }   # idem but works on any tuple expression
  #
  def self.inherited(clazz)
    basename = Tools.ruby_case(Tools.class_name(clazz))
    instance_eval <<-EOF
      def #{basename}(*args, &block)
        #{clazz}.new(*args, &block)
      end
    EOF
  end

  #
  # Compiles an aggregation expression (e.g. "sum(:qty)") by evaluating it
  # on this class, so that the factory methods installed by `inherited`
  # are available to it.
  #
  # NOTE(review): `expr` is executed as ruby code; only use with trusted
  # input. Also, Ruby's instance_eval does not accept both a string and a
  # block, so passing a block here is expected to raise an ArgumentError --
  # confirm whether the block parameter is actually needed.
  #
  def self.compile(expr, &block)
    instance_eval(expr, &block)
  end

  #
  # Creates an Aggregator instance.
  #
  # This constructor can be used either by passing an attribute
  # argument or a block that will be evaluated on a TupleHandle
  # instance set on each aggregated tuple.
  #
  #   Aggregator.new(:size)           # will aggregate on tuple[:size]
  #   Aggregator.new{ size * price }  # ... on tuple[:size] * tuple[:price]
  #
  # A Hash given as first argument is taken as the options.
  #
  def initialize(attribute = nil, options = {}, &block)
    attribute, options = nil, attribute if attribute.is_a?(Hash)
    @handle = Tools::TupleHandle.new
    @options = default_options.merge(options)
    @functor = Tools::TupleHandle.compile(attribute || block)
  end

  #
  # Returns the default options to use
  #
  def default_options
    {}
  end

  #
  # Returns the least value, which is the one to use on an empty
  # set.
  #
  # This method is intended to be overridden by subclasses; default
  # implementation returns nil.
  #
  def least
    nil
  end

  #
  # This method is called on each aggregated tuple and must return
  # an updated _memo_ value. It can be seen as the block typically
  # given to Enumerable.inject.
  #
  # The default implementation collects the pre-value on the tuple
  # and delegates to _happens.
  #
  def happens(memo, tuple)
    _happens(memo, @handle.set(tuple).evaluate(@functor))
  end

  #
  # This method finalizes a computation.
  #
  # Argument _memo_ is either _least_ or the result of aggregating
  # through _happens_. The default implementation simply returns
  # _memo_. The method is intended to be overridden for complex
  # aggregations that need stateful information. See Avg for an
  # example.
  #
  def finalize(memo)
    memo
  end

  #
  # Aggregates over an enumeration of tuples.
  #
  def aggregate(enum)
    finalize(
      enum.inject(least){|memo,tuple|
        happens(memo, tuple)
      })
  end

  protected

  #
  # @see happens.
  #
  # This method is intended to be overridden and returns _value_
  # by default, making this aggregator a "Last" one...
  #
  def _happens(memo, value)
    value
  end

  #
  # Defines a COUNT aggregation operator
  #
  class Count < Aggregator
    def least(); 0; end
    def happens(memo, tuple) memo + 1; end
  end # class Count

  #
  # Defines a SUM aggregation operator
  #
  class Sum < Aggregator
    def least(); 0; end
    def _happens(memo, val) memo + val; end
  end # class Sum

  #
  # Defines an AVG aggregation operator
  #
  # The memo is a [sum, count] pair of floats. On an empty set, finalize
  # computes 0.0 / 0.0, that is, NaN.
  #
  class Avg < Aggregator
    def least(); [0.0, 0.0]; end
    def _happens(memo, val) [memo.first + val, memo.last + 1]; end
    def finalize(memo) memo.first / memo.last end
  end # class Avg

  #
  # Defines a MIN aggregation operator
  #
  class Min < Aggregator
    def least(); nil; end
    def _happens(memo, val)
      memo.nil? ? val : (memo < val ? memo : val)
    end
  end # class Min

  #
  # Defines a MAX aggregation operator
  #
  class Max < Aggregator
    def least(); nil; end
    def _happens(memo, val)
      memo.nil? ? val : (memo > val ? memo : val)
    end
  end # class Max

  #
  # Defines a GROUP aggregation operator, which collects the distinct
  # sub-tuples built from the specified attributes
  #
  class Group < Aggregator
    def initialize(*attrs)
      super(nil, {}){
        Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
      }
    end
    def least(); []; end
    def _happens(memo, val)
      memo << val
    end
    def finalize(memo)
      memo.uniq
    end
  end # class Group

  #
  # Defines a COLLECT aggregation operator
  #
  class Collect < Aggregator
    def least(); []; end
    def _happens(memo, val)
      memo << val
    end
    # Returns a copy of the memo. _happens appends to the memo in place,
    # so callers that finalize after every tuple would otherwise receive
    # the same array each time and see it grow afterwards.
    def finalize(memo)
      memo.dup
    end
  end # class Collect

  #
  # Defines a CONCAT aggregation operator
  #
  # Options :before, :after and :between (all empty by default) are
  # respectively put before, after and between the concatenated values.
  #
  class Concat < Aggregator
    def least(); ""; end
    def default_options
      {:before => "", :after => "", :between => ""}
    end
    def _happens(memo, val)
      memo << options[:between].to_s unless memo.empty?
      memo << val.to_s
    end
    def finalize(memo)
      options[:before].to_s + memo + options[:after].to_s
    end
  end # class Concat

  Lispy::Agg = Aggregator
end # class Aggregator
2949
+
2950
+ #
2951
+ # Base class for implementing buffers.
2952
+ #
2953
class Buffer

  #
  # Keeps tuples ordered on a specific key
  #
  class Sorted < Buffer

    # Creates an empty buffer. `ordering_key` must respond to `sorter`
    # with a comparison block usable by Array#sort.
    def initialize(ordering_key)
      @ordering_key = ordering_key
      @buffer = []
    end

    # Adds all tuples of `enum` to the buffer, keeping it sorted. Returns
    # the new buffer content.
    def add_all(enum)
      sorter = @ordering_key.sorter
      @buffer = merge_sort(@buffer, enum.to_a.sort(&sorter), sorter)
    end

    # Yields each buffered tuple, in order.
    #
    # The block is captured explicitly: Proc.new without a block is no
    # longer supported as of Ruby 3.0. Without a block, an Enumerator is
    # returned (standard Array#each behavior).
    def each(&block)
      @buffer.each(&block)
    end

    private

    # Returns the tuples of `s1` and `s2` as one array sorted with
    # `sorter`.
    def merge_sort(s1, s2, sorter)
      (s1 + s2).sort(&sorter)
    end

  end # class Buffer::Sorted

end # class Buffer
2983
+
2984
+ end # module Alf