alf 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. data/CHANGELOG.md +64 -0
  2. data/Gemfile.lock +4 -4
  3. data/README.md +257 -171
  4. data/TODO.md +4 -4
  5. data/alf.gemspec +3 -3
  6. data/alf.noespec +11 -6
  7. data/examples/pseudo-with.alf +7 -0
  8. data/examples/runall.sh +2 -2
  9. data/examples/unwrap.alf +4 -0
  10. data/examples/wrap.alf +2 -0
  11. data/lib/alf/relation.rb +118 -0
  12. data/lib/alf/version.rb +1 -1
  13. data/lib/alf.rb +320 -169
  14. data/spec/integration/src/test_minus.alf +5 -0
  15. data/spec/integration/src/test_project.alf +9 -0
  16. data/spec/{alf_spec.rb → integration/test_alf.rb} +8 -21
  17. data/spec/integration/test_alf_specs.rb +37 -0
  18. data/spec/{examples_spec.rb → integration/test_examples.rb} +1 -1
  19. data/spec/spec_helper.rb +19 -1
  20. data/spec/unit/environment/examples/suppliers.rash +5 -0
  21. data/spec/{environment/explicit_spec.rb → unit/environment/test_explicit.rb} +0 -0
  22. data/spec/{environment/folder_spec.rb → unit/environment/test_folder.rb} +1 -1
  23. data/spec/{operator → unit/operator}/non_relational/compact/buffer_based.rb +0 -0
  24. data/spec/{operator/non_relational/compact/sort_based_spec.rb → unit/operator/non_relational/compact/test_sort_based.rb} +0 -0
  25. data/spec/{operator/non_relational/autonum_spec.rb → unit/operator/non_relational/test_autonum.rb} +0 -0
  26. data/spec/{operator/non_relational/clip_spec.rb → unit/operator/non_relational/test_clip.rb} +0 -0
  27. data/spec/{operator/non_relational/compact_spec.rb → unit/operator/non_relational/test_compact.rb} +0 -0
  28. data/spec/{operator/non_relational/defaults_spec.rb → unit/operator/non_relational/test_defaults.rb} +0 -0
  29. data/spec/{operator/non_relational/sort_spec.rb → unit/operator/non_relational/test_sort.rb} +0 -0
  30. data/spec/{operator/relational/join/hash_based_spec.rb → unit/operator/relational/join/test_hash_based.rb} +0 -0
  31. data/spec/unit/operator/relational/summarize/test_hash_based.rb +38 -0
  32. data/spec/{operator/relational/summarize/sort_based_spec.rb → unit/operator/relational/summarize/test_sort_based.rb} +0 -0
  33. data/spec/{operator/relational/extend_spec.rb → unit/operator/relational/test_extend.rb} +0 -0
  34. data/spec/{operator/relational/group_spec.rb → unit/operator/relational/test_group.rb} +3 -2
  35. data/spec/{operator/relational/intersect_spec.rb → unit/operator/relational/test_intersect.rb} +0 -0
  36. data/spec/unit/operator/relational/test_join.rb +36 -0
  37. data/spec/{operator/relational/minus_spec.rb → unit/operator/relational/test_minus.rb} +0 -0
  38. data/spec/{operator/relational/project_spec.rb → unit/operator/relational/test_project.rb} +0 -0
  39. data/spec/{operator/relational/quota_spec.rb → unit/operator/relational/test_quota.rb} +0 -0
  40. data/spec/{operator/relational/rename_spec.rb → unit/operator/relational/test_rename.rb} +0 -0
  41. data/spec/{operator/relational/restrict_spec.rb → unit/operator/relational/test_restrict.rb} +0 -0
  42. data/spec/unit/operator/relational/test_summarize.rb +64 -0
  43. data/spec/{operator/relational/ungroup_spec.rb → unit/operator/relational/test_ungroup.rb} +0 -0
  44. data/spec/{operator/relational/union_spec.rb → unit/operator/relational/test_union.rb} +0 -0
  45. data/spec/{operator/relational/unnest_spec.rb → unit/operator/relational/test_unwrap.rb} +5 -5
  46. data/spec/{operator/relational/nest_spec.rb → unit/operator/relational/test_wrap.rb} +5 -5
  47. data/spec/{operator/command_methods_spec.rb → unit/operator/test_command_methods.rb} +0 -0
  48. data/spec/unit/operator/test_non_relational.rb +18 -0
  49. data/spec/unit/operator/test_relational.rb +27 -0
  50. data/spec/{reader → unit/reader}/input.rb +0 -0
  51. data/spec/unit/reader/test_alf_file.rb +27 -0
  52. data/spec/{reader/rash_spec.rb → unit/reader/test_rash.rb} +0 -0
  53. data/spec/unit/relation/test_coerce.rb +53 -0
  54. data/spec/unit/relation/test_inspect.rb +20 -0
  55. data/spec/unit/relation/test_relops.rb +46 -0
  56. data/spec/{renderer/text/cell_spec.rb → unit/renderer/text/test_cell.rb} +0 -0
  57. data/spec/{renderer/text/row_spec.rb → unit/renderer/text/test_row.rb} +0 -0
  58. data/spec/{renderer/text/table_spec.rb → unit/renderer/text/test_table.rb} +0 -0
  59. data/spec/{aggregator_spec.rb → unit/test_aggregator.rb} +6 -6
  60. data/spec/{assumptions_spec.rb → unit/test_assumptions.rb} +0 -0
  61. data/spec/{lispy_spec.rb → unit/test_lispy.rb} +0 -0
  62. data/spec/unit/test_operator.rb +16 -0
  63. data/spec/{reader_spec.rb → unit/test_reader.rb} +4 -0
  64. data/spec/unit/test_relation.rb +40 -0
  65. data/spec/{renderer_spec.rb → unit/test_renderer.rb} +0 -0
  66. data/spec/{tools/ordering_key_spec.rb → unit/tools/test_ordering_key.rb} +0 -0
  67. data/spec/{tools/projection_key_spec.rb → unit/tools/test_projection_key.rb} +0 -0
  68. data/spec/{tools/tools_spec.rb → unit/tools/test_tools.rb} +0 -0
  69. data/spec/{tools/tuple_handle_spec.rb → unit/tools/test_tuple_handle.rb} +0 -0
  70. data/tasks/clean.rake +3 -0
  71. data/tasks/spec_test.rake +1 -1
  72. metadata +143 -114
  73. data/examples/nest.alf +0 -2
  74. data/examples/unnest.alf +0 -4
  75. data/examples/with.alf +0 -23
  76. data/spec/operator/relational/summarize_spec.rb +0 -41
  77. data/spec/reader/alf_file_spec.rb +0 -15
data/lib/alf.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "enumerator"
2
2
  require "stringio"
3
+ require "set"
3
4
  require "alf/version"
4
5
  require "alf/loader"
5
6
 
@@ -238,7 +239,8 @@ module Alf
238
239
 
239
240
  def compare(t1,t2)
240
241
  @ordering.each do |attr,order|
241
- comp = (t1[attr] <=> t2[attr])
242
+ x, y = t1[attr], t2[attr]
243
+ comp = x.respond_to?(:<=>) ? (x <=> y) : (x.to_s <=> y.to_s)
242
244
  comp *= -1 if order == :desc
243
245
  return comp unless comp == 0
244
246
  end
@@ -279,114 +281,6 @@ module Alf
279
281
  Command::Main.new(env)
280
282
  end
281
283
 
282
- #
283
- # Implements a small LISP-like DSL on top of Alf.
284
- #
285
- # The lispy dialect is the functional one used in .alf files and in compiled
286
- # expressions as below:
287
- #
288
- # Alf.lispy.compile do
289
- # (restrict :suppliers, lambda{ city == 'London' })
290
- # end
291
- #
292
- # The DSL this module provides is part of Alf's public API and won't be broken
293
- # without a major version change. The module itself and its inclusion pre-
294
- # conditions are not part of the DSL itself, thus not considered as part of
295
- # the API, and may therefore evolve at any time. In other words, this module
296
- # is not intended to be directly included by third-party classes.
297
- #
298
- module Lispy
299
-
300
- # The environment
301
- attr_accessor :environment
302
-
303
- #
304
- # Compiles a query expression given by a String or a block and returns
305
- # the result (typically a tuple iterator)
306
- #
307
- def compile(expr = nil, &block)
308
- expr.nil? ? instance_eval(&block) : instance_eval(expr)
309
- end
310
-
311
- # Delegated to the environment
312
- def dataset(name)
313
- raise "Environment not set" unless @environment
314
- @environment.dataset(name)
315
- end
316
-
317
- #
318
- # Compiles the subexpression given by the block in the context of
319
- # additional temporary expressions given by definitions
320
- #
321
- def with(definitions)
322
- # We branch with the definitions for compilation
323
- self.environment = environment.branch(definitions)
324
-
325
- # this is to ensure that sub definitions can reuse other
326
- # ones
327
- definitions.each_value do |defn|
328
- defn.environment = self.environment
329
- end
330
-
331
- # compile now
332
- op = compile(&Proc.new)
333
-
334
- # We now unbranch for next expression
335
- self.environment = environment.unbranch
336
-
337
- op
338
- end
339
-
340
- #
341
- # Chains some elements as a new operator
342
- #
343
- def chain(*elements)
344
- elements = elements.reverse
345
- elements[1..-1].inject(elements.first) do |c, elm|
346
- elm.pipe(c, environment)
347
- elm
348
- end
349
- end
350
-
351
- [ :Autonum, :Clip, :Compact, :Defaults, :Sort ].each do |op_name|
352
- meth_name = Tools.ruby_case(op_name).to_sym
353
- define_method(meth_name) do |child, *args|
354
- chain(Operator::NonRelational.const_get(op_name).new(*args), child)
355
- end
356
- end
357
-
358
- [:Project,
359
- :Extend,
360
- :Rename,
361
- :Restrict,
362
- :Nest,
363
- :Unnest,
364
- :Group,
365
- :Ungroup,
366
- :Summarize,
367
- :Quota ].each do |op_name|
368
- meth_name = Tools.ruby_case(op_name).to_sym
369
- define_method(meth_name) do |child, *args|
370
- chain(Operator::Relational.const_get(op_name).new(*args), child)
371
- end
372
- end
373
-
374
- def allbut(child, attributes)
375
- chain(Operator::Relational::Project.new(attributes, true), child)
376
- end
377
-
378
- [ :Join,
379
- :Union,
380
- :Intersect,
381
- :Minus ].each do |op_name|
382
- meth_name = Tools.ruby_case(op_name).to_sym
383
- define_method(meth_name) do |left, right, *args|
384
- chain(Operator::Relational.const_get(op_name).new(*args), [left, right])
385
- end
386
- end
387
-
388
- end # module Lispy
389
-
390
284
  #
391
285
  # Encapsulates the interface with the outside world, providing base iterators
392
286
  # for named datasets, among others.
@@ -574,24 +468,35 @@ module Alf
574
468
  # implementation.
575
469
  # @param [Environment] environment an optional environment for resolving
576
470
  # named datasets if needed.
471
+ # @return [Object] self
577
472
  #
578
473
  def pipe(input, environment = nil)
474
+ self
579
475
  end
580
476
  undef :pipe
581
477
 
582
-
583
478
  #
584
479
  # Coerces something to an iterator
585
480
  #
586
- def self.coerce(arg, env)
481
+ def self.coerce(arg, environment = nil)
587
482
  case arg
588
483
  when Iterator, Array
589
484
  arg
590
485
  else
591
- Reader.coerce(arg, env)
486
+ Reader.coerce(arg, environment)
592
487
  end
593
488
  end
594
489
 
490
+ #
491
+ # Converts this iterator to an in-memory Relation.
492
+ #
493
+ # @return [Relation] a relation instance, as the set of tuples
494
+ # that would be yielded by this iterator.
495
+ #
496
+ def to_rel
497
+ Relation::coerce(self)
498
+ end
499
+
595
500
  end # module Iterator
596
501
 
597
502
  #
@@ -650,8 +555,9 @@ module Alf
650
555
  end
651
556
 
652
557
  #
653
- # Returns a reader instance for a specific file whose path is given
654
- # as argument.
558
+ # When filepath is a String, returns a reader instance for a specific file
559
+ # whose path is given as argument. Otherwise, delegate the call to
560
+ # <code>coerce(filepath)</code>
655
561
  #
656
562
  # @param [String] filepath path to a file for which extension is recognized
657
563
  # @param [Array] args optional additional arguments that must be passed at
@@ -659,11 +565,15 @@ module Alf
659
565
  # @return [Reader] a reader instance
660
566
  #
661
567
  def self.reader(filepath, *args)
662
- ext = File.extname(filepath)
663
- if registered = @@readers.find{|r| r[1].include?(ext)}
664
- registered[2].new(filepath, *args)
568
+ if filepath.is_a?(String)
569
+ ext = File.extname(filepath)
570
+ if registered = @@readers.find{|r| r[1].include?(ext)}
571
+ registered[2].new(filepath, *args)
572
+ else
573
+ raise "No registered reader for #{ext} (#{filepath})"
574
+ end
665
575
  else
666
- raise "No registered reader for #{ext} (#{filepath})"
576
+ coerce(filepath)
667
577
  end
668
578
  end
669
579
 
@@ -715,6 +625,7 @@ module Alf
715
625
  #
716
626
  def pipe(input, env = environment)
717
627
  @input = input
628
+ self
718
629
  end
719
630
 
720
631
  #
@@ -733,6 +644,14 @@ module Alf
733
644
 
734
645
  protected
735
646
 
647
+ #
648
+ # Returns the input file path, or nil if this Reader is bound to an IO
649
+ # directly.
650
+ #
651
+ def input_path
652
+ input.is_a?(String) ? input : nil
653
+ end
654
+
736
655
  #
737
656
  # Coerces the input object to an IO and yields the block with it.
738
657
  #
@@ -822,7 +741,7 @@ module Alf
822
741
 
823
742
  # (see Reader#each)
824
743
  def each
825
- op = Alf.lispy(environment).compile(input_text)
744
+ op = Alf.lispy(environment).compile(input_text, input_path)
826
745
  op.each(&Proc.new)
827
746
  end
828
747
 
@@ -923,6 +842,7 @@ module Alf
923
842
  def pipe(input, env = environment)
924
843
  self.environment = env
925
844
  self.input = input
845
+ self
926
846
  end
927
847
 
928
848
  #
@@ -1033,7 +953,7 @@ module Alf
1033
953
  # See '#{program_name} help COMMAND' for details about a specific command.
1034
954
  #
1035
955
  class Main < Quickl::Delegator(__FILE__, __LINE__)
1036
- include Command, Lispy
956
+ include Command
1037
957
 
1038
958
  # Environment instance to use to get base iterators
1039
959
  attr_accessor :environment
@@ -1044,6 +964,7 @@ module Alf
1044
964
  # Creates a command instance
1045
965
  def initialize(env = Environment.default)
1046
966
  @environment = env
967
+ extend(Lispy)
1047
968
  end
1048
969
 
1049
970
  # Install options
@@ -1098,7 +1019,7 @@ module Alf
1098
1019
  # 3) if there is a requester, then we do the job (assuming bin/alf)
1099
1020
  # with the renderer to use. Otherwise, we simply return built operator
1100
1021
  if operator && requester
1101
- chain(renderer, operator).execute($stdout)
1022
+ renderer.pipe(operator, environment).execute($stdout)
1102
1023
  else
1103
1024
  operator
1104
1025
  end
@@ -1138,7 +1059,7 @@ module Alf
1138
1059
  def execute(args)
1139
1060
  requester.renderer = @renderer
1140
1061
  args = [ $stdin ] if args.empty?
1141
- requester.chain(*args)
1062
+ requester.send(:chain,*args)
1142
1063
  end
1143
1064
 
1144
1065
  end # class Show
@@ -1198,7 +1119,42 @@ module Alf
1198
1119
  #
1199
1120
  module Operator
1200
1121
  include Iterator, Tools
1122
+
1123
+ #
1124
+ # Yields non-relational then relational operators, in turn.
1125
+ #
1126
+ def self.each
1127
+ Operator::NonRelational.each{|x| yield(x)}
1128
+ Operator::Relational.each{|x| yield(x)}
1129
+ end
1201
1130
 
1131
+ #
1132
+ # Encapsulates methods that allow operator introspection, that is,
1133
+ # knowing operator cardinality and similar stuff.
1134
+ #
1135
+ module Introspection
1136
+
1137
+ #
1138
+ # Returns true if this operator is a unary operator, false otherwise
1139
+ #
1140
+ def unary?
1141
+ ancestors.include?(Operator::Unary)
1142
+ end
1143
+
1144
+ #
1145
+ # Returns true if this operator is a binary operator, false otherwise
1146
+ #
1147
+ def binary?
1148
+ ancestors.include?(Operator::Binary)
1149
+ end
1150
+
1151
+ end # module Introspection
1152
+
1153
+ # Ensures that the Introspection module is set on real operators
1154
+ def self.included(mod)
1155
+ mod.extend(Introspection) if mod.is_a?(Class)
1156
+ end
1157
+
1202
1158
  #
1203
1159
  # Encapsulates method definitions that convert operators to Quickl
1204
1160
  # commands
@@ -1238,7 +1194,7 @@ module Alf
1238
1194
  end
1239
1195
 
1240
1196
  def split_command_args(args)
1241
- operands, args = case i = args.index("--")
1197
+ case (i = args.index("--"))
1242
1198
  when NilClass
1243
1199
  [args, []]
1244
1200
  when 0
@@ -1328,12 +1284,13 @@ module Alf
1328
1284
  def pipe(input, env = environment)
1329
1285
  self.environment = env
1330
1286
  self.datasets = [ input ]
1287
+ self
1331
1288
  end
1332
1289
 
1333
1290
  protected
1334
1291
 
1335
1292
  def command_line_operands(operands)
1336
- operands.first
1293
+ operands.first || $stdin
1337
1294
  end
1338
1295
 
1339
1296
  #
@@ -1366,6 +1323,7 @@ module Alf
1366
1323
  def pipe(input, env = environment)
1367
1324
  self.environment = env
1368
1325
  self.datasets = input
1326
+ self
1369
1327
  end
1370
1328
 
1371
1329
  protected
@@ -1461,6 +1419,7 @@ module Alf
1461
1419
  def pipe(input, env = environment)
1462
1420
  self.environment = env
1463
1421
  self.datasets = input
1422
+ self
1464
1423
  end
1465
1424
 
1466
1425
  protected
@@ -1504,26 +1463,42 @@ module Alf
1504
1463
  #
1505
1464
  module Operator::NonRelational
1506
1465
 
1466
+ #
1467
+ # Yields the block with each operator module in turn
1468
+ #
1469
+ def self.each
1470
+ constants.each do |c|
1471
+ val = const_get(c)
1472
+ yield(val) if val.ancestors.include?(Operator::NonRelational)
1473
+ end
1474
+ end
1475
+
1507
1476
  #
1508
- # Extend with an unique autonumber attribute
1477
+ # Extend its operand with a unique autonumber attribute
1509
1478
  #
1510
1479
  # SYNOPSIS
1511
- # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1512
1480
  #
1513
- # API & EXAMPLE
1481
+ # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1514
1482
  #
1515
- # # Autonumber suppliers (:autonum attribute name by default)
1516
- # (autonum :suppliers)
1483
+ # DESCRIPTION
1517
1484
  #
1518
- # # You can specify the attribute name
1485
+ # This non-relational operator guarantees uniqueness of output tuples by
1486
+ # adding an attribute called 'ATTRNAME' whose value is an Integer. No
1487
+ # guarantee is given about ordering of output tuples, nor to the fact
1488
+ # that this autonumber is sequential. Only that all values are different.
1489
+ # If the presence of duplicates was the only "non-relational" aspect of
1490
+ # input tuples, the result may be considered a valid relation representation.
1491
+ #
1492
+ # IN RUBY
1493
+ #
1494
+ # (autonum OPERAND, ATTRNAME = :autonum)
1495
+ #
1496
+ # (autonum :suppliers)
1519
1497
  # (autonum :suppliers, :unique_id)
1520
1498
  #
1521
- # DESCRIPTION
1499
+ # IN SHELL
1522
1500
  #
1523
- # This operator takes input tuples in any order they come and extends them
1524
- # with an autonumber attribute ATTRNAME. This allows converting non-relational
1525
- # tuple enumerators to relational ones by ensuring uniqueness of tuples in an
1526
- # arbitrary manner.
1501
+ # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1527
1502
  #
1528
1503
  # alf autonum suppliers
1529
1504
  # alf autonum suppliers -- unique_id
@@ -1845,6 +1820,15 @@ module Alf
1845
1820
  module Operator::Relational
1846
1821
 
1847
1822
  #
1823
+ # Yields the block with each operator module in turn
1824
+ #
1825
+ def self.each
1826
+ constants.each do |c|
1827
+ val = const_get(c)
1828
+ yield(val) if val.ancestors.include?(Operator::Relational)
1829
+ end
1830
+ end
1831
+
1848
1832
  # Relational projection (clip + compact)
1849
1833
  #
1850
1834
  # SYNOPSIS
@@ -2309,35 +2293,35 @@ module Alf
2309
2293
  end # class Union
2310
2294
 
2311
2295
  #
2312
- # Relational nesting (tuple-valued attributes)
2296
+ # Relational wrapping (tuple-valued attributes)
2313
2297
  #
2314
2298
  # SYNOPSIS
2315
2299
  # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2316
2300
  #
2317
2301
  # API & EXAMPLE
2318
2302
  #
2319
- # (nest :suppliers, [:city, :status], :loc_and_status)
2303
+ # (wrap :suppliers, [:city, :status], :loc_and_status)
2320
2304
  #
2321
2305
  # DESCRIPTION
2322
2306
  #
2323
- # This operator nests attributes ATTR1 to ATTRN as a new, tuple-based
2324
- # attribute whose name is NEWNAME. When used in shell, names of nested
2307
+ # This operator wraps attributes ATTR1 to ATTRN as a new, tuple-based
2308
+ # attribute whose name is NEWNAME. When used in shell, names of wrapped
2325
2309
  # attributes are taken from commandline arguments, except the last one
2326
2310
  # which defines the new name to use:
2327
2311
  #
2328
- # alf nest suppliers -- city status loc_and_status
2312
+ # alf wrap suppliers -- city status loc_and_status
2329
2313
  #
2330
- class Nest < Factory::Operator(__FILE__, __LINE__)
2314
+ class Wrap < Factory::Operator(__FILE__, __LINE__)
2331
2315
  include Operator::Relational, Operator::Transform
2332
2316
 
2333
- # Array of nesting attributes
2317
+ # Array of wrapping attributes
2334
2318
  attr_accessor :attributes
2335
2319
 
2336
- # New name for the nested attribute
2320
+ # New name for the wrapped attribute
2337
2321
  attr_accessor :as
2338
2322
 
2339
- # Builds a Nest operator instance
2340
- def initialize(attributes = [], as = :nested)
2323
+ # Builds a Wrap operator instance
2324
+ def initialize(attributes = [], as = :wrapped)
2341
2325
  @attributes = attributes
2342
2326
  @as = as
2343
2327
  end
@@ -2358,36 +2342,36 @@ module Alf
2358
2342
  others
2359
2343
  end
2360
2344
 
2361
- end # class Nest
2345
+ end # class Wrap
2362
2346
 
2363
2347
  #
2364
- # Relational un-nesting (inverse of nest)
2348
+ # Relational un-wrapping (inverse of wrap)
2365
2349
  #
2366
2350
  # SYNOPSIS
2367
2351
  # #{program_name} #{command_name} [OPERAND] -- ATTR
2368
2352
  #
2369
2353
  # API & EXAMPLE
2370
2354
  #
2371
- # # Assuming nested = (nest :suppliers, [:city, :status], :loc_and_status)
2372
- # (unnest nested, :loc_and_status)
2355
+ # # Assuming wrapped = (wrap :suppliers, [:city, :status], :loc_and_status)
2356
+ # (unwrap wrapped, :loc_and_status)
2373
2357
  #
2374
2358
  # DESCRIPTION
2375
2359
  #
2376
- # This operator unnests the tuple-valued attribute named ATTR so as to
2360
+ # This operator unwraps the tuple-valued attribute named ATTR so as to
2377
2361
  # flatten its pairs with 'upstream' tuple. The latter should be such so that
2378
2362
  # no name collision occurs. When used in shell, the name of the attribute to
2379
- # unnest is taken as the first commandline argument:
2363
+ # unwrap is taken as the first commandline argument:
2380
2364
  #
2381
- # alf unnest nest -- loc_and_status
2365
+ # alf unwrap wrap -- loc_and_status
2382
2366
  #
2383
- class Unnest < Factory::Operator(__FILE__, __LINE__)
2367
+ class Unwrap < Factory::Operator(__FILE__, __LINE__)
2384
2368
  include Operator::Relational, Operator::Transform
2385
2369
 
2386
- # Name of the attribute to unnest
2370
+ # Name of the attribute to unwrap
2387
2371
  attr_accessor :attribute
2388
2372
 
2389
2373
  # Builds a Rename operator instance
2390
- def initialize(attribute = :nested)
2374
+ def initialize(attribute = :wrapped)
2391
2375
  @attribute = attribute
2392
2376
  end
2393
2377
 
@@ -2402,11 +2386,11 @@ module Alf
2402
2386
  # (see Operator::Transform#_tuple2tuple)
2403
2387
  def _tuple2tuple(tuple)
2404
2388
  tuple = tuple.dup
2405
- nested = tuple.delete(@attribute) || {}
2406
- tuple.merge(nested)
2389
+ wrapped = tuple.delete(@attribute) || {}
2390
+ tuple.merge(wrapped)
2407
2391
  end
2408
2392
 
2409
- end # class Unnest
2393
+ end # class Unwrap
2410
2394
 
2411
2395
  #
2412
2396
  # Relational grouping (relation-valued attributes)
@@ -2464,7 +2448,7 @@ module Alf
2464
2448
  # See Operator#_prepare
2465
2449
  def _prepare
2466
2450
  pkey = ProjectionKey.new(attributes, !allbut)
2467
- @index = Hash.new{|h,k| h[k] = []}
2451
+ @index = Hash.new{|h,k| h[k] = Set.new}
2468
2452
  each_input_tuple do |tuple|
2469
2453
  key, rest = pkey.split(tuple)
2470
2454
  @index[key] << rest
@@ -2474,7 +2458,7 @@ module Alf
2474
2458
  # See Operator#_each
2475
2459
  def _each
2476
2460
  @index.each_pair do |k,v|
2477
- yield(k.merge(@as => v))
2461
+ yield(k.merge(@as => Relation.coerce(v)))
2478
2462
  end
2479
2463
  end
2480
2464
 
@@ -2537,7 +2521,7 @@ module Alf
2537
2521
  # Relational summarization (group-by + aggregate ops)
2538
2522
  #
2539
2523
  # SYNOPSIS
2540
- # #{program_name} #{command_name} [OPERAND] --by=KEY1,KEY2... -- AGG1 EXPR1...
2524
+ # #{program_name} #{command_name} [OPERAND] [--allbut] --by=KEY1,KEY2... -- AGG1 EXPR1...
2541
2525
  #
2542
2526
  # OPTIONS
2543
2527
  # #{summarized_options}
@@ -2547,6 +2531,10 @@ module Alf
2547
2531
  # (summarize :supplies, [:sid],
2548
2532
  # :total_qty => Aggregator.sum(:qty))
2549
2533
  #
2534
+ # # Or, to specify an allbut projection
2535
+ # (summarize :supplies, [:qty, :pid],
2536
+ # :total_qty => Aggregator.sum(:qty), true)
2537
+ #
2550
2538
  # DESCRIPTION
2551
2539
  #
2552
2540
  # This operator summarizes input tuples on the projection on KEY1,KEY2,...
@@ -2558,6 +2546,7 @@ module Alf
2558
2546
  # aggregation expression evaluated on Aggregator:
2559
2547
  #
2560
2548
  # alf summarize supplies --by=sid -- total_qty "sum(:qty)"
2549
+ # alf summarize supplies --allbut --by=pid,qty -- total_qty "sum(:qty)"
2561
2550
  #
2562
2551
  class Summarize < Factory::Operator(__FILE__, __LINE__)
2563
2552
  include Operator::Relational, Operator::Shortcut, Operator::Unary
@@ -2565,11 +2554,15 @@ module Alf
2565
2554
  # By attributes
2566
2555
  attr_accessor :by
2567
2556
 
2557
+ # Allbut on by?
2558
+ attr_accessor :allbut
2559
+
2568
2560
  # Aggregations as a AGG => Aggregator(EXPR) hash
2569
2561
  attr_accessor :aggregators
2570
2562
 
2571
- def initialize(by = [], aggregators = {})
2563
+ def initialize(by = [], aggregators = {}, allbut = false)
2572
2564
  @by = by
2565
+ @allbut = allbut
2573
2566
  @aggregators = aggregators
2574
2567
  end
2575
2568
 
@@ -2578,6 +2571,9 @@ module Alf
2578
2571
  opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
2579
2572
  @by = args.collect{|a| a.to_sym}
2580
2573
  end
2574
+ opt.on('--allbut', 'Make an allbut projection/summarization') do
2575
+ @allbut = true
2576
+ end
2581
2577
  end
2582
2578
 
2583
2579
  # Summarizes according to a complete order
@@ -2613,7 +2609,42 @@ module Alf
2613
2609
  end
2614
2610
 
2615
2611
  end # class SortBased
2612
+
2613
+ # Summarizes in-memory with a hash
2614
+ class HashBased
2615
+ include Operator::Relational, Operator::Unary
2616
+
2617
+ attr_reader :by_key
2618
+ attr_reader :aggregators
2616
2619
 
2620
+ def initialize(by_key, aggregators)
2621
+ @by_key, @aggregators = by_key, aggregators
2622
+ end
2623
+
2624
+ protected
2625
+
2626
+ def _each
2627
+ index = Hash.new do |h,k|
2628
+ h[k] = tuple_collect(@aggregators) do |a,agg|
2629
+ [a, agg.least]
2630
+ end
2631
+ end
2632
+ each_input_tuple do |tuple|
2633
+ key, rest = by_key.split(tuple)
2634
+ index[key] = tuple_collect(@aggregators) do |a,agg|
2635
+ [a, agg.happens(index[key][a], tuple)]
2636
+ end
2637
+ end
2638
+ index.each_pair do |key,aggs|
2639
+ aggs = tuple_collect(@aggregators) do |a,agg|
2640
+ [a, agg.finalize(aggs[a])]
2641
+ end
2642
+ yield key.merge(aggs)
2643
+ end
2644
+ end
2645
+
2646
+ end
2647
+
2617
2648
  protected
2618
2649
 
2619
2650
  # (see Operator::CommandMethods#set_args)
@@ -2625,10 +2656,16 @@ module Alf
2625
2656
  end
2626
2657
 
2627
2658
  def longexpr
2628
- by_key = Tools::ProjectionKey.new(@by, false)
2629
- chain SortBased.new(by_key, @aggregators),
2630
- Operator::NonRelational::Sort.new(by_key.to_ordering_key),
2631
- datasets
2659
+ if @allbut
2660
+ by_key = Tools::ProjectionKey.new(@by, @allbut)
2661
+ chain HashBased.new(by_key, @aggregators),
2662
+ datasets
2663
+ else
2664
+ by_key = Tools::ProjectionKey.new(@by, @allbut)
2665
+ chain SortBased.new(by_key, @aggregators),
2666
+ Operator::NonRelational::Sort.new(by_key.to_ordering_key),
2667
+ datasets
2668
+ end
2632
2669
  end
2633
2670
 
2634
2671
  end # class Summarize
@@ -2908,12 +2945,12 @@ module Alf
2908
2945
  Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
2909
2946
  }
2910
2947
  end
2911
- def least(); []; end
2948
+ def least(); Set.new; end
2912
2949
  def _happens(memo, val)
2913
2950
  memo << val
2914
2951
  end
2915
2952
  def finalize(memo)
2916
- memo.uniq
2953
+ Relation.coerce memo
2917
2954
  end
2918
2955
  end
2919
2956
 
@@ -2944,7 +2981,6 @@ module Alf
2944
2981
  end
2945
2982
  end
2946
2983
 
2947
- Lispy::Agg = Aggregator
2948
2984
  end # class Aggregator
2949
2985
 
2950
2986
  #
@@ -2981,4 +3017,119 @@ module Alf
2981
3017
 
2982
3018
  end # class Buffer
2983
3019
 
3020
+ #
3021
+ # Implements a small LISP-like DSL on top of Alf.
3022
+ #
3023
+ # The lispy dialect is the functional one used in .alf files and in compiled
3024
+ # expressions as below:
3025
+ #
3026
+ # Alf.lispy.compile do
3027
+ # (restrict :suppliers, lambda{ city == 'London' })
3028
+ # end
3029
+ #
3030
+ # The DSL this module provides is part of Alf's public API and won't be broken
3031
+ # without a major version change. The module itself and its inclusion pre-
3032
+ # conditions are not part of the DSL itself, thus not considered as part of
3033
+ # the API, and may therefore evolve at any time. In other words, this module
3034
+ # is not intended to be directly included by third-party classes.
3035
+ #
3036
+ module Lispy
3037
+
3038
+ alias :ruby_extend :extend
3039
+
3040
+ # The environment
3041
+ attr_accessor :environment
3042
+
3043
+ #
3044
+ # Compiles a query expression given by a String or a block and returns
3045
+ # the result (typically a tuple iterator)
3046
+ #
3047
+ # Example
3048
+ #
3049
+ # # with a string
3050
+ # op = compile "(restrict :suppliers, lambda{ city == 'London' })"
3051
+ #
3052
+ # # or with a block
3053
+ # op = compile {
3054
+ # (restrict :suppliers, lambda{ city == 'London' })
3055
+ # }
3056
+ #
3057
+ # @param [String] expr a Lispy expression to compile
3058
+ # @return [Iterator] the iterator resulting from compilation
3059
+ #
3060
+ def compile(expr = nil, path = nil, &block)
3061
+ if expr.nil?
3062
+ instance_eval(&block)
3063
+ else
3064
+ (path ? Kernel.eval(expr, binding, path) : Kernel.eval(expr, binding))
3065
+ end
3066
+ end
3067
+
3068
+ #
3069
+ # Evaluates a query expression given by a String or a block and returns
3070
+ # the result as an in-memory relation (Alf::Relation)
3071
+ #
3072
+ # Example:
3073
+ #
3074
+ # # with a string
3075
+ # rel = evaluate "(restrict :suppliers, lambda{ city == 'London' })"
3076
+ #
3077
+ # # or with a block
3078
+ # rel = evaluate {
3079
+ # (restrict :suppliers, lambda{ city == 'London' })
3080
+ # }
3081
+ #
3082
+ def evaluate(expr = nil, path = nil, &block)
3083
+ compile(expr, path, &block).to_rel
3084
+ end
3085
+
3086
+ #
3087
+ # Delegated to the current environment
3088
+ #
3089
+ # This method returns the dataset associated to a given name. The result
3090
+ # may depend on the current environment, but is generally an Iterator,
3091
+ # often a Reader instance.
3092
+ #
3093
+ # @param [Symbol] name name of the dataset to retrieve
3094
+ # @return [Iterator] the dataset as an iterator
3095
+ # @see Environment#dataset
3096
+ #
3097
+ def dataset(name)
3098
+ raise "Environment not set" unless @environment
3099
+ @environment.dataset(name)
3100
+ end
3101
+
3102
+ # Functional equivalent to Alf::Relation[...]
3103
+ def relation(*tuples)
3104
+ Relation.coerce(tuples)
3105
+ end
3106
+
3107
+ #
3108
+ # Install the DSL through iteration over defined operators
3109
+ #
3110
+ Operator::each do |op_class|
3111
+ meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
3112
+ if op_class.unary?
3113
+ define_method(meth_name) do |child, *args|
3114
+ child = Iterator.coerce(child, environment)
3115
+ op_class.new(*args).pipe(child, environment)
3116
+ end
3117
+ elsif op_class.binary?
3118
+ define_method(meth_name) do |left, right, *args|
3119
+ operands = [left, right].collect{|x| Iterator.coerce(x, environment)}
3120
+ op_class.new(*args).pipe(operands, environment)
3121
+ end
3122
+ else
3123
+ raise "Unexpected operator #{op_class}"
3124
+ end
3125
+ end # Operators::each
3126
+
3127
+ def allbut(child, attributes)
3128
+ (project child, attributes, true)
3129
+ end
3130
+
3131
+ Agg = Alf::Aggregator
3132
+ end # module Lispy
3133
+
2984
3134
  end # module Alf
3135
+ require "alf/relation"