alf 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. data/CHANGELOG.md +64 -0
  2. data/Gemfile.lock +4 -4
  3. data/README.md +257 -171
  4. data/TODO.md +4 -4
  5. data/alf.gemspec +3 -3
  6. data/alf.noespec +11 -6
  7. data/examples/pseudo-with.alf +7 -0
  8. data/examples/runall.sh +2 -2
  9. data/examples/unwrap.alf +4 -0
  10. data/examples/wrap.alf +2 -0
  11. data/lib/alf/relation.rb +118 -0
  12. data/lib/alf/version.rb +1 -1
  13. data/lib/alf.rb +320 -169
  14. data/spec/integration/src/test_minus.alf +5 -0
  15. data/spec/integration/src/test_project.alf +9 -0
  16. data/spec/{alf_spec.rb → integration/test_alf.rb} +8 -21
  17. data/spec/integration/test_alf_specs.rb +37 -0
  18. data/spec/{examples_spec.rb → integration/test_examples.rb} +1 -1
  19. data/spec/spec_helper.rb +19 -1
  20. data/spec/unit/environment/examples/suppliers.rash +5 -0
  21. data/spec/{environment/explicit_spec.rb → unit/environment/test_explicit.rb} +0 -0
  22. data/spec/{environment/folder_spec.rb → unit/environment/test_folder.rb} +1 -1
  23. data/spec/{operator → unit/operator}/non_relational/compact/buffer_based.rb +0 -0
  24. data/spec/{operator/non_relational/compact/sort_based_spec.rb → unit/operator/non_relational/compact/test_sort_based.rb} +0 -0
  25. data/spec/{operator/non_relational/autonum_spec.rb → unit/operator/non_relational/test_autonum.rb} +0 -0
  26. data/spec/{operator/non_relational/clip_spec.rb → unit/operator/non_relational/test_clip.rb} +0 -0
  27. data/spec/{operator/non_relational/compact_spec.rb → unit/operator/non_relational/test_compact.rb} +0 -0
  28. data/spec/{operator/non_relational/defaults_spec.rb → unit/operator/non_relational/test_defaults.rb} +0 -0
  29. data/spec/{operator/non_relational/sort_spec.rb → unit/operator/non_relational/test_sort.rb} +0 -0
  30. data/spec/{operator/relational/join/hash_based_spec.rb → unit/operator/relational/join/test_hash_based.rb} +0 -0
  31. data/spec/unit/operator/relational/summarize/test_hash_based.rb +38 -0
  32. data/spec/{operator/relational/summarize/sort_based_spec.rb → unit/operator/relational/summarize/test_sort_based.rb} +0 -0
  33. data/spec/{operator/relational/extend_spec.rb → unit/operator/relational/test_extend.rb} +0 -0
  34. data/spec/{operator/relational/group_spec.rb → unit/operator/relational/test_group.rb} +3 -2
  35. data/spec/{operator/relational/intersect_spec.rb → unit/operator/relational/test_intersect.rb} +0 -0
  36. data/spec/unit/operator/relational/test_join.rb +36 -0
  37. data/spec/{operator/relational/minus_spec.rb → unit/operator/relational/test_minus.rb} +0 -0
  38. data/spec/{operator/relational/project_spec.rb → unit/operator/relational/test_project.rb} +0 -0
  39. data/spec/{operator/relational/quota_spec.rb → unit/operator/relational/test_quota.rb} +0 -0
  40. data/spec/{operator/relational/rename_spec.rb → unit/operator/relational/test_rename.rb} +0 -0
  41. data/spec/{operator/relational/restrict_spec.rb → unit/operator/relational/test_restrict.rb} +0 -0
  42. data/spec/unit/operator/relational/test_summarize.rb +64 -0
  43. data/spec/{operator/relational/ungroup_spec.rb → unit/operator/relational/test_ungroup.rb} +0 -0
  44. data/spec/{operator/relational/union_spec.rb → unit/operator/relational/test_union.rb} +0 -0
  45. data/spec/{operator/relational/unnest_spec.rb → unit/operator/relational/test_unwrap.rb} +5 -5
  46. data/spec/{operator/relational/nest_spec.rb → unit/operator/relational/test_wrap.rb} +5 -5
  47. data/spec/{operator/command_methods_spec.rb → unit/operator/test_command_methods.rb} +0 -0
  48. data/spec/unit/operator/test_non_relational.rb +18 -0
  49. data/spec/unit/operator/test_relational.rb +27 -0
  50. data/spec/{reader → unit/reader}/input.rb +0 -0
  51. data/spec/unit/reader/test_alf_file.rb +27 -0
  52. data/spec/{reader/rash_spec.rb → unit/reader/test_rash.rb} +0 -0
  53. data/spec/unit/relation/test_coerce.rb +53 -0
  54. data/spec/unit/relation/test_inspect.rb +20 -0
  55. data/spec/unit/relation/test_relops.rb +46 -0
  56. data/spec/{renderer/text/cell_spec.rb → unit/renderer/text/test_cell.rb} +0 -0
  57. data/spec/{renderer/text/row_spec.rb → unit/renderer/text/test_row.rb} +0 -0
  58. data/spec/{renderer/text/table_spec.rb → unit/renderer/text/test_table.rb} +0 -0
  59. data/spec/{aggregator_spec.rb → unit/test_aggregator.rb} +6 -6
  60. data/spec/{assumptions_spec.rb → unit/test_assumptions.rb} +0 -0
  61. data/spec/{lispy_spec.rb → unit/test_lispy.rb} +0 -0
  62. data/spec/unit/test_operator.rb +16 -0
  63. data/spec/{reader_spec.rb → unit/test_reader.rb} +4 -0
  64. data/spec/unit/test_relation.rb +40 -0
  65. data/spec/{renderer_spec.rb → unit/test_renderer.rb} +0 -0
  66. data/spec/{tools/ordering_key_spec.rb → unit/tools/test_ordering_key.rb} +0 -0
  67. data/spec/{tools/projection_key_spec.rb → unit/tools/test_projection_key.rb} +0 -0
  68. data/spec/{tools/tools_spec.rb → unit/tools/test_tools.rb} +0 -0
  69. data/spec/{tools/tuple_handle_spec.rb → unit/tools/test_tuple_handle.rb} +0 -0
  70. data/tasks/clean.rake +3 -0
  71. data/tasks/spec_test.rake +1 -1
  72. metadata +143 -114
  73. data/examples/nest.alf +0 -2
  74. data/examples/unnest.alf +0 -4
  75. data/examples/with.alf +0 -23
  76. data/spec/operator/relational/summarize_spec.rb +0 -41
  77. data/spec/reader/alf_file_spec.rb +0 -15
data/lib/alf.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "enumerator"
2
2
  require "stringio"
3
+ require "set"
3
4
  require "alf/version"
4
5
  require "alf/loader"
5
6
 
@@ -238,7 +239,8 @@ module Alf
238
239
 
239
240
  def compare(t1,t2)
240
241
  @ordering.each do |attr,order|
241
- comp = (t1[attr] <=> t2[attr])
242
+ x, y = t1[attr], t2[attr]
243
+ comp = x.respond_to?(:<=>) ? (x <=> y) : (x.to_s <=> y.to_s)
242
244
  comp *= -1 if order == :desc
243
245
  return comp unless comp == 0
244
246
  end
@@ -279,114 +281,6 @@ module Alf
279
281
  Command::Main.new(env)
280
282
  end
281
283
 
282
- #
283
- # Implements a small LISP-like DSL on top of Alf.
284
- #
285
- # The lispy dialect is the functional one used in .alf files and in compiled
286
- # expressions as below:
287
- #
288
- # Alf.lispy.compile do
289
- # (restrict :suppliers, lambda{ city == 'London' })
290
- # end
291
- #
292
- # The DSL this module provides is part of Alf's public API and won't be broken
293
- # without a major version change. The module itself and its inclusion pre-
294
- # conditions are not part of the DSL itself, thus not considered as part of
295
- # the API, and may therefore evolve at any time. In other words, this module
296
- # is not intended to be directly included by third-party classes.
297
- #
298
- module Lispy
299
-
300
- # The environment
301
- attr_accessor :environment
302
-
303
- #
304
- # Compiles a query expression given by a String or a block and returns
305
- # the result (typically a tuple iterator)
306
- #
307
- def compile(expr = nil, &block)
308
- expr.nil? ? instance_eval(&block) : instance_eval(expr)
309
- end
310
-
311
- # Delegated to the environment
312
- def dataset(name)
313
- raise "Environment not set" unless @environment
314
- @environment.dataset(name)
315
- end
316
-
317
- #
318
- # Compiles the subexpression given by the block in the context of
319
- # additional temporary expressions given by definitions
320
- #
321
- def with(definitions)
322
- # We branch with the definitions for compilation
323
- self.environment = environment.branch(definitions)
324
-
325
- # this is to ensure that sub definitions can reuse other
326
- # ones
327
- definitions.each_value do |defn|
328
- defn.environment = self.environment
329
- end
330
-
331
- # compile now
332
- op = compile(&Proc.new)
333
-
334
- # We now unbranch for next expression
335
- self.environment = environment.unbranch
336
-
337
- op
338
- end
339
-
340
- #
341
- # Chains some elements as a new operator
342
- #
343
- def chain(*elements)
344
- elements = elements.reverse
345
- elements[1..-1].inject(elements.first) do |c, elm|
346
- elm.pipe(c, environment)
347
- elm
348
- end
349
- end
350
-
351
- [ :Autonum, :Clip, :Compact, :Defaults, :Sort ].each do |op_name|
352
- meth_name = Tools.ruby_case(op_name).to_sym
353
- define_method(meth_name) do |child, *args|
354
- chain(Operator::NonRelational.const_get(op_name).new(*args), child)
355
- end
356
- end
357
-
358
- [:Project,
359
- :Extend,
360
- :Rename,
361
- :Restrict,
362
- :Nest,
363
- :Unnest,
364
- :Group,
365
- :Ungroup,
366
- :Summarize,
367
- :Quota ].each do |op_name|
368
- meth_name = Tools.ruby_case(op_name).to_sym
369
- define_method(meth_name) do |child, *args|
370
- chain(Operator::Relational.const_get(op_name).new(*args), child)
371
- end
372
- end
373
-
374
- def allbut(child, attributes)
375
- chain(Operator::Relational::Project.new(attributes, true), child)
376
- end
377
-
378
- [ :Join,
379
- :Union,
380
- :Intersect,
381
- :Minus ].each do |op_name|
382
- meth_name = Tools.ruby_case(op_name).to_sym
383
- define_method(meth_name) do |left, right, *args|
384
- chain(Operator::Relational.const_get(op_name).new(*args), [left, right])
385
- end
386
- end
387
-
388
- end # module Lispy
389
-
390
284
  #
391
285
  # Encapsulates the interface with the outside world, providing base iterators
392
286
  # for named datasets, among others.
@@ -574,24 +468,35 @@ module Alf
574
468
  # implementation.
575
469
  # @param [Environment] environment an optional environment for resolving
576
470
  # named datasets if needed.
471
+ # @return [Object] self
577
472
  #
578
473
  def pipe(input, environment = nil)
474
+ self
579
475
  end
580
476
  undef :pipe
581
477
 
582
-
583
478
  #
584
479
  # Coerces something to an iterator
585
480
  #
586
- def self.coerce(arg, env)
481
+ def self.coerce(arg, environment = nil)
587
482
  case arg
588
483
  when Iterator, Array
589
484
  arg
590
485
  else
591
- Reader.coerce(arg, env)
486
+ Reader.coerce(arg, environment)
592
487
  end
593
488
  end
594
489
 
490
+ #
491
+ # Converts this iterator to an in-memory Relation.
492
+ #
493
+ # @return [Relation] a relation instance, as the set of tuples
494
+ # that would be yield by this iterator.
495
+ #
496
+ def to_rel
497
+ Relation::coerce(self)
498
+ end
499
+
595
500
  end # module Iterator
596
501
 
597
502
  #
@@ -650,8 +555,9 @@ module Alf
650
555
  end
651
556
 
652
557
  #
653
- # Returns a reader instance for a specific file whose path is given
654
- # as argument.
558
+ # When filepath is a String, returns a reader instance for a specific file
559
+ # whose path is given as argument. Otherwise, delegate the call to
560
+ # <code>coerce(filepath)</code>
655
561
  #
656
562
  # @param [String] filepath path to a file for which extension is recognized
657
563
  # @param [Array] args optional additional arguments that must be passed at
@@ -659,11 +565,15 @@ module Alf
659
565
  # @return [Reader] a reader instance
660
566
  #
661
567
  def self.reader(filepath, *args)
662
- ext = File.extname(filepath)
663
- if registered = @@readers.find{|r| r[1].include?(ext)}
664
- registered[2].new(filepath, *args)
568
+ if filepath.is_a?(String)
569
+ ext = File.extname(filepath)
570
+ if registered = @@readers.find{|r| r[1].include?(ext)}
571
+ registered[2].new(filepath, *args)
572
+ else
573
+ raise "No registered reader for #{ext} (#{filepath})"
574
+ end
665
575
  else
666
- raise "No registered reader for #{ext} (#{filepath})"
576
+ coerce(filepath)
667
577
  end
668
578
  end
669
579
 
@@ -715,6 +625,7 @@ module Alf
715
625
  #
716
626
  def pipe(input, env = environment)
717
627
  @input = input
628
+ self
718
629
  end
719
630
 
720
631
  #
@@ -733,6 +644,14 @@ module Alf
733
644
 
734
645
  protected
735
646
 
647
+ #
648
+ # Returns the input file path, or nil if this Reader is bound to an IO
649
+ # directly.
650
+ #
651
+ def input_path
652
+ input.is_a?(String) ? input : nil
653
+ end
654
+
736
655
  #
737
656
  # Coerces the input object to an IO and yields the block with it.
738
657
  #
@@ -822,7 +741,7 @@ module Alf
822
741
 
823
742
  # (see Reader#each)
824
743
  def each
825
- op = Alf.lispy(environment).compile(input_text)
744
+ op = Alf.lispy(environment).compile(input_text, input_path)
826
745
  op.each(&Proc.new)
827
746
  end
828
747
 
@@ -923,6 +842,7 @@ module Alf
923
842
  def pipe(input, env = environment)
924
843
  self.environment = env
925
844
  self.input = input
845
+ self
926
846
  end
927
847
 
928
848
  #
@@ -1033,7 +953,7 @@ module Alf
1033
953
  # See '#{program_name} help COMMAND' for details about a specific command.
1034
954
  #
1035
955
  class Main < Quickl::Delegator(__FILE__, __LINE__)
1036
- include Command, Lispy
956
+ include Command
1037
957
 
1038
958
  # Environment instance to use to get base iterators
1039
959
  attr_accessor :environment
@@ -1044,6 +964,7 @@ module Alf
1044
964
  # Creates a command instance
1045
965
  def initialize(env = Environment.default)
1046
966
  @environment = env
967
+ extend(Lispy)
1047
968
  end
1048
969
 
1049
970
  # Install options
@@ -1098,7 +1019,7 @@ module Alf
1098
1019
  # 3) if there is a requester, then we do the job (assuming bin/alf)
1099
1020
  # with the renderer to use. Otherwise, we simply return built operator
1100
1021
  if operator && requester
1101
- chain(renderer, operator).execute($stdout)
1022
+ renderer.pipe(operator, environment).execute($stdout)
1102
1023
  else
1103
1024
  operator
1104
1025
  end
@@ -1138,7 +1059,7 @@ module Alf
1138
1059
  def execute(args)
1139
1060
  requester.renderer = @renderer
1140
1061
  args = [ $stdin ] if args.empty?
1141
- requester.chain(*args)
1062
+ requester.send(:chain,*args)
1142
1063
  end
1143
1064
 
1144
1065
  end # class Show
@@ -1198,7 +1119,42 @@ module Alf
1198
1119
  #
1199
1120
  module Operator
1200
1121
  include Iterator, Tools
1122
+
1123
+ #
1124
+ # Yields non-relational then relational operators, in turn.
1125
+ #
1126
+ def self.each
1127
+ Operator::NonRelational.each{|x| yield(x)}
1128
+ Operator::Relational.each{|x| yield(x)}
1129
+ end
1201
1130
 
1131
+ #
1132
+ # Encapsulates method that allows making operator introspection, that is,
1133
+ # knowing operator cardinality and similar stuff.
1134
+ #
1135
+ module Introspection
1136
+
1137
+ #
1138
+ # Returns true if this operator is an unary operator, false otherwise
1139
+ #
1140
+ def unary?
1141
+ ancestors.include?(Operator::Unary)
1142
+ end
1143
+
1144
+ #
1145
+ # Returns true if this operator is a binary operator, false otherwise
1146
+ #
1147
+ def binary?
1148
+ ancestors.include?(Operator::Binary)
1149
+ end
1150
+
1151
+ end # module Introspection
1152
+
1153
+ # Ensures that the Introspection module is set on real operators
1154
+ def self.included(mod)
1155
+ mod.extend(Introspection) if mod.is_a?(Class)
1156
+ end
1157
+
1202
1158
  #
1203
1159
  # Encapsulates method definitions that convert operators to Quickl
1204
1160
  # commands
@@ -1238,7 +1194,7 @@ module Alf
1238
1194
  end
1239
1195
 
1240
1196
  def split_command_args(args)
1241
- operands, args = case i = args.index("--")
1197
+ case (i = args.index("--"))
1242
1198
  when NilClass
1243
1199
  [args, []]
1244
1200
  when 0
@@ -1328,12 +1284,13 @@ module Alf
1328
1284
  def pipe(input, env = environment)
1329
1285
  self.environment = env
1330
1286
  self.datasets = [ input ]
1287
+ self
1331
1288
  end
1332
1289
 
1333
1290
  protected
1334
1291
 
1335
1292
  def command_line_operands(operands)
1336
- operands.first
1293
+ operands.first || $stdin
1337
1294
  end
1338
1295
 
1339
1296
  #
@@ -1366,6 +1323,7 @@ module Alf
1366
1323
  def pipe(input, env = environment)
1367
1324
  self.environment = env
1368
1325
  self.datasets = input
1326
+ self
1369
1327
  end
1370
1328
 
1371
1329
  protected
@@ -1461,6 +1419,7 @@ module Alf
1461
1419
  def pipe(input, env = environment)
1462
1420
  self.environment = env
1463
1421
  self.datasets = input
1422
+ self
1464
1423
  end
1465
1424
 
1466
1425
  protected
@@ -1504,26 +1463,42 @@ module Alf
1504
1463
  #
1505
1464
  module Operator::NonRelational
1506
1465
 
1466
+ #
1467
+ # Yields the block with each operator module in turn
1468
+ #
1469
+ def self.each
1470
+ constants.each do |c|
1471
+ val = const_get(c)
1472
+ yield(val) if val.ancestors.include?(Operator::NonRelational)
1473
+ end
1474
+ end
1475
+
1507
1476
  #
1508
- # Extend with an unique autonumber attribute
1477
+ # Extend its operand with an unique autonumber attribute
1509
1478
  #
1510
1479
  # SYNOPSIS
1511
- # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1512
1480
  #
1513
- # API & EXAMPLE
1481
+ # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1514
1482
  #
1515
- # # Autonumber suppliers (:autonum attribute name by default)
1516
- # (autonum :suppliers)
1483
+ # DESCRIPTION
1517
1484
  #
1518
- # # You can specify the attribute name
1485
+ # This non-relational operator guarantees uniqueness of output tuples by
1486
+ # adding an attribute called 'ATTRNAME' whose value is an Integer. No
1487
+ # guarantee is given about ordering of output tuples, nor to the fact
1488
+ # that this autonumber is sequential. Only that all values are different.
1489
+ # If the presence of duplicates was the only "non-relational" aspect of
1490
+ # input tuples, the result may be considered a valid relation representation.
1491
+ #
1492
+ # IN RUBY
1493
+ #
1494
+ # (autonum OPERAND, ATTRNAME = :autonum)
1495
+ #
1496
+ # (autonum :suppliers)
1519
1497
  # (autonum :suppliers, :unique_id)
1520
1498
  #
1521
- # DESCRIPTION
1499
+ # IN SHELL
1522
1500
  #
1523
- # This operator takes input tuples in any order they come and extends them
1524
- # with an autonumber attribute ATTRNAME. This allows converting non-relational
1525
- # tuple enumerators to relational ones by ensuring uniqueness of tuples in an
1526
- # arbitrary manner.
1501
+ # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1527
1502
  #
1528
1503
  # alf autonum suppliers
1529
1504
  # alf autonum suppliers -- unique_id
@@ -1845,6 +1820,15 @@ module Alf
1845
1820
  module Operator::Relational
1846
1821
 
1847
1822
  #
1823
+ # Yields the block with each operator module in turn
1824
+ #
1825
+ def self.each
1826
+ constants.each do |c|
1827
+ val = const_get(c)
1828
+ yield(val) if val.ancestors.include?(Operator::Relational)
1829
+ end
1830
+ end
1831
+
1848
1832
  # Relational projection (clip + compact)
1849
1833
  #
1850
1834
  # SYNOPSIS
@@ -2309,35 +2293,35 @@ module Alf
2309
2293
  end # class Union
2310
2294
 
2311
2295
  #
2312
- # Relational nesting (tuple-valued attributes)
2296
+ # Relational wraping (tuple-valued attributes)
2313
2297
  #
2314
2298
  # SYNOPSIS
2315
2299
  # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2316
2300
  #
2317
2301
  # API & EXAMPLE
2318
2302
  #
2319
- # (nest :suppliers, [:city, :status], :loc_and_status)
2303
+ # (wrap :suppliers, [:city, :status], :loc_and_status)
2320
2304
  #
2321
2305
  # DESCRIPTION
2322
2306
  #
2323
- # This operator nests attributes ATTR1 to ATTRN as a new, tuple-based
2324
- # attribute whose name is NEWNAME. When used in shell, names of nested
2307
+ # This operator wraps attributes ATTR1 to ATTRN as a new, tuple-based
2308
+ # attribute whose name is NEWNAME. When used in shell, names of wrapped
2325
2309
  # attributes are taken from commandline arguments, expected the last one
2326
2310
  # which defines the new name to use:
2327
2311
  #
2328
- # alf nest suppliers -- city status loc_and_status
2312
+ # alf wrap suppliers -- city status loc_and_status
2329
2313
  #
2330
- class Nest < Factory::Operator(__FILE__, __LINE__)
2314
+ class Wrap < Factory::Operator(__FILE__, __LINE__)
2331
2315
  include Operator::Relational, Operator::Transform
2332
2316
 
2333
- # Array of nesting attributes
2317
+ # Array of wraping attributes
2334
2318
  attr_accessor :attributes
2335
2319
 
2336
- # New name for the nested attribute
2320
+ # New name for the wrapped attribute
2337
2321
  attr_accessor :as
2338
2322
 
2339
- # Builds a Nest operator instance
2340
- def initialize(attributes = [], as = :nested)
2323
+ # Builds a Wrap operator instance
2324
+ def initialize(attributes = [], as = :wrapped)
2341
2325
  @attributes = attributes
2342
2326
  @as = as
2343
2327
  end
@@ -2358,36 +2342,36 @@ module Alf
2358
2342
  others
2359
2343
  end
2360
2344
 
2361
- end # class Nest
2345
+ end # class Wrap
2362
2346
 
2363
2347
  #
2364
- # Relational un-nesting (inverse of nest)
2348
+ # Relational un-wraping (inverse of wrap)
2365
2349
  #
2366
2350
  # SYNOPSIS
2367
2351
  # #{program_name} #{command_name} [OPERAND] -- ATTR
2368
2352
  #
2369
2353
  # API & EXAMPLE
2370
2354
  #
2371
- # # Assuming nested = (nest :suppliers, [:city, :status], :loc_and_status)
2372
- # (unnest nested, :loc_and_status)
2355
+ # # Assuming wrapped = (wrap :suppliers, [:city, :status], :loc_and_status)
2356
+ # (unwrap wrapped, :loc_and_status)
2373
2357
  #
2374
2358
  # DESCRIPTION
2375
2359
  #
2376
- # This operator unnests the tuple-valued attribute named ATTR so as to
2360
+ # This operator unwraps the tuple-valued attribute named ATTR so as to
2377
2361
  # flatten its pairs with 'upstream' tuple. The latter should be such so that
2378
2362
  # no name collision occurs. When used in shell, the name of the attribute to
2379
- # unnest is taken as the first commandline argument:
2363
+ # unwrap is taken as the first commandline argument:
2380
2364
  #
2381
- # alf unnest nest -- loc_and_status
2365
+ # alf unwrap wrap -- loc_and_status
2382
2366
  #
2383
- class Unnest < Factory::Operator(__FILE__, __LINE__)
2367
+ class Unwrap < Factory::Operator(__FILE__, __LINE__)
2384
2368
  include Operator::Relational, Operator::Transform
2385
2369
 
2386
- # Name of the attribute to unnest
2370
+ # Name of the attribute to unwrap
2387
2371
  attr_accessor :attribute
2388
2372
 
2389
2373
  # Builds a Rename operator instance
2390
- def initialize(attribute = :nested)
2374
+ def initialize(attribute = :wrapped)
2391
2375
  @attribute = attribute
2392
2376
  end
2393
2377
 
@@ -2402,11 +2386,11 @@ module Alf
2402
2386
  # (see Operator::Transform#_tuple2tuple)
2403
2387
  def _tuple2tuple(tuple)
2404
2388
  tuple = tuple.dup
2405
- nested = tuple.delete(@attribute) || {}
2406
- tuple.merge(nested)
2389
+ wrapped = tuple.delete(@attribute) || {}
2390
+ tuple.merge(wrapped)
2407
2391
  end
2408
2392
 
2409
- end # class Unnest
2393
+ end # class Unwrap
2410
2394
 
2411
2395
  #
2412
2396
  # Relational grouping (relation-valued attributes)
@@ -2464,7 +2448,7 @@ module Alf
2464
2448
  # See Operator#_prepare
2465
2449
  def _prepare
2466
2450
  pkey = ProjectionKey.new(attributes, !allbut)
2467
- @index = Hash.new{|h,k| h[k] = []}
2451
+ @index = Hash.new{|h,k| h[k] = Set.new}
2468
2452
  each_input_tuple do |tuple|
2469
2453
  key, rest = pkey.split(tuple)
2470
2454
  @index[key] << rest
@@ -2474,7 +2458,7 @@ module Alf
2474
2458
  # See Operator#_each
2475
2459
  def _each
2476
2460
  @index.each_pair do |k,v|
2477
- yield(k.merge(@as => v))
2461
+ yield(k.merge(@as => Relation.coerce(v)))
2478
2462
  end
2479
2463
  end
2480
2464
 
@@ -2537,7 +2521,7 @@ module Alf
2537
2521
  # Relational summarization (group-by + aggregate ops)
2538
2522
  #
2539
2523
  # SYNOPSIS
2540
- # #{program_name} #{command_name} [OPERAND] --by=KEY1,KEY2... -- AGG1 EXPR1...
2524
+ # #{program_name} #{command_name} [OPERAND] [--allbut] --by=KEY1,KEY2... -- AGG1 EXPR1...
2541
2525
  #
2542
2526
  # OPTIONS
2543
2527
  # #{summarized_options}
@@ -2547,6 +2531,10 @@ module Alf
2547
2531
  # (summarize :supplies, [:sid],
2548
2532
  # :total_qty => Aggregator.sum(:qty))
2549
2533
  #
2534
+ # # Or, to specify an allbut projection
2535
+ # (summarize :supplies, [:qty, :pid],
2536
+ # :total_qty => Aggregator.sum(:qty), true)
2537
+ #
2550
2538
  # DESCRIPTION
2551
2539
  #
2552
2540
  # This operator summarizes input tuples on the projection on KEY1,KEY2,...
@@ -2558,6 +2546,7 @@ module Alf
2558
2546
  # aggregation expression evaluated on Aggregator:
2559
2547
  #
2560
2548
  # alf summarize supplies --by=sid -- total_qty "sum(:qty)"
2549
+ # alf summarize supplies --allbut --by=pid,qty -- total_qty "sum(:qty)"
2561
2550
  #
2562
2551
  class Summarize < Factory::Operator(__FILE__, __LINE__)
2563
2552
  include Operator::Relational, Operator::Shortcut, Operator::Unary
@@ -2565,11 +2554,15 @@ module Alf
2565
2554
  # By attributes
2566
2555
  attr_accessor :by
2567
2556
 
2557
+ # Allbut on by?
2558
+ attr_accessor :allbut
2559
+
2568
2560
  # Aggregations as a AGG => Aggregator(EXPR) hash
2569
2561
  attr_accessor :aggregators
2570
2562
 
2571
- def initialize(by = [], aggregators = {})
2563
+ def initialize(by = [], aggregators = {}, allbut = false)
2572
2564
  @by = by
2565
+ @allbut = allbut
2573
2566
  @aggregators = aggregators
2574
2567
  end
2575
2568
 
@@ -2578,6 +2571,9 @@ module Alf
2578
2571
  opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
2579
2572
  @by = args.collect{|a| a.to_sym}
2580
2573
  end
2574
+ opt.on('--allbut', 'Make an allbut projection/summarization') do
2575
+ @allbut = true
2576
+ end
2581
2577
  end
2582
2578
 
2583
2579
  # Summarizes according to a complete order
@@ -2613,7 +2609,42 @@ module Alf
2613
2609
  end
2614
2610
 
2615
2611
  end # class SortBased
2612
+
2613
+ # Summarizes in-memory with a hash
2614
+ class HashBased
2615
+ include Operator::Relational, Operator::Unary
2616
+
2617
+ attr_reader :by_key
2618
+ attr_reader :aggregators
2616
2619
 
2620
+ def initialize(by_key, aggregators)
2621
+ @by_key, @aggregators = by_key, aggregators
2622
+ end
2623
+
2624
+ protected
2625
+
2626
+ def _each
2627
+ index = Hash.new do |h,k|
2628
+ h[k] = tuple_collect(@aggregators) do |a,agg|
2629
+ [a, agg.least]
2630
+ end
2631
+ end
2632
+ each_input_tuple do |tuple|
2633
+ key, rest = by_key.split(tuple)
2634
+ index[key] = tuple_collect(@aggregators) do |a,agg|
2635
+ [a, agg.happens(index[key][a], tuple)]
2636
+ end
2637
+ end
2638
+ index.each_pair do |key,aggs|
2639
+ aggs = tuple_collect(@aggregators) do |a,agg|
2640
+ [a, agg.finalize(aggs[a])]
2641
+ end
2642
+ yield key.merge(aggs)
2643
+ end
2644
+ end
2645
+
2646
+ end
2647
+
2617
2648
  protected
2618
2649
 
2619
2650
  # (see Operator::CommandMethods#set_args)
@@ -2625,10 +2656,16 @@ module Alf
2625
2656
  end
2626
2657
 
2627
2658
  def longexpr
2628
- by_key = Tools::ProjectionKey.new(@by, false)
2629
- chain SortBased.new(by_key, @aggregators),
2630
- Operator::NonRelational::Sort.new(by_key.to_ordering_key),
2631
- datasets
2659
+ if @allbut
2660
+ by_key = Tools::ProjectionKey.new(@by, @allbut)
2661
+ chain HashBased.new(by_key, @aggregators),
2662
+ datasets
2663
+ else
2664
+ by_key = Tools::ProjectionKey.new(@by, @allbut)
2665
+ chain SortBased.new(by_key, @aggregators),
2666
+ Operator::NonRelational::Sort.new(by_key.to_ordering_key),
2667
+ datasets
2668
+ end
2632
2669
  end
2633
2670
 
2634
2671
  end # class Summarize
@@ -2908,12 +2945,12 @@ module Alf
2908
2945
  Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
2909
2946
  }
2910
2947
  end
2911
- def least(); []; end
2948
+ def least(); Set.new; end
2912
2949
  def _happens(memo, val)
2913
2950
  memo << val
2914
2951
  end
2915
2952
  def finalize(memo)
2916
- memo.uniq
2953
+ Relation.coerce memo
2917
2954
  end
2918
2955
  end
2919
2956
 
@@ -2944,7 +2981,6 @@ module Alf
2944
2981
  end
2945
2982
  end
2946
2983
 
2947
- Lispy::Agg = Aggregator
2948
2984
  end # class Aggregator
2949
2985
 
2950
2986
  #
@@ -2981,4 +3017,119 @@ module Alf
2981
3017
 
2982
3018
  end # class Buffer
2983
3019
 
3020
+ #
3021
+ # Implements a small LISP-like DSL on top of Alf.
3022
+ #
3023
+ # The lispy dialect is the functional one used in .alf files and in compiled
3024
+ # expressions as below:
3025
+ #
3026
+ # Alf.lispy.compile do
3027
+ # (restrict :suppliers, lambda{ city == 'London' })
3028
+ # end
3029
+ #
3030
+ # The DSL this module provides is part of Alf's public API and won't be broken
3031
+ # without a major version change. The module itself and its inclusion pre-
3032
+ # conditions are not part of the DSL itself, thus not considered as part of
3033
+ # the API, and may therefore evolve at any time. In other words, this module
3034
+ # is not intended to be directly included by third-party classes.
3035
+ #
3036
+ module Lispy
3037
+
3038
+ alias :ruby_extend :extend
3039
+
3040
+ # The environment
3041
+ attr_accessor :environment
3042
+
3043
+ #
3044
+ # Compiles a query expression given by a String or a block and returns
3045
+ # the result (typically a tuple iterator)
3046
+ #
3047
+ # Example
3048
+ #
3049
+ # # with a string
3050
+ # op = compile "(restrict :suppliers, lambda{ city == 'London' })"
3051
+ #
3052
+ # # or with a block
3053
+ # op = compile {
3054
+ # (restrict :suppliers, lambda{ city == 'London' })
3055
+ # }
3056
+ #
3057
+ # @param [String] expr a Lispy expression to compile
3058
+ # @return [Iterator] the iterator resulting from compilation
3059
+ #
3060
+ def compile(expr = nil, path = nil, &block)
3061
+ if expr.nil?
3062
+ instance_eval(&block)
3063
+ else
3064
+ (path ? Kernel.eval(expr, binding, path) : Kernel.eval(expr, binding))
3065
+ end
3066
+ end
3067
+
3068
+ #
3069
+ # Evaluates a query expression given by a String or a block and returns
3070
+ # the result as an in-memory relation (Alf::Relation)
3071
+ #
3072
+ # Example:
3073
+ #
3074
+ # # with a string
3075
+ # rel = evaluate "(restrict :suppliers, lambda{ city == 'London' })"
3076
+ #
3077
+ # # or with a block
3078
+ # rel = evaluate {
3079
+ # (restrict :suppliers, lambda{ city == 'London' })
3080
+ # }
3081
+ #
3082
+ def evaluate(expr = nil, path = nil, &block)
3083
+ compile(expr, path, &block).to_rel
3084
+ end
3085
+
3086
+ #
3087
+ # Delegated to the current environment
3088
+ #
3089
+ # This method returns the dataset associated to a given name. The result
3090
+ # may depend on the current environment, but is generally an Iterator,
3091
+ # often a Reader instance.
3092
+ #
3093
+ # @param [Symbol] name name of the dataset to retrieve
3094
+ # @return [Iterator] the dataset as an iterator
3095
+ # @see Environment#dataset
3096
+ #
3097
+ def dataset(name)
3098
+ raise "Environment not set" unless @environment
3099
+ @environment.dataset(name)
3100
+ end
3101
+
3102
+ # Functional equivalent to Alf::Relation[...]
3103
+ def relation(*tuples)
3104
+ Relation.coerce(tuples)
3105
+ end
3106
+
3107
+ #
3108
+ # Install the DSL through iteration over defined operators
3109
+ #
3110
+ Operator::each do |op_class|
3111
+ meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
3112
+ if op_class.unary?
3113
+ define_method(meth_name) do |child, *args|
3114
+ child = Iterator.coerce(child, environment)
3115
+ op_class.new(*args).pipe(child, environment)
3116
+ end
3117
+ elsif op_class.binary?
3118
+ define_method(meth_name) do |left, right, *args|
3119
+ operands = [left, right].collect{|x| Iterator.coerce(x, environment)}
3120
+ op_class.new(*args).pipe(operands, environment)
3121
+ end
3122
+ else
3123
+ raise "Unexpected operator #{op_class}"
3124
+ end
3125
+ end # Operators::each
3126
+
3127
+ def allbut(child, attributes)
3128
+ (project child, attributes, true)
3129
+ end
3130
+
3131
+ Agg = Alf::Aggregator
3132
+ end # module Lispy
3133
+
2984
3134
  end # module Alf
3135
+ require "alf/relation"