alf 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +64 -0
- data/Gemfile.lock +4 -4
- data/README.md +257 -171
- data/TODO.md +4 -4
- data/alf.gemspec +3 -3
- data/alf.noespec +11 -6
- data/examples/pseudo-with.alf +7 -0
- data/examples/runall.sh +2 -2
- data/examples/unwrap.alf +4 -0
- data/examples/wrap.alf +2 -0
- data/lib/alf/relation.rb +118 -0
- data/lib/alf/version.rb +1 -1
- data/lib/alf.rb +320 -169
- data/spec/integration/src/test_minus.alf +5 -0
- data/spec/integration/src/test_project.alf +9 -0
- data/spec/{alf_spec.rb → integration/test_alf.rb} +8 -21
- data/spec/integration/test_alf_specs.rb +37 -0
- data/spec/{examples_spec.rb → integration/test_examples.rb} +1 -1
- data/spec/spec_helper.rb +19 -1
- data/spec/unit/environment/examples/suppliers.rash +5 -0
- data/spec/{environment/explicit_spec.rb → unit/environment/test_explicit.rb} +0 -0
- data/spec/{environment/folder_spec.rb → unit/environment/test_folder.rb} +1 -1
- data/spec/{operator → unit/operator}/non_relational/compact/buffer_based.rb +0 -0
- data/spec/{operator/non_relational/compact/sort_based_spec.rb → unit/operator/non_relational/compact/test_sort_based.rb} +0 -0
- data/spec/{operator/non_relational/autonum_spec.rb → unit/operator/non_relational/test_autonum.rb} +0 -0
- data/spec/{operator/non_relational/clip_spec.rb → unit/operator/non_relational/test_clip.rb} +0 -0
- data/spec/{operator/non_relational/compact_spec.rb → unit/operator/non_relational/test_compact.rb} +0 -0
- data/spec/{operator/non_relational/defaults_spec.rb → unit/operator/non_relational/test_defaults.rb} +0 -0
- data/spec/{operator/non_relational/sort_spec.rb → unit/operator/non_relational/test_sort.rb} +0 -0
- data/spec/{operator/relational/join/hash_based_spec.rb → unit/operator/relational/join/test_hash_based.rb} +0 -0
- data/spec/unit/operator/relational/summarize/test_hash_based.rb +38 -0
- data/spec/{operator/relational/summarize/sort_based_spec.rb → unit/operator/relational/summarize/test_sort_based.rb} +0 -0
- data/spec/{operator/relational/extend_spec.rb → unit/operator/relational/test_extend.rb} +0 -0
- data/spec/{operator/relational/group_spec.rb → unit/operator/relational/test_group.rb} +3 -2
- data/spec/{operator/relational/intersect_spec.rb → unit/operator/relational/test_intersect.rb} +0 -0
- data/spec/unit/operator/relational/test_join.rb +36 -0
- data/spec/{operator/relational/minus_spec.rb → unit/operator/relational/test_minus.rb} +0 -0
- data/spec/{operator/relational/project_spec.rb → unit/operator/relational/test_project.rb} +0 -0
- data/spec/{operator/relational/quota_spec.rb → unit/operator/relational/test_quota.rb} +0 -0
- data/spec/{operator/relational/rename_spec.rb → unit/operator/relational/test_rename.rb} +0 -0
- data/spec/{operator/relational/restrict_spec.rb → unit/operator/relational/test_restrict.rb} +0 -0
- data/spec/unit/operator/relational/test_summarize.rb +64 -0
- data/spec/{operator/relational/ungroup_spec.rb → unit/operator/relational/test_ungroup.rb} +0 -0
- data/spec/{operator/relational/union_spec.rb → unit/operator/relational/test_union.rb} +0 -0
- data/spec/{operator/relational/unnest_spec.rb → unit/operator/relational/test_unwrap.rb} +5 -5
- data/spec/{operator/relational/nest_spec.rb → unit/operator/relational/test_wrap.rb} +5 -5
- data/spec/{operator/command_methods_spec.rb → unit/operator/test_command_methods.rb} +0 -0
- data/spec/unit/operator/test_non_relational.rb +18 -0
- data/spec/unit/operator/test_relational.rb +27 -0
- data/spec/{reader → unit/reader}/input.rb +0 -0
- data/spec/unit/reader/test_alf_file.rb +27 -0
- data/spec/{reader/rash_spec.rb → unit/reader/test_rash.rb} +0 -0
- data/spec/unit/relation/test_coerce.rb +53 -0
- data/spec/unit/relation/test_inspect.rb +20 -0
- data/spec/unit/relation/test_relops.rb +46 -0
- data/spec/{renderer/text/cell_spec.rb → unit/renderer/text/test_cell.rb} +0 -0
- data/spec/{renderer/text/row_spec.rb → unit/renderer/text/test_row.rb} +0 -0
- data/spec/{renderer/text/table_spec.rb → unit/renderer/text/test_table.rb} +0 -0
- data/spec/{aggregator_spec.rb → unit/test_aggregator.rb} +6 -6
- data/spec/{assumptions_spec.rb → unit/test_assumptions.rb} +0 -0
- data/spec/{lispy_spec.rb → unit/test_lispy.rb} +0 -0
- data/spec/unit/test_operator.rb +16 -0
- data/spec/{reader_spec.rb → unit/test_reader.rb} +4 -0
- data/spec/unit/test_relation.rb +40 -0
- data/spec/{renderer_spec.rb → unit/test_renderer.rb} +0 -0
- data/spec/{tools/ordering_key_spec.rb → unit/tools/test_ordering_key.rb} +0 -0
- data/spec/{tools/projection_key_spec.rb → unit/tools/test_projection_key.rb} +0 -0
- data/spec/{tools/tools_spec.rb → unit/tools/test_tools.rb} +0 -0
- data/spec/{tools/tuple_handle_spec.rb → unit/tools/test_tuple_handle.rb} +0 -0
- data/tasks/clean.rake +3 -0
- data/tasks/spec_test.rake +1 -1
- metadata +143 -114
- data/examples/nest.alf +0 -2
- data/examples/unnest.alf +0 -4
- data/examples/with.alf +0 -23
- data/spec/operator/relational/summarize_spec.rb +0 -41
- data/spec/reader/alf_file_spec.rb +0 -15
data/lib/alf.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require "enumerator"
|
2
2
|
require "stringio"
|
3
|
+
require "set"
|
3
4
|
require "alf/version"
|
4
5
|
require "alf/loader"
|
5
6
|
|
@@ -238,7 +239,8 @@ module Alf
|
|
238
239
|
|
239
240
|
def compare(t1,t2)
|
240
241
|
@ordering.each do |attr,order|
|
241
|
-
|
242
|
+
x, y = t1[attr], t2[attr]
|
243
|
+
comp = x.respond_to?(:<=>) ? (x <=> y) : (x.to_s <=> y.to_s)
|
242
244
|
comp *= -1 if order == :desc
|
243
245
|
return comp unless comp == 0
|
244
246
|
end
|
@@ -279,114 +281,6 @@ module Alf
|
|
279
281
|
Command::Main.new(env)
|
280
282
|
end
|
281
283
|
|
282
|
-
#
|
283
|
-
# Implements a small LISP-like DSL on top of Alf.
|
284
|
-
#
|
285
|
-
# The lispy dialect is the functional one used in .alf files and in compiled
|
286
|
-
# expressions as below:
|
287
|
-
#
|
288
|
-
# Alf.lispy.compile do
|
289
|
-
# (restrict :suppliers, lambda{ city == 'London' })
|
290
|
-
# end
|
291
|
-
#
|
292
|
-
# The DSL this module provides is part of Alf's public API and won't be broken
|
293
|
-
# without a major version change. The module itself and its inclusion pre-
|
294
|
-
# conditions are not part of the DSL itself, thus not considered as part of
|
295
|
-
# the API, and may therefore evolve at any time. In other words, this module
|
296
|
-
# is not intended to be directly included by third-party classes.
|
297
|
-
#
|
298
|
-
module Lispy
|
299
|
-
|
300
|
-
# The environment
|
301
|
-
attr_accessor :environment
|
302
|
-
|
303
|
-
#
|
304
|
-
# Compiles a query expression given by a String or a block and returns
|
305
|
-
# the result (typically a tuple iterator)
|
306
|
-
#
|
307
|
-
def compile(expr = nil, &block)
|
308
|
-
expr.nil? ? instance_eval(&block) : instance_eval(expr)
|
309
|
-
end
|
310
|
-
|
311
|
-
# Delegated to the environment
|
312
|
-
def dataset(name)
|
313
|
-
raise "Environment not set" unless @environment
|
314
|
-
@environment.dataset(name)
|
315
|
-
end
|
316
|
-
|
317
|
-
#
|
318
|
-
# Compiles the subexpression given by the block in the context of
|
319
|
-
# additional temporary expressions given by definitions
|
320
|
-
#
|
321
|
-
def with(definitions)
|
322
|
-
# We branch with the definitions for compilation
|
323
|
-
self.environment = environment.branch(definitions)
|
324
|
-
|
325
|
-
# this is to ensure that sub definitions can reuse other
|
326
|
-
# ones
|
327
|
-
definitions.each_value do |defn|
|
328
|
-
defn.environment = self.environment
|
329
|
-
end
|
330
|
-
|
331
|
-
# compile now
|
332
|
-
op = compile(&Proc.new)
|
333
|
-
|
334
|
-
# We now unbranch for next expression
|
335
|
-
self.environment = environment.unbranch
|
336
|
-
|
337
|
-
op
|
338
|
-
end
|
339
|
-
|
340
|
-
#
|
341
|
-
# Chains some elements as a new operator
|
342
|
-
#
|
343
|
-
def chain(*elements)
|
344
|
-
elements = elements.reverse
|
345
|
-
elements[1..-1].inject(elements.first) do |c, elm|
|
346
|
-
elm.pipe(c, environment)
|
347
|
-
elm
|
348
|
-
end
|
349
|
-
end
|
350
|
-
|
351
|
-
[ :Autonum, :Clip, :Compact, :Defaults, :Sort ].each do |op_name|
|
352
|
-
meth_name = Tools.ruby_case(op_name).to_sym
|
353
|
-
define_method(meth_name) do |child, *args|
|
354
|
-
chain(Operator::NonRelational.const_get(op_name).new(*args), child)
|
355
|
-
end
|
356
|
-
end
|
357
|
-
|
358
|
-
[:Project,
|
359
|
-
:Extend,
|
360
|
-
:Rename,
|
361
|
-
:Restrict,
|
362
|
-
:Nest,
|
363
|
-
:Unnest,
|
364
|
-
:Group,
|
365
|
-
:Ungroup,
|
366
|
-
:Summarize,
|
367
|
-
:Quota ].each do |op_name|
|
368
|
-
meth_name = Tools.ruby_case(op_name).to_sym
|
369
|
-
define_method(meth_name) do |child, *args|
|
370
|
-
chain(Operator::Relational.const_get(op_name).new(*args), child)
|
371
|
-
end
|
372
|
-
end
|
373
|
-
|
374
|
-
def allbut(child, attributes)
|
375
|
-
chain(Operator::Relational::Project.new(attributes, true), child)
|
376
|
-
end
|
377
|
-
|
378
|
-
[ :Join,
|
379
|
-
:Union,
|
380
|
-
:Intersect,
|
381
|
-
:Minus ].each do |op_name|
|
382
|
-
meth_name = Tools.ruby_case(op_name).to_sym
|
383
|
-
define_method(meth_name) do |left, right, *args|
|
384
|
-
chain(Operator::Relational.const_get(op_name).new(*args), [left, right])
|
385
|
-
end
|
386
|
-
end
|
387
|
-
|
388
|
-
end # module Lispy
|
389
|
-
|
390
284
|
#
|
391
285
|
# Encapsulates the interface with the outside world, providing base iterators
|
392
286
|
# for named datasets, among others.
|
@@ -574,24 +468,35 @@ module Alf
|
|
574
468
|
# implementation.
|
575
469
|
# @param [Environment] environment an optional environment for resolving
|
576
470
|
# named datasets if needed.
|
471
|
+
# @return [Object] self
|
577
472
|
#
|
578
473
|
def pipe(input, environment = nil)
|
474
|
+
self
|
579
475
|
end
|
580
476
|
undef :pipe
|
581
477
|
|
582
|
-
|
583
478
|
#
|
584
479
|
# Coerces something to an iterator
|
585
480
|
#
|
586
|
-
def self.coerce(arg,
|
481
|
+
def self.coerce(arg, environment = nil)
|
587
482
|
case arg
|
588
483
|
when Iterator, Array
|
589
484
|
arg
|
590
485
|
else
|
591
|
-
Reader.coerce(arg,
|
486
|
+
Reader.coerce(arg, environment)
|
592
487
|
end
|
593
488
|
end
|
594
489
|
|
490
|
+
#
|
491
|
+
# Converts this iterator to an in-memory Relation.
|
492
|
+
#
|
493
|
+
# @return [Relation] a relation instance, as the set of tuples
|
494
|
+
# that would be yield by this iterator.
|
495
|
+
#
|
496
|
+
def to_rel
|
497
|
+
Relation::coerce(self)
|
498
|
+
end
|
499
|
+
|
595
500
|
end # module Iterator
|
596
501
|
|
597
502
|
#
|
@@ -650,8 +555,9 @@ module Alf
|
|
650
555
|
end
|
651
556
|
|
652
557
|
#
|
653
|
-
#
|
654
|
-
# as argument.
|
558
|
+
# When filepath is a String, returns a reader instance for a specific file
|
559
|
+
# whose path is given as argument. Otherwise, delegate the call to
|
560
|
+
# <code>coerce(filepath)</code>
|
655
561
|
#
|
656
562
|
# @param [String] filepath path to a file for which extension is recognized
|
657
563
|
# @param [Array] args optional additional arguments that must be passed at
|
@@ -659,11 +565,15 @@ module Alf
|
|
659
565
|
# @return [Reader] a reader instance
|
660
566
|
#
|
661
567
|
def self.reader(filepath, *args)
|
662
|
-
|
663
|
-
|
664
|
-
registered[
|
568
|
+
if filepath.is_a?(String)
|
569
|
+
ext = File.extname(filepath)
|
570
|
+
if registered = @@readers.find{|r| r[1].include?(ext)}
|
571
|
+
registered[2].new(filepath, *args)
|
572
|
+
else
|
573
|
+
raise "No registered reader for #{ext} (#{filepath})"
|
574
|
+
end
|
665
575
|
else
|
666
|
-
|
576
|
+
coerce(filepath)
|
667
577
|
end
|
668
578
|
end
|
669
579
|
|
@@ -715,6 +625,7 @@ module Alf
|
|
715
625
|
#
|
716
626
|
def pipe(input, env = environment)
|
717
627
|
@input = input
|
628
|
+
self
|
718
629
|
end
|
719
630
|
|
720
631
|
#
|
@@ -733,6 +644,14 @@ module Alf
|
|
733
644
|
|
734
645
|
protected
|
735
646
|
|
647
|
+
#
|
648
|
+
# Returns the input file path, or nil if this Reader is bound to an IO
|
649
|
+
# directly.
|
650
|
+
#
|
651
|
+
def input_path
|
652
|
+
input.is_a?(String) ? input : nil
|
653
|
+
end
|
654
|
+
|
736
655
|
#
|
737
656
|
# Coerces the input object to an IO and yields the block with it.
|
738
657
|
#
|
@@ -822,7 +741,7 @@ module Alf
|
|
822
741
|
|
823
742
|
# (see Reader#each)
|
824
743
|
def each
|
825
|
-
op = Alf.lispy(environment).compile(input_text)
|
744
|
+
op = Alf.lispy(environment).compile(input_text, input_path)
|
826
745
|
op.each(&Proc.new)
|
827
746
|
end
|
828
747
|
|
@@ -923,6 +842,7 @@ module Alf
|
|
923
842
|
def pipe(input, env = environment)
|
924
843
|
self.environment = env
|
925
844
|
self.input = input
|
845
|
+
self
|
926
846
|
end
|
927
847
|
|
928
848
|
#
|
@@ -1033,7 +953,7 @@ module Alf
|
|
1033
953
|
# See '#{program_name} help COMMAND' for details about a specific command.
|
1034
954
|
#
|
1035
955
|
class Main < Quickl::Delegator(__FILE__, __LINE__)
|
1036
|
-
include Command
|
956
|
+
include Command
|
1037
957
|
|
1038
958
|
# Environment instance to use to get base iterators
|
1039
959
|
attr_accessor :environment
|
@@ -1044,6 +964,7 @@ module Alf
|
|
1044
964
|
# Creates a command instance
|
1045
965
|
def initialize(env = Environment.default)
|
1046
966
|
@environment = env
|
967
|
+
extend(Lispy)
|
1047
968
|
end
|
1048
969
|
|
1049
970
|
# Install options
|
@@ -1098,7 +1019,7 @@ module Alf
|
|
1098
1019
|
# 3) if there is a requester, then we do the job (assuming bin/alf)
|
1099
1020
|
# with the renderer to use. Otherwise, we simply return built operator
|
1100
1021
|
if operator && requester
|
1101
|
-
|
1022
|
+
renderer.pipe(operator, environment).execute($stdout)
|
1102
1023
|
else
|
1103
1024
|
operator
|
1104
1025
|
end
|
@@ -1138,7 +1059,7 @@ module Alf
|
|
1138
1059
|
def execute(args)
|
1139
1060
|
requester.renderer = @renderer
|
1140
1061
|
args = [ $stdin ] if args.empty?
|
1141
|
-
requester.chain
|
1062
|
+
requester.send(:chain,*args)
|
1142
1063
|
end
|
1143
1064
|
|
1144
1065
|
end # class Show
|
@@ -1198,7 +1119,42 @@ module Alf
|
|
1198
1119
|
#
|
1199
1120
|
module Operator
|
1200
1121
|
include Iterator, Tools
|
1122
|
+
|
1123
|
+
#
|
1124
|
+
# Yields non-relational then relational operators, in turn.
|
1125
|
+
#
|
1126
|
+
def self.each
|
1127
|
+
Operator::NonRelational.each{|x| yield(x)}
|
1128
|
+
Operator::Relational.each{|x| yield(x)}
|
1129
|
+
end
|
1201
1130
|
|
1131
|
+
#
|
1132
|
+
# Encapsulates method that allows making operator introspection, that is,
|
1133
|
+
# knowing operator cardinality and similar stuff.
|
1134
|
+
#
|
1135
|
+
module Introspection
|
1136
|
+
|
1137
|
+
#
|
1138
|
+
# Returns true if this operator is an unary operator, false otherwise
|
1139
|
+
#
|
1140
|
+
def unary?
|
1141
|
+
ancestors.include?(Operator::Unary)
|
1142
|
+
end
|
1143
|
+
|
1144
|
+
#
|
1145
|
+
# Returns true if this operator is a binary operator, false otherwise
|
1146
|
+
#
|
1147
|
+
def binary?
|
1148
|
+
ancestors.include?(Operator::Binary)
|
1149
|
+
end
|
1150
|
+
|
1151
|
+
end # module Introspection
|
1152
|
+
|
1153
|
+
# Ensures that the Introspection module is set on real operators
|
1154
|
+
def self.included(mod)
|
1155
|
+
mod.extend(Introspection) if mod.is_a?(Class)
|
1156
|
+
end
|
1157
|
+
|
1202
1158
|
#
|
1203
1159
|
# Encapsulates method definitions that convert operators to Quickl
|
1204
1160
|
# commands
|
@@ -1238,7 +1194,7 @@ module Alf
|
|
1238
1194
|
end
|
1239
1195
|
|
1240
1196
|
def split_command_args(args)
|
1241
|
-
|
1197
|
+
case (i = args.index("--"))
|
1242
1198
|
when NilClass
|
1243
1199
|
[args, []]
|
1244
1200
|
when 0
|
@@ -1328,12 +1284,13 @@ module Alf
|
|
1328
1284
|
def pipe(input, env = environment)
|
1329
1285
|
self.environment = env
|
1330
1286
|
self.datasets = [ input ]
|
1287
|
+
self
|
1331
1288
|
end
|
1332
1289
|
|
1333
1290
|
protected
|
1334
1291
|
|
1335
1292
|
def command_line_operands(operands)
|
1336
|
-
operands.first
|
1293
|
+
operands.first || $stdin
|
1337
1294
|
end
|
1338
1295
|
|
1339
1296
|
#
|
@@ -1366,6 +1323,7 @@ module Alf
|
|
1366
1323
|
def pipe(input, env = environment)
|
1367
1324
|
self.environment = env
|
1368
1325
|
self.datasets = input
|
1326
|
+
self
|
1369
1327
|
end
|
1370
1328
|
|
1371
1329
|
protected
|
@@ -1461,6 +1419,7 @@ module Alf
|
|
1461
1419
|
def pipe(input, env = environment)
|
1462
1420
|
self.environment = env
|
1463
1421
|
self.datasets = input
|
1422
|
+
self
|
1464
1423
|
end
|
1465
1424
|
|
1466
1425
|
protected
|
@@ -1504,26 +1463,42 @@ module Alf
|
|
1504
1463
|
#
|
1505
1464
|
module Operator::NonRelational
|
1506
1465
|
|
1466
|
+
#
|
1467
|
+
# Yields the block with each operator module in turn
|
1468
|
+
#
|
1469
|
+
def self.each
|
1470
|
+
constants.each do |c|
|
1471
|
+
val = const_get(c)
|
1472
|
+
yield(val) if val.ancestors.include?(Operator::NonRelational)
|
1473
|
+
end
|
1474
|
+
end
|
1475
|
+
|
1507
1476
|
#
|
1508
|
-
# Extend with an unique autonumber attribute
|
1477
|
+
# Extend its operand with an unique autonumber attribute
|
1509
1478
|
#
|
1510
1479
|
# SYNOPSIS
|
1511
|
-
# #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
|
1512
1480
|
#
|
1513
|
-
#
|
1481
|
+
# #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
|
1514
1482
|
#
|
1515
|
-
#
|
1516
|
-
# (autonum :suppliers)
|
1483
|
+
# DESCRIPTION
|
1517
1484
|
#
|
1518
|
-
#
|
1485
|
+
# This non-relational operator guarantees uniqueness of output tuples by
|
1486
|
+
# adding an attribute called 'ATTRNAME' whose value is an Integer. No
|
1487
|
+
# guarantee is given about ordering of output tuples, nor to the fact
|
1488
|
+
# that this autonumber is sequential. Only that all values are different.
|
1489
|
+
# If the presence of duplicates was the only "non-relational" aspect of
|
1490
|
+
# input tuples, the result may be considered a valid relation representation.
|
1491
|
+
#
|
1492
|
+
# IN RUBY
|
1493
|
+
#
|
1494
|
+
# (autonum OPERAND, ATTRNAME = :autonum)
|
1495
|
+
#
|
1496
|
+
# (autonum :suppliers)
|
1519
1497
|
# (autonum :suppliers, :unique_id)
|
1520
1498
|
#
|
1521
|
-
#
|
1499
|
+
# IN SHELL
|
1522
1500
|
#
|
1523
|
-
#
|
1524
|
-
# with an autonumber attribute ATTRNAME. This allows converting non-relational
|
1525
|
-
# tuple enumerators to relational ones by ensuring uniqueness of tuples in an
|
1526
|
-
# arbitrary manner.
|
1501
|
+
# #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
|
1527
1502
|
#
|
1528
1503
|
# alf autonum suppliers
|
1529
1504
|
# alf autonum suppliers -- unique_id
|
@@ -1845,6 +1820,15 @@ module Alf
|
|
1845
1820
|
module Operator::Relational
|
1846
1821
|
|
1847
1822
|
#
|
1823
|
+
# Yields the block with each operator module in turn
|
1824
|
+
#
|
1825
|
+
def self.each
|
1826
|
+
constants.each do |c|
|
1827
|
+
val = const_get(c)
|
1828
|
+
yield(val) if val.ancestors.include?(Operator::Relational)
|
1829
|
+
end
|
1830
|
+
end
|
1831
|
+
|
1848
1832
|
# Relational projection (clip + compact)
|
1849
1833
|
#
|
1850
1834
|
# SYNOPSIS
|
@@ -2309,35 +2293,35 @@ module Alf
|
|
2309
2293
|
end # class Union
|
2310
2294
|
|
2311
2295
|
#
|
2312
|
-
# Relational
|
2296
|
+
# Relational wraping (tuple-valued attributes)
|
2313
2297
|
#
|
2314
2298
|
# SYNOPSIS
|
2315
2299
|
# #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
|
2316
2300
|
#
|
2317
2301
|
# API & EXAMPLE
|
2318
2302
|
#
|
2319
|
-
# (
|
2303
|
+
# (wrap :suppliers, [:city, :status], :loc_and_status)
|
2320
2304
|
#
|
2321
2305
|
# DESCRIPTION
|
2322
2306
|
#
|
2323
|
-
# This operator
|
2324
|
-
# attribute whose name is NEWNAME. When used in shell, names of
|
2307
|
+
# This operator wraps attributes ATTR1 to ATTRN as a new, tuple-based
|
2308
|
+
# attribute whose name is NEWNAME. When used in shell, names of wrapped
|
2325
2309
|
# attributes are taken from commandline arguments, expected the last one
|
2326
2310
|
# which defines the new name to use:
|
2327
2311
|
#
|
2328
|
-
# alf
|
2312
|
+
# alf wrap suppliers -- city status loc_and_status
|
2329
2313
|
#
|
2330
|
-
class
|
2314
|
+
class Wrap < Factory::Operator(__FILE__, __LINE__)
|
2331
2315
|
include Operator::Relational, Operator::Transform
|
2332
2316
|
|
2333
|
-
# Array of
|
2317
|
+
# Array of wraping attributes
|
2334
2318
|
attr_accessor :attributes
|
2335
2319
|
|
2336
|
-
# New name for the
|
2320
|
+
# New name for the wrapped attribute
|
2337
2321
|
attr_accessor :as
|
2338
2322
|
|
2339
|
-
# Builds a
|
2340
|
-
def initialize(attributes = [], as = :
|
2323
|
+
# Builds a Wrap operator instance
|
2324
|
+
def initialize(attributes = [], as = :wrapped)
|
2341
2325
|
@attributes = attributes
|
2342
2326
|
@as = as
|
2343
2327
|
end
|
@@ -2358,36 +2342,36 @@ module Alf
|
|
2358
2342
|
others
|
2359
2343
|
end
|
2360
2344
|
|
2361
|
-
end # class
|
2345
|
+
end # class Wrap
|
2362
2346
|
|
2363
2347
|
#
|
2364
|
-
# Relational un-
|
2348
|
+
# Relational un-wraping (inverse of wrap)
|
2365
2349
|
#
|
2366
2350
|
# SYNOPSIS
|
2367
2351
|
# #{program_name} #{command_name} [OPERAND] -- ATTR
|
2368
2352
|
#
|
2369
2353
|
# API & EXAMPLE
|
2370
2354
|
#
|
2371
|
-
# # Assuming
|
2372
|
-
# (
|
2355
|
+
# # Assuming wrapped = (wrap :suppliers, [:city, :status], :loc_and_status)
|
2356
|
+
# (unwrap wrapped, :loc_and_status)
|
2373
2357
|
#
|
2374
2358
|
# DESCRIPTION
|
2375
2359
|
#
|
2376
|
-
# This operator
|
2360
|
+
# This operator unwraps the tuple-valued attribute named ATTR so as to
|
2377
2361
|
# flatten its pairs with 'upstream' tuple. The latter should be such so that
|
2378
2362
|
# no name collision occurs. When used in shell, the name of the attribute to
|
2379
|
-
#
|
2363
|
+
# unwrap is taken as the first commandline argument:
|
2380
2364
|
#
|
2381
|
-
# alf
|
2365
|
+
# alf unwrap wrap -- loc_and_status
|
2382
2366
|
#
|
2383
|
-
class
|
2367
|
+
class Unwrap < Factory::Operator(__FILE__, __LINE__)
|
2384
2368
|
include Operator::Relational, Operator::Transform
|
2385
2369
|
|
2386
|
-
# Name of the attribute to
|
2370
|
+
# Name of the attribute to unwrap
|
2387
2371
|
attr_accessor :attribute
|
2388
2372
|
|
2389
2373
|
# Builds a Rename operator instance
|
2390
|
-
def initialize(attribute = :
|
2374
|
+
def initialize(attribute = :wrapped)
|
2391
2375
|
@attribute = attribute
|
2392
2376
|
end
|
2393
2377
|
|
@@ -2402,11 +2386,11 @@ module Alf
|
|
2402
2386
|
# (see Operator::Transform#_tuple2tuple)
|
2403
2387
|
def _tuple2tuple(tuple)
|
2404
2388
|
tuple = tuple.dup
|
2405
|
-
|
2406
|
-
tuple.merge(
|
2389
|
+
wrapped = tuple.delete(@attribute) || {}
|
2390
|
+
tuple.merge(wrapped)
|
2407
2391
|
end
|
2408
2392
|
|
2409
|
-
end # class
|
2393
|
+
end # class Unwrap
|
2410
2394
|
|
2411
2395
|
#
|
2412
2396
|
# Relational grouping (relation-valued attributes)
|
@@ -2464,7 +2448,7 @@ module Alf
|
|
2464
2448
|
# See Operator#_prepare
|
2465
2449
|
def _prepare
|
2466
2450
|
pkey = ProjectionKey.new(attributes, !allbut)
|
2467
|
-
@index = Hash.new{|h,k| h[k] =
|
2451
|
+
@index = Hash.new{|h,k| h[k] = Set.new}
|
2468
2452
|
each_input_tuple do |tuple|
|
2469
2453
|
key, rest = pkey.split(tuple)
|
2470
2454
|
@index[key] << rest
|
@@ -2474,7 +2458,7 @@ module Alf
|
|
2474
2458
|
# See Operator#_each
|
2475
2459
|
def _each
|
2476
2460
|
@index.each_pair do |k,v|
|
2477
|
-
yield(k.merge(@as => v))
|
2461
|
+
yield(k.merge(@as => Relation.coerce(v)))
|
2478
2462
|
end
|
2479
2463
|
end
|
2480
2464
|
|
@@ -2537,7 +2521,7 @@ module Alf
|
|
2537
2521
|
# Relational summarization (group-by + aggregate ops)
|
2538
2522
|
#
|
2539
2523
|
# SYNOPSIS
|
2540
|
-
# #{program_name} #{command_name} [OPERAND] --by=KEY1,KEY2... -- AGG1 EXPR1...
|
2524
|
+
# #{program_name} #{command_name} [OPERAND] [--allbut] --by=KEY1,KEY2... -- AGG1 EXPR1...
|
2541
2525
|
#
|
2542
2526
|
# OPTIONS
|
2543
2527
|
# #{summarized_options}
|
@@ -2547,6 +2531,10 @@ module Alf
|
|
2547
2531
|
# (summarize :supplies, [:sid],
|
2548
2532
|
# :total_qty => Aggregator.sum(:qty))
|
2549
2533
|
#
|
2534
|
+
# # Or, to specify an allbut projection
|
2535
|
+
# (summarize :supplies, [:qty, :pid],
|
2536
|
+
# :total_qty => Aggregator.sum(:qty), true)
|
2537
|
+
#
|
2550
2538
|
# DESCRIPTION
|
2551
2539
|
#
|
2552
2540
|
# This operator summarizes input tuples on the projection on KEY1,KEY2,...
|
@@ -2558,6 +2546,7 @@ module Alf
|
|
2558
2546
|
# aggregation expression evaluated on Aggregator:
|
2559
2547
|
#
|
2560
2548
|
# alf summarize supplies --by=sid -- total_qty "sum(:qty)"
|
2549
|
+
# alf summarize supplies --allbut --by=pid,qty -- total_qty "sum(:qty)"
|
2561
2550
|
#
|
2562
2551
|
class Summarize < Factory::Operator(__FILE__, __LINE__)
|
2563
2552
|
include Operator::Relational, Operator::Shortcut, Operator::Unary
|
@@ -2565,11 +2554,15 @@ module Alf
|
|
2565
2554
|
# By attributes
|
2566
2555
|
attr_accessor :by
|
2567
2556
|
|
2557
|
+
# Allbut on by?
|
2558
|
+
attr_accessor :allbut
|
2559
|
+
|
2568
2560
|
# Aggregations as a AGG => Aggregator(EXPR) hash
|
2569
2561
|
attr_accessor :aggregators
|
2570
2562
|
|
2571
|
-
def initialize(by = [], aggregators = {})
|
2563
|
+
def initialize(by = [], aggregators = {}, allbut = false)
|
2572
2564
|
@by = by
|
2565
|
+
@allbut = allbut
|
2573
2566
|
@aggregators = aggregators
|
2574
2567
|
end
|
2575
2568
|
|
@@ -2578,6 +2571,9 @@ module Alf
|
|
2578
2571
|
opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
|
2579
2572
|
@by = args.collect{|a| a.to_sym}
|
2580
2573
|
end
|
2574
|
+
opt.on('--allbut', 'Make an allbut projection/summarization') do
|
2575
|
+
@allbut = true
|
2576
|
+
end
|
2581
2577
|
end
|
2582
2578
|
|
2583
2579
|
# Summarizes according to a complete order
|
@@ -2613,7 +2609,42 @@ module Alf
|
|
2613
2609
|
end
|
2614
2610
|
|
2615
2611
|
end # class SortBased
|
2612
|
+
|
2613
|
+
# Summarizes in-memory with a hash
|
2614
|
+
class HashBased
|
2615
|
+
include Operator::Relational, Operator::Unary
|
2616
|
+
|
2617
|
+
attr_reader :by_key
|
2618
|
+
attr_reader :aggregators
|
2616
2619
|
|
2620
|
+
def initialize(by_key, aggregators)
|
2621
|
+
@by_key, @aggregators = by_key, aggregators
|
2622
|
+
end
|
2623
|
+
|
2624
|
+
protected
|
2625
|
+
|
2626
|
+
def _each
|
2627
|
+
index = Hash.new do |h,k|
|
2628
|
+
h[k] = tuple_collect(@aggregators) do |a,agg|
|
2629
|
+
[a, agg.least]
|
2630
|
+
end
|
2631
|
+
end
|
2632
|
+
each_input_tuple do |tuple|
|
2633
|
+
key, rest = by_key.split(tuple)
|
2634
|
+
index[key] = tuple_collect(@aggregators) do |a,agg|
|
2635
|
+
[a, agg.happens(index[key][a], tuple)]
|
2636
|
+
end
|
2637
|
+
end
|
2638
|
+
index.each_pair do |key,aggs|
|
2639
|
+
aggs = tuple_collect(@aggregators) do |a,agg|
|
2640
|
+
[a, agg.finalize(aggs[a])]
|
2641
|
+
end
|
2642
|
+
yield key.merge(aggs)
|
2643
|
+
end
|
2644
|
+
end
|
2645
|
+
|
2646
|
+
end
|
2647
|
+
|
2617
2648
|
protected
|
2618
2649
|
|
2619
2650
|
# (see Operator::CommandMethods#set_args)
|
@@ -2625,10 +2656,16 @@ module Alf
|
|
2625
2656
|
end
|
2626
2657
|
|
2627
2658
|
def longexpr
|
2628
|
-
|
2629
|
-
|
2630
|
-
|
2631
|
-
|
2659
|
+
if @allbut
|
2660
|
+
by_key = Tools::ProjectionKey.new(@by, @allbut)
|
2661
|
+
chain HashBased.new(by_key, @aggregators),
|
2662
|
+
datasets
|
2663
|
+
else
|
2664
|
+
by_key = Tools::ProjectionKey.new(@by, @allbut)
|
2665
|
+
chain SortBased.new(by_key, @aggregators),
|
2666
|
+
Operator::NonRelational::Sort.new(by_key.to_ordering_key),
|
2667
|
+
datasets
|
2668
|
+
end
|
2632
2669
|
end
|
2633
2670
|
|
2634
2671
|
end # class Summarize
|
@@ -2908,12 +2945,12 @@ module Alf
|
|
2908
2945
|
Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
|
2909
2946
|
}
|
2910
2947
|
end
|
2911
|
-
def least();
|
2948
|
+
def least(); Set.new; end
|
2912
2949
|
def _happens(memo, val)
|
2913
2950
|
memo << val
|
2914
2951
|
end
|
2915
2952
|
def finalize(memo)
|
2916
|
-
memo
|
2953
|
+
Relation.coerce memo
|
2917
2954
|
end
|
2918
2955
|
end
|
2919
2956
|
|
@@ -2944,7 +2981,6 @@ module Alf
|
|
2944
2981
|
end
|
2945
2982
|
end
|
2946
2983
|
|
2947
|
-
Lispy::Agg = Aggregator
|
2948
2984
|
end # class Aggregator
|
2949
2985
|
|
2950
2986
|
#
|
@@ -2981,4 +3017,119 @@ module Alf
|
|
2981
3017
|
|
2982
3018
|
end # class Buffer
|
2983
3019
|
|
3020
|
+
#
|
3021
|
+
# Implements a small LISP-like DSL on top of Alf.
|
3022
|
+
#
|
3023
|
+
# The lispy dialect is the functional one used in .alf files and in compiled
|
3024
|
+
# expressions as below:
|
3025
|
+
#
|
3026
|
+
# Alf.lispy.compile do
|
3027
|
+
# (restrict :suppliers, lambda{ city == 'London' })
|
3028
|
+
# end
|
3029
|
+
#
|
3030
|
+
# The DSL this module provides is part of Alf's public API and won't be broken
|
3031
|
+
# without a major version change. The module itself and its inclusion pre-
|
3032
|
+
# conditions are not part of the DSL itself, thus not considered as part of
|
3033
|
+
# the API, and may therefore evolve at any time. In other words, this module
|
3034
|
+
# is not intended to be directly included by third-party classes.
|
3035
|
+
#
|
3036
|
+
module Lispy
|
3037
|
+
|
3038
|
+
alias :ruby_extend :extend
|
3039
|
+
|
3040
|
+
# The environment
|
3041
|
+
attr_accessor :environment
|
3042
|
+
|
3043
|
+
#
|
3044
|
+
# Compiles a query expression given by a String or a block and returns
|
3045
|
+
# the result (typically a tuple iterator)
|
3046
|
+
#
|
3047
|
+
# Example
|
3048
|
+
#
|
3049
|
+
# # with a string
|
3050
|
+
# op = compile "(restrict :suppliers, lambda{ city == 'London' })"
|
3051
|
+
#
|
3052
|
+
# # or with a block
|
3053
|
+
# op = compile {
|
3054
|
+
# (restrict :suppliers, lambda{ city == 'London' })
|
3055
|
+
# }
|
3056
|
+
#
|
3057
|
+
# @param [String] expr a Lispy expression to compile
|
3058
|
+
# @return [Iterator] the iterator resulting from compilation
|
3059
|
+
#
|
3060
|
+
def compile(expr = nil, path = nil, &block)
|
3061
|
+
if expr.nil?
|
3062
|
+
instance_eval(&block)
|
3063
|
+
else
|
3064
|
+
(path ? Kernel.eval(expr, binding, path) : Kernel.eval(expr, binding))
|
3065
|
+
end
|
3066
|
+
end
|
3067
|
+
|
3068
|
+
#
|
3069
|
+
# Evaluates a query expression given by a String or a block and returns
|
3070
|
+
# the result as an in-memory relation (Alf::Relation)
|
3071
|
+
#
|
3072
|
+
# Example:
|
3073
|
+
#
|
3074
|
+
# # with a string
|
3075
|
+
# rel = evaluate "(restrict :suppliers, lambda{ city == 'London' })"
|
3076
|
+
#
|
3077
|
+
# # or with a block
|
3078
|
+
# rel = evaluate {
|
3079
|
+
# (restrict :suppliers, lambda{ city == 'London' })
|
3080
|
+
# }
|
3081
|
+
#
|
3082
|
+
def evaluate(expr = nil, path = nil, &block)
|
3083
|
+
compile(expr, path, &block).to_rel
|
3084
|
+
end
|
3085
|
+
|
3086
|
+
#
|
3087
|
+
# Delegated to the current environment
|
3088
|
+
#
|
3089
|
+
# This method returns the dataset associated to a given name. The result
|
3090
|
+
# may depend on the current environment, but is generally an Iterator,
|
3091
|
+
# often a Reader instance.
|
3092
|
+
#
|
3093
|
+
# @param [Symbol] name name of the dataset to retrieve
|
3094
|
+
# @return [Iterator] the dataset as an iterator
|
3095
|
+
# @see Environment#dataset
|
3096
|
+
#
|
3097
|
+
def dataset(name)
|
3098
|
+
raise "Environment not set" unless @environment
|
3099
|
+
@environment.dataset(name)
|
3100
|
+
end
|
3101
|
+
|
3102
|
+
# Functional equivalent to Alf::Relation[...]
|
3103
|
+
def relation(*tuples)
|
3104
|
+
Relation.coerce(tuples)
|
3105
|
+
end
|
3106
|
+
|
3107
|
+
#
|
3108
|
+
# Install the DSL through iteration over defined operators
|
3109
|
+
#
|
3110
|
+
Operator::each do |op_class|
|
3111
|
+
meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
|
3112
|
+
if op_class.unary?
|
3113
|
+
define_method(meth_name) do |child, *args|
|
3114
|
+
child = Iterator.coerce(child, environment)
|
3115
|
+
op_class.new(*args).pipe(child, environment)
|
3116
|
+
end
|
3117
|
+
elsif op_class.binary?
|
3118
|
+
define_method(meth_name) do |left, right, *args|
|
3119
|
+
operands = [left, right].collect{|x| Iterator.coerce(x, environment)}
|
3120
|
+
op_class.new(*args).pipe(operands, environment)
|
3121
|
+
end
|
3122
|
+
else
|
3123
|
+
raise "Unexpected operator #{op_class}"
|
3124
|
+
end
|
3125
|
+
end # Operators::each
|
3126
|
+
|
3127
|
+
def allbut(child, attributes)
|
3128
|
+
(project child, attributes, true)
|
3129
|
+
end
|
3130
|
+
|
3131
|
+
Agg = Alf::Aggregator
|
3132
|
+
end # module Lispy
|
3133
|
+
|
2984
3134
|
end # module Alf
|
3135
|
+
require "alf/relation"
|