alf 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +64 -0
- data/Gemfile.lock +4 -4
- data/README.md +257 -171
- data/TODO.md +4 -4
- data/alf.gemspec +3 -3
- data/alf.noespec +11 -6
- data/examples/pseudo-with.alf +7 -0
- data/examples/runall.sh +2 -2
- data/examples/unwrap.alf +4 -0
- data/examples/wrap.alf +2 -0
- data/lib/alf/relation.rb +118 -0
- data/lib/alf/version.rb +1 -1
- data/lib/alf.rb +320 -169
- data/spec/integration/src/test_minus.alf +5 -0
- data/spec/integration/src/test_project.alf +9 -0
- data/spec/{alf_spec.rb → integration/test_alf.rb} +8 -21
- data/spec/integration/test_alf_specs.rb +37 -0
- data/spec/{examples_spec.rb → integration/test_examples.rb} +1 -1
- data/spec/spec_helper.rb +19 -1
- data/spec/unit/environment/examples/suppliers.rash +5 -0
- data/spec/{environment/explicit_spec.rb → unit/environment/test_explicit.rb} +0 -0
- data/spec/{environment/folder_spec.rb → unit/environment/test_folder.rb} +1 -1
- data/spec/{operator → unit/operator}/non_relational/compact/buffer_based.rb +0 -0
- data/spec/{operator/non_relational/compact/sort_based_spec.rb → unit/operator/non_relational/compact/test_sort_based.rb} +0 -0
- data/spec/{operator/non_relational/autonum_spec.rb → unit/operator/non_relational/test_autonum.rb} +0 -0
- data/spec/{operator/non_relational/clip_spec.rb → unit/operator/non_relational/test_clip.rb} +0 -0
- data/spec/{operator/non_relational/compact_spec.rb → unit/operator/non_relational/test_compact.rb} +0 -0
- data/spec/{operator/non_relational/defaults_spec.rb → unit/operator/non_relational/test_defaults.rb} +0 -0
- data/spec/{operator/non_relational/sort_spec.rb → unit/operator/non_relational/test_sort.rb} +0 -0
- data/spec/{operator/relational/join/hash_based_spec.rb → unit/operator/relational/join/test_hash_based.rb} +0 -0
- data/spec/unit/operator/relational/summarize/test_hash_based.rb +38 -0
- data/spec/{operator/relational/summarize/sort_based_spec.rb → unit/operator/relational/summarize/test_sort_based.rb} +0 -0
- data/spec/{operator/relational/extend_spec.rb → unit/operator/relational/test_extend.rb} +0 -0
- data/spec/{operator/relational/group_spec.rb → unit/operator/relational/test_group.rb} +3 -2
- data/spec/{operator/relational/intersect_spec.rb → unit/operator/relational/test_intersect.rb} +0 -0
- data/spec/unit/operator/relational/test_join.rb +36 -0
- data/spec/{operator/relational/minus_spec.rb → unit/operator/relational/test_minus.rb} +0 -0
- data/spec/{operator/relational/project_spec.rb → unit/operator/relational/test_project.rb} +0 -0
- data/spec/{operator/relational/quota_spec.rb → unit/operator/relational/test_quota.rb} +0 -0
- data/spec/{operator/relational/rename_spec.rb → unit/operator/relational/test_rename.rb} +0 -0
- data/spec/{operator/relational/restrict_spec.rb → unit/operator/relational/test_restrict.rb} +0 -0
- data/spec/unit/operator/relational/test_summarize.rb +64 -0
- data/spec/{operator/relational/ungroup_spec.rb → unit/operator/relational/test_ungroup.rb} +0 -0
- data/spec/{operator/relational/union_spec.rb → unit/operator/relational/test_union.rb} +0 -0
- data/spec/{operator/relational/unnest_spec.rb → unit/operator/relational/test_unwrap.rb} +5 -5
- data/spec/{operator/relational/nest_spec.rb → unit/operator/relational/test_wrap.rb} +5 -5
- data/spec/{operator/command_methods_spec.rb → unit/operator/test_command_methods.rb} +0 -0
- data/spec/unit/operator/test_non_relational.rb +18 -0
- data/spec/unit/operator/test_relational.rb +27 -0
- data/spec/{reader → unit/reader}/input.rb +0 -0
- data/spec/unit/reader/test_alf_file.rb +27 -0
- data/spec/{reader/rash_spec.rb → unit/reader/test_rash.rb} +0 -0
- data/spec/unit/relation/test_coerce.rb +53 -0
- data/spec/unit/relation/test_inspect.rb +20 -0
- data/spec/unit/relation/test_relops.rb +46 -0
- data/spec/{renderer/text/cell_spec.rb → unit/renderer/text/test_cell.rb} +0 -0
- data/spec/{renderer/text/row_spec.rb → unit/renderer/text/test_row.rb} +0 -0
- data/spec/{renderer/text/table_spec.rb → unit/renderer/text/test_table.rb} +0 -0
- data/spec/{aggregator_spec.rb → unit/test_aggregator.rb} +6 -6
- data/spec/{assumptions_spec.rb → unit/test_assumptions.rb} +0 -0
- data/spec/{lispy_spec.rb → unit/test_lispy.rb} +0 -0
- data/spec/unit/test_operator.rb +16 -0
- data/spec/{reader_spec.rb → unit/test_reader.rb} +4 -0
- data/spec/unit/test_relation.rb +40 -0
- data/spec/{renderer_spec.rb → unit/test_renderer.rb} +0 -0
- data/spec/{tools/ordering_key_spec.rb → unit/tools/test_ordering_key.rb} +0 -0
- data/spec/{tools/projection_key_spec.rb → unit/tools/test_projection_key.rb} +0 -0
- data/spec/{tools/tools_spec.rb → unit/tools/test_tools.rb} +0 -0
- data/spec/{tools/tuple_handle_spec.rb → unit/tools/test_tuple_handle.rb} +0 -0
- data/tasks/clean.rake +3 -0
- data/tasks/spec_test.rake +1 -1
- metadata +143 -114
- data/examples/nest.alf +0 -2
- data/examples/unnest.alf +0 -4
- data/examples/with.alf +0 -23
- data/spec/operator/relational/summarize_spec.rb +0 -41
- data/spec/reader/alf_file_spec.rb +0 -15
data/lib/alf.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require "enumerator"
|
|
2
2
|
require "stringio"
|
|
3
|
+
require "set"
|
|
3
4
|
require "alf/version"
|
|
4
5
|
require "alf/loader"
|
|
5
6
|
|
|
@@ -238,7 +239,8 @@ module Alf
|
|
|
238
239
|
|
|
239
240
|
def compare(t1,t2)
|
|
240
241
|
@ordering.each do |attr,order|
|
|
241
|
-
|
|
242
|
+
x, y = t1[attr], t2[attr]
|
|
243
|
+
comp = x.respond_to?(:<=>) ? (x <=> y) : (x.to_s <=> y.to_s)
|
|
242
244
|
comp *= -1 if order == :desc
|
|
243
245
|
return comp unless comp == 0
|
|
244
246
|
end
|
|
@@ -279,114 +281,6 @@ module Alf
|
|
|
279
281
|
Command::Main.new(env)
|
|
280
282
|
end
|
|
281
283
|
|
|
282
|
-
#
|
|
283
|
-
# Implements a small LISP-like DSL on top of Alf.
|
|
284
|
-
#
|
|
285
|
-
# The lispy dialect is the functional one used in .alf files and in compiled
|
|
286
|
-
# expressions as below:
|
|
287
|
-
#
|
|
288
|
-
# Alf.lispy.compile do
|
|
289
|
-
# (restrict :suppliers, lambda{ city == 'London' })
|
|
290
|
-
# end
|
|
291
|
-
#
|
|
292
|
-
# The DSL this module provides is part of Alf's public API and won't be broken
|
|
293
|
-
# without a major version change. The module itself and its inclusion pre-
|
|
294
|
-
# conditions are not part of the DSL itself, thus not considered as part of
|
|
295
|
-
# the API, and may therefore evolve at any time. In other words, this module
|
|
296
|
-
# is not intended to be directly included by third-party classes.
|
|
297
|
-
#
|
|
298
|
-
module Lispy
|
|
299
|
-
|
|
300
|
-
# The environment
|
|
301
|
-
attr_accessor :environment
|
|
302
|
-
|
|
303
|
-
#
|
|
304
|
-
# Compiles a query expression given by a String or a block and returns
|
|
305
|
-
# the result (typically a tuple iterator)
|
|
306
|
-
#
|
|
307
|
-
def compile(expr = nil, &block)
|
|
308
|
-
expr.nil? ? instance_eval(&block) : instance_eval(expr)
|
|
309
|
-
end
|
|
310
|
-
|
|
311
|
-
# Delegated to the environment
|
|
312
|
-
def dataset(name)
|
|
313
|
-
raise "Environment not set" unless @environment
|
|
314
|
-
@environment.dataset(name)
|
|
315
|
-
end
|
|
316
|
-
|
|
317
|
-
#
|
|
318
|
-
# Compiles the subexpression given by the block in the context of
|
|
319
|
-
# additional temporary expressions given by definitions
|
|
320
|
-
#
|
|
321
|
-
def with(definitions)
|
|
322
|
-
# We branch with the definitions for compilation
|
|
323
|
-
self.environment = environment.branch(definitions)
|
|
324
|
-
|
|
325
|
-
# this is to ensure that sub definitions can reuse other
|
|
326
|
-
# ones
|
|
327
|
-
definitions.each_value do |defn|
|
|
328
|
-
defn.environment = self.environment
|
|
329
|
-
end
|
|
330
|
-
|
|
331
|
-
# compile now
|
|
332
|
-
op = compile(&Proc.new)
|
|
333
|
-
|
|
334
|
-
# We now unbranch for next expression
|
|
335
|
-
self.environment = environment.unbranch
|
|
336
|
-
|
|
337
|
-
op
|
|
338
|
-
end
|
|
339
|
-
|
|
340
|
-
#
|
|
341
|
-
# Chains some elements as a new operator
|
|
342
|
-
#
|
|
343
|
-
def chain(*elements)
|
|
344
|
-
elements = elements.reverse
|
|
345
|
-
elements[1..-1].inject(elements.first) do |c, elm|
|
|
346
|
-
elm.pipe(c, environment)
|
|
347
|
-
elm
|
|
348
|
-
end
|
|
349
|
-
end
|
|
350
|
-
|
|
351
|
-
[ :Autonum, :Clip, :Compact, :Defaults, :Sort ].each do |op_name|
|
|
352
|
-
meth_name = Tools.ruby_case(op_name).to_sym
|
|
353
|
-
define_method(meth_name) do |child, *args|
|
|
354
|
-
chain(Operator::NonRelational.const_get(op_name).new(*args), child)
|
|
355
|
-
end
|
|
356
|
-
end
|
|
357
|
-
|
|
358
|
-
[:Project,
|
|
359
|
-
:Extend,
|
|
360
|
-
:Rename,
|
|
361
|
-
:Restrict,
|
|
362
|
-
:Nest,
|
|
363
|
-
:Unnest,
|
|
364
|
-
:Group,
|
|
365
|
-
:Ungroup,
|
|
366
|
-
:Summarize,
|
|
367
|
-
:Quota ].each do |op_name|
|
|
368
|
-
meth_name = Tools.ruby_case(op_name).to_sym
|
|
369
|
-
define_method(meth_name) do |child, *args|
|
|
370
|
-
chain(Operator::Relational.const_get(op_name).new(*args), child)
|
|
371
|
-
end
|
|
372
|
-
end
|
|
373
|
-
|
|
374
|
-
def allbut(child, attributes)
|
|
375
|
-
chain(Operator::Relational::Project.new(attributes, true), child)
|
|
376
|
-
end
|
|
377
|
-
|
|
378
|
-
[ :Join,
|
|
379
|
-
:Union,
|
|
380
|
-
:Intersect,
|
|
381
|
-
:Minus ].each do |op_name|
|
|
382
|
-
meth_name = Tools.ruby_case(op_name).to_sym
|
|
383
|
-
define_method(meth_name) do |left, right, *args|
|
|
384
|
-
chain(Operator::Relational.const_get(op_name).new(*args), [left, right])
|
|
385
|
-
end
|
|
386
|
-
end
|
|
387
|
-
|
|
388
|
-
end # module Lispy
|
|
389
|
-
|
|
390
284
|
#
|
|
391
285
|
# Encapsulates the interface with the outside world, providing base iterators
|
|
392
286
|
# for named datasets, among others.
|
|
@@ -574,24 +468,35 @@ module Alf
|
|
|
574
468
|
# implementation.
|
|
575
469
|
# @param [Environment] environment an optional environment for resolving
|
|
576
470
|
# named datasets if needed.
|
|
471
|
+
# @return [Object] self
|
|
577
472
|
#
|
|
578
473
|
def pipe(input, environment = nil)
|
|
474
|
+
self
|
|
579
475
|
end
|
|
580
476
|
undef :pipe
|
|
581
477
|
|
|
582
|
-
|
|
583
478
|
#
|
|
584
479
|
# Coerces something to an iterator
|
|
585
480
|
#
|
|
586
|
-
def self.coerce(arg,
|
|
481
|
+
def self.coerce(arg, environment = nil)
|
|
587
482
|
case arg
|
|
588
483
|
when Iterator, Array
|
|
589
484
|
arg
|
|
590
485
|
else
|
|
591
|
-
Reader.coerce(arg,
|
|
486
|
+
Reader.coerce(arg, environment)
|
|
592
487
|
end
|
|
593
488
|
end
|
|
594
489
|
|
|
490
|
+
#
|
|
491
|
+
# Converts this iterator to an in-memory Relation.
|
|
492
|
+
#
|
|
493
|
+
# @return [Relation] a relation instance, as the set of tuples
|
|
494
|
+
# that would be yielded by this iterator.
|
|
495
|
+
#
|
|
496
|
+
def to_rel
|
|
497
|
+
Relation::coerce(self)
|
|
498
|
+
end
|
|
499
|
+
|
|
595
500
|
end # module Iterator
|
|
596
501
|
|
|
597
502
|
#
|
|
@@ -650,8 +555,9 @@ module Alf
|
|
|
650
555
|
end
|
|
651
556
|
|
|
652
557
|
#
|
|
653
|
-
#
|
|
654
|
-
# as argument.
|
|
558
|
+
# When filepath is a String, returns a reader instance for a specific file
|
|
559
|
+
# whose path is given as argument. Otherwise, delegate the call to
|
|
560
|
+
# <code>coerce(filepath)</code>
|
|
655
561
|
#
|
|
656
562
|
# @param [String] filepath path to a file for which extension is recognized
|
|
657
563
|
# @param [Array] args optional additional arguments that must be passed at
|
|
@@ -659,11 +565,15 @@ module Alf
|
|
|
659
565
|
# @return [Reader] a reader instance
|
|
660
566
|
#
|
|
661
567
|
def self.reader(filepath, *args)
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
registered[
|
|
568
|
+
if filepath.is_a?(String)
|
|
569
|
+
ext = File.extname(filepath)
|
|
570
|
+
if registered = @@readers.find{|r| r[1].include?(ext)}
|
|
571
|
+
registered[2].new(filepath, *args)
|
|
572
|
+
else
|
|
573
|
+
raise "No registered reader for #{ext} (#{filepath})"
|
|
574
|
+
end
|
|
665
575
|
else
|
|
666
|
-
|
|
576
|
+
coerce(filepath)
|
|
667
577
|
end
|
|
668
578
|
end
|
|
669
579
|
|
|
@@ -715,6 +625,7 @@ module Alf
|
|
|
715
625
|
#
|
|
716
626
|
def pipe(input, env = environment)
|
|
717
627
|
@input = input
|
|
628
|
+
self
|
|
718
629
|
end
|
|
719
630
|
|
|
720
631
|
#
|
|
@@ -733,6 +644,14 @@ module Alf
|
|
|
733
644
|
|
|
734
645
|
protected
|
|
735
646
|
|
|
647
|
+
#
|
|
648
|
+
# Returns the input file path, or nil if this Reader is bound to an IO
|
|
649
|
+
# directly.
|
|
650
|
+
#
|
|
651
|
+
def input_path
|
|
652
|
+
input.is_a?(String) ? input : nil
|
|
653
|
+
end
|
|
654
|
+
|
|
736
655
|
#
|
|
737
656
|
# Coerces the input object to an IO and yields the block with it.
|
|
738
657
|
#
|
|
@@ -822,7 +741,7 @@ module Alf
|
|
|
822
741
|
|
|
823
742
|
# (see Reader#each)
|
|
824
743
|
def each
|
|
825
|
-
op = Alf.lispy(environment).compile(input_text)
|
|
744
|
+
op = Alf.lispy(environment).compile(input_text, input_path)
|
|
826
745
|
op.each(&Proc.new)
|
|
827
746
|
end
|
|
828
747
|
|
|
@@ -923,6 +842,7 @@ module Alf
|
|
|
923
842
|
def pipe(input, env = environment)
|
|
924
843
|
self.environment = env
|
|
925
844
|
self.input = input
|
|
845
|
+
self
|
|
926
846
|
end
|
|
927
847
|
|
|
928
848
|
#
|
|
@@ -1033,7 +953,7 @@ module Alf
|
|
|
1033
953
|
# See '#{program_name} help COMMAND' for details about a specific command.
|
|
1034
954
|
#
|
|
1035
955
|
class Main < Quickl::Delegator(__FILE__, __LINE__)
|
|
1036
|
-
include Command
|
|
956
|
+
include Command
|
|
1037
957
|
|
|
1038
958
|
# Environment instance to use to get base iterators
|
|
1039
959
|
attr_accessor :environment
|
|
@@ -1044,6 +964,7 @@ module Alf
|
|
|
1044
964
|
# Creates a command instance
|
|
1045
965
|
def initialize(env = Environment.default)
|
|
1046
966
|
@environment = env
|
|
967
|
+
extend(Lispy)
|
|
1047
968
|
end
|
|
1048
969
|
|
|
1049
970
|
# Install options
|
|
@@ -1098,7 +1019,7 @@ module Alf
|
|
|
1098
1019
|
# 3) if there is a requester, then we do the job (assuming bin/alf)
|
|
1099
1020
|
# with the renderer to use. Otherwise, we simply return built operator
|
|
1100
1021
|
if operator && requester
|
|
1101
|
-
|
|
1022
|
+
renderer.pipe(operator, environment).execute($stdout)
|
|
1102
1023
|
else
|
|
1103
1024
|
operator
|
|
1104
1025
|
end
|
|
@@ -1138,7 +1059,7 @@ module Alf
|
|
|
1138
1059
|
def execute(args)
|
|
1139
1060
|
requester.renderer = @renderer
|
|
1140
1061
|
args = [ $stdin ] if args.empty?
|
|
1141
|
-
requester.chain
|
|
1062
|
+
requester.send(:chain,*args)
|
|
1142
1063
|
end
|
|
1143
1064
|
|
|
1144
1065
|
end # class Show
|
|
@@ -1198,7 +1119,42 @@ module Alf
|
|
|
1198
1119
|
#
|
|
1199
1120
|
module Operator
|
|
1200
1121
|
include Iterator, Tools
|
|
1122
|
+
|
|
1123
|
+
#
|
|
1124
|
+
# Yields non-relational then relational operators, in turn.
|
|
1125
|
+
#
|
|
1126
|
+
def self.each
|
|
1127
|
+
Operator::NonRelational.each{|x| yield(x)}
|
|
1128
|
+
Operator::Relational.each{|x| yield(x)}
|
|
1129
|
+
end
|
|
1201
1130
|
|
|
1131
|
+
#
|
|
1132
|
+
# Encapsulates methods that allow operator introspection, that is,
|
|
1133
|
+
# knowing operator cardinality and similar stuff.
|
|
1134
|
+
#
|
|
1135
|
+
module Introspection
|
|
1136
|
+
|
|
1137
|
+
#
|
|
1138
|
+
# Returns true if this operator is an unary operator, false otherwise
|
|
1139
|
+
#
|
|
1140
|
+
def unary?
|
|
1141
|
+
ancestors.include?(Operator::Unary)
|
|
1142
|
+
end
|
|
1143
|
+
|
|
1144
|
+
#
|
|
1145
|
+
# Returns true if this operator is a binary operator, false otherwise
|
|
1146
|
+
#
|
|
1147
|
+
def binary?
|
|
1148
|
+
ancestors.include?(Operator::Binary)
|
|
1149
|
+
end
|
|
1150
|
+
|
|
1151
|
+
end # module Introspection
|
|
1152
|
+
|
|
1153
|
+
# Ensures that the Introspection module is set on real operators
|
|
1154
|
+
def self.included(mod)
|
|
1155
|
+
mod.extend(Introspection) if mod.is_a?(Class)
|
|
1156
|
+
end
|
|
1157
|
+
|
|
1202
1158
|
#
|
|
1203
1159
|
# Encapsulates method definitions that convert operators to Quickl
|
|
1204
1160
|
# commands
|
|
@@ -1238,7 +1194,7 @@ module Alf
|
|
|
1238
1194
|
end
|
|
1239
1195
|
|
|
1240
1196
|
def split_command_args(args)
|
|
1241
|
-
|
|
1197
|
+
case (i = args.index("--"))
|
|
1242
1198
|
when NilClass
|
|
1243
1199
|
[args, []]
|
|
1244
1200
|
when 0
|
|
@@ -1328,12 +1284,13 @@ module Alf
|
|
|
1328
1284
|
def pipe(input, env = environment)
|
|
1329
1285
|
self.environment = env
|
|
1330
1286
|
self.datasets = [ input ]
|
|
1287
|
+
self
|
|
1331
1288
|
end
|
|
1332
1289
|
|
|
1333
1290
|
protected
|
|
1334
1291
|
|
|
1335
1292
|
def command_line_operands(operands)
|
|
1336
|
-
operands.first
|
|
1293
|
+
operands.first || $stdin
|
|
1337
1294
|
end
|
|
1338
1295
|
|
|
1339
1296
|
#
|
|
@@ -1366,6 +1323,7 @@ module Alf
|
|
|
1366
1323
|
def pipe(input, env = environment)
|
|
1367
1324
|
self.environment = env
|
|
1368
1325
|
self.datasets = input
|
|
1326
|
+
self
|
|
1369
1327
|
end
|
|
1370
1328
|
|
|
1371
1329
|
protected
|
|
@@ -1461,6 +1419,7 @@ module Alf
|
|
|
1461
1419
|
def pipe(input, env = environment)
|
|
1462
1420
|
self.environment = env
|
|
1463
1421
|
self.datasets = input
|
|
1422
|
+
self
|
|
1464
1423
|
end
|
|
1465
1424
|
|
|
1466
1425
|
protected
|
|
@@ -1504,26 +1463,42 @@ module Alf
|
|
|
1504
1463
|
#
|
|
1505
1464
|
module Operator::NonRelational
|
|
1506
1465
|
|
|
1466
|
+
#
|
|
1467
|
+
# Yields the block with each operator module in turn
|
|
1468
|
+
#
|
|
1469
|
+
def self.each
|
|
1470
|
+
constants.each do |c|
|
|
1471
|
+
val = const_get(c)
|
|
1472
|
+
yield(val) if val.ancestors.include?(Operator::NonRelational)
|
|
1473
|
+
end
|
|
1474
|
+
end
|
|
1475
|
+
|
|
1507
1476
|
#
|
|
1508
|
-
# Extend with an unique autonumber attribute
|
|
1477
|
+
# Extend its operand with a unique autonumber attribute
|
|
1509
1478
|
#
|
|
1510
1479
|
# SYNOPSIS
|
|
1511
|
-
# #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
|
|
1512
1480
|
#
|
|
1513
|
-
#
|
|
1481
|
+
# #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
|
|
1514
1482
|
#
|
|
1515
|
-
#
|
|
1516
|
-
# (autonum :suppliers)
|
|
1483
|
+
# DESCRIPTION
|
|
1517
1484
|
#
|
|
1518
|
-
#
|
|
1485
|
+
# This non-relational operator guarantees uniqueness of output tuples by
|
|
1486
|
+
# adding an attribute called 'ATTRNAME' whose value is an Integer. No
|
|
1487
|
+
# guarantee is given about ordering of output tuples, nor to the fact
|
|
1488
|
+
# that this autonumber is sequential. Only that all values are different.
|
|
1489
|
+
# If the presence of duplicates was the only "non-relational" aspect of
|
|
1490
|
+
# input tuples, the result may be considered a valid relation representation.
|
|
1491
|
+
#
|
|
1492
|
+
# IN RUBY
|
|
1493
|
+
#
|
|
1494
|
+
# (autonum OPERAND, ATTRNAME = :autonum)
|
|
1495
|
+
#
|
|
1496
|
+
# (autonum :suppliers)
|
|
1519
1497
|
# (autonum :suppliers, :unique_id)
|
|
1520
1498
|
#
|
|
1521
|
-
#
|
|
1499
|
+
# IN SHELL
|
|
1522
1500
|
#
|
|
1523
|
-
#
|
|
1524
|
-
# with an autonumber attribute ATTRNAME. This allows converting non-relational
|
|
1525
|
-
# tuple enumerators to relational ones by ensuring uniqueness of tuples in an
|
|
1526
|
-
# arbitrary manner.
|
|
1501
|
+
# #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
|
|
1527
1502
|
#
|
|
1528
1503
|
# alf autonum suppliers
|
|
1529
1504
|
# alf autonum suppliers -- unique_id
|
|
@@ -1845,6 +1820,15 @@ module Alf
|
|
|
1845
1820
|
module Operator::Relational
|
|
1846
1821
|
|
|
1847
1822
|
#
|
|
1823
|
+
# Yields the block with each operator module in turn
|
|
1824
|
+
#
|
|
1825
|
+
def self.each
|
|
1826
|
+
constants.each do |c|
|
|
1827
|
+
val = const_get(c)
|
|
1828
|
+
yield(val) if val.ancestors.include?(Operator::Relational)
|
|
1829
|
+
end
|
|
1830
|
+
end
|
|
1831
|
+
|
|
1848
1832
|
# Relational projection (clip + compact)
|
|
1849
1833
|
#
|
|
1850
1834
|
# SYNOPSIS
|
|
@@ -2309,35 +2293,35 @@ module Alf
|
|
|
2309
2293
|
end # class Union
|
|
2310
2294
|
|
|
2311
2295
|
#
|
|
2312
|
-
# Relational
|
|
2296
|
+
# Relational wrapping (tuple-valued attributes)
|
|
2313
2297
|
#
|
|
2314
2298
|
# SYNOPSIS
|
|
2315
2299
|
# #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
|
|
2316
2300
|
#
|
|
2317
2301
|
# API & EXAMPLE
|
|
2318
2302
|
#
|
|
2319
|
-
# (
|
|
2303
|
+
# (wrap :suppliers, [:city, :status], :loc_and_status)
|
|
2320
2304
|
#
|
|
2321
2305
|
# DESCRIPTION
|
|
2322
2306
|
#
|
|
2323
|
-
# This operator
|
|
2324
|
-
# attribute whose name is NEWNAME. When used in shell, names of
|
|
2307
|
+
# This operator wraps attributes ATTR1 to ATTRN as a new, tuple-based
|
|
2308
|
+
# attribute whose name is NEWNAME. When used in shell, names of wrapped
|
|
2325
2309
|
# attributes are taken from commandline arguments, except the last one
|
|
2326
2310
|
# which defines the new name to use:
|
|
2327
2311
|
#
|
|
2328
|
-
# alf
|
|
2312
|
+
# alf wrap suppliers -- city status loc_and_status
|
|
2329
2313
|
#
|
|
2330
|
-
class
|
|
2314
|
+
class Wrap < Factory::Operator(__FILE__, __LINE__)
|
|
2331
2315
|
include Operator::Relational, Operator::Transform
|
|
2332
2316
|
|
|
2333
|
-
# Array of
|
|
2317
|
+
# Array of wrapping attributes
|
|
2334
2318
|
attr_accessor :attributes
|
|
2335
2319
|
|
|
2336
|
-
# New name for the
|
|
2320
|
+
# New name for the wrapped attribute
|
|
2337
2321
|
attr_accessor :as
|
|
2338
2322
|
|
|
2339
|
-
# Builds a
|
|
2340
|
-
def initialize(attributes = [], as = :
|
|
2323
|
+
# Builds a Wrap operator instance
|
|
2324
|
+
def initialize(attributes = [], as = :wrapped)
|
|
2341
2325
|
@attributes = attributes
|
|
2342
2326
|
@as = as
|
|
2343
2327
|
end
|
|
@@ -2358,36 +2342,36 @@ module Alf
|
|
|
2358
2342
|
others
|
|
2359
2343
|
end
|
|
2360
2344
|
|
|
2361
|
-
end # class
|
|
2345
|
+
end # class Wrap
|
|
2362
2346
|
|
|
2363
2347
|
#
|
|
2364
|
-
# Relational un-
|
|
2348
|
+
# Relational un-wrapping (inverse of wrap)
|
|
2365
2349
|
#
|
|
2366
2350
|
# SYNOPSIS
|
|
2367
2351
|
# #{program_name} #{command_name} [OPERAND] -- ATTR
|
|
2368
2352
|
#
|
|
2369
2353
|
# API & EXAMPLE
|
|
2370
2354
|
#
|
|
2371
|
-
# # Assuming
|
|
2372
|
-
# (
|
|
2355
|
+
# # Assuming wrapped = (wrap :suppliers, [:city, :status], :loc_and_status)
|
|
2356
|
+
# (unwrap wrapped, :loc_and_status)
|
|
2373
2357
|
#
|
|
2374
2358
|
# DESCRIPTION
|
|
2375
2359
|
#
|
|
2376
|
-
# This operator
|
|
2360
|
+
# This operator unwraps the tuple-valued attribute named ATTR so as to
|
|
2377
2361
|
# flatten its pairs with 'upstream' tuple. The latter should be such so that
|
|
2378
2362
|
# no name collision occurs. When used in shell, the name of the attribute to
|
|
2379
|
-
#
|
|
2363
|
+
# unwrap is taken as the first commandline argument:
|
|
2380
2364
|
#
|
|
2381
|
-
# alf
|
|
2365
|
+
# alf unwrap wrap -- loc_and_status
|
|
2382
2366
|
#
|
|
2383
|
-
class
|
|
2367
|
+
class Unwrap < Factory::Operator(__FILE__, __LINE__)
|
|
2384
2368
|
include Operator::Relational, Operator::Transform
|
|
2385
2369
|
|
|
2386
|
-
# Name of the attribute to
|
|
2370
|
+
# Name of the attribute to unwrap
|
|
2387
2371
|
attr_accessor :attribute
|
|
2388
2372
|
|
|
2389
2373
|
# Builds an Unwrap operator instance
|
|
2390
|
-
def initialize(attribute = :
|
|
2374
|
+
def initialize(attribute = :wrapped)
|
|
2391
2375
|
@attribute = attribute
|
|
2392
2376
|
end
|
|
2393
2377
|
|
|
@@ -2402,11 +2386,11 @@ module Alf
|
|
|
2402
2386
|
# (see Operator::Transform#_tuple2tuple)
|
|
2403
2387
|
def _tuple2tuple(tuple)
|
|
2404
2388
|
tuple = tuple.dup
|
|
2405
|
-
|
|
2406
|
-
tuple.merge(
|
|
2389
|
+
wrapped = tuple.delete(@attribute) || {}
|
|
2390
|
+
tuple.merge(wrapped)
|
|
2407
2391
|
end
|
|
2408
2392
|
|
|
2409
|
-
end # class
|
|
2393
|
+
end # class Unwrap
|
|
2410
2394
|
|
|
2411
2395
|
#
|
|
2412
2396
|
# Relational grouping (relation-valued attributes)
|
|
@@ -2464,7 +2448,7 @@ module Alf
|
|
|
2464
2448
|
# See Operator#_prepare
|
|
2465
2449
|
def _prepare
|
|
2466
2450
|
pkey = ProjectionKey.new(attributes, !allbut)
|
|
2467
|
-
@index = Hash.new{|h,k| h[k] =
|
|
2451
|
+
@index = Hash.new{|h,k| h[k] = Set.new}
|
|
2468
2452
|
each_input_tuple do |tuple|
|
|
2469
2453
|
key, rest = pkey.split(tuple)
|
|
2470
2454
|
@index[key] << rest
|
|
@@ -2474,7 +2458,7 @@ module Alf
|
|
|
2474
2458
|
# See Operator#_each
|
|
2475
2459
|
def _each
|
|
2476
2460
|
@index.each_pair do |k,v|
|
|
2477
|
-
yield(k.merge(@as => v))
|
|
2461
|
+
yield(k.merge(@as => Relation.coerce(v)))
|
|
2478
2462
|
end
|
|
2479
2463
|
end
|
|
2480
2464
|
|
|
@@ -2537,7 +2521,7 @@ module Alf
|
|
|
2537
2521
|
# Relational summarization (group-by + aggregate ops)
|
|
2538
2522
|
#
|
|
2539
2523
|
# SYNOPSIS
|
|
2540
|
-
# #{program_name} #{command_name} [OPERAND] --by=KEY1,KEY2... -- AGG1 EXPR1...
|
|
2524
|
+
# #{program_name} #{command_name} [OPERAND] [--allbut] --by=KEY1,KEY2... -- AGG1 EXPR1...
|
|
2541
2525
|
#
|
|
2542
2526
|
# OPTIONS
|
|
2543
2527
|
# #{summarized_options}
|
|
@@ -2547,6 +2531,10 @@ module Alf
|
|
|
2547
2531
|
# (summarize :supplies, [:sid],
|
|
2548
2532
|
# :total_qty => Aggregator.sum(:qty))
|
|
2549
2533
|
#
|
|
2534
|
+
# # Or, to specify an allbut projection
|
|
2535
|
+
# (summarize :supplies, [:qty, :pid],
|
|
2536
|
+
# :total_qty => Aggregator.sum(:qty), true)
|
|
2537
|
+
#
|
|
2550
2538
|
# DESCRIPTION
|
|
2551
2539
|
#
|
|
2552
2540
|
# This operator summarizes input tuples on the projection on KEY1,KEY2,...
|
|
@@ -2558,6 +2546,7 @@ module Alf
|
|
|
2558
2546
|
# aggregation expression evaluated on Aggregator:
|
|
2559
2547
|
#
|
|
2560
2548
|
# alf summarize supplies --by=sid -- total_qty "sum(:qty)"
|
|
2549
|
+
# alf summarize supplies --allbut --by=pid,qty -- total_qty "sum(:qty)"
|
|
2561
2550
|
#
|
|
2562
2551
|
class Summarize < Factory::Operator(__FILE__, __LINE__)
|
|
2563
2552
|
include Operator::Relational, Operator::Shortcut, Operator::Unary
|
|
@@ -2565,11 +2554,15 @@ module Alf
|
|
|
2565
2554
|
# By attributes
|
|
2566
2555
|
attr_accessor :by
|
|
2567
2556
|
|
|
2557
|
+
# Allbut on by?
|
|
2558
|
+
attr_accessor :allbut
|
|
2559
|
+
|
|
2568
2560
|
# Aggregations as a AGG => Aggregator(EXPR) hash
|
|
2569
2561
|
attr_accessor :aggregators
|
|
2570
2562
|
|
|
2571
|
-
def initialize(by = [], aggregators = {})
|
|
2563
|
+
def initialize(by = [], aggregators = {}, allbut = false)
|
|
2572
2564
|
@by = by
|
|
2565
|
+
@allbut = allbut
|
|
2573
2566
|
@aggregators = aggregators
|
|
2574
2567
|
end
|
|
2575
2568
|
|
|
@@ -2578,6 +2571,9 @@ module Alf
|
|
|
2578
2571
|
opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
|
|
2579
2572
|
@by = args.collect{|a| a.to_sym}
|
|
2580
2573
|
end
|
|
2574
|
+
opt.on('--allbut', 'Make an allbut projection/summarization') do
|
|
2575
|
+
@allbut = true
|
|
2576
|
+
end
|
|
2581
2577
|
end
|
|
2582
2578
|
|
|
2583
2579
|
# Summarizes according to a complete order
|
|
@@ -2613,7 +2609,42 @@ module Alf
|
|
|
2613
2609
|
end
|
|
2614
2610
|
|
|
2615
2611
|
end # class SortBased
|
|
2612
|
+
|
|
2613
|
+
# Summarizes in-memory with a hash
|
|
2614
|
+
class HashBased
|
|
2615
|
+
include Operator::Relational, Operator::Unary
|
|
2616
|
+
|
|
2617
|
+
attr_reader :by_key
|
|
2618
|
+
attr_reader :aggregators
|
|
2616
2619
|
|
|
2620
|
+
def initialize(by_key, aggregators)
|
|
2621
|
+
@by_key, @aggregators = by_key, aggregators
|
|
2622
|
+
end
|
|
2623
|
+
|
|
2624
|
+
protected
|
|
2625
|
+
|
|
2626
|
+
def _each
|
|
2627
|
+
index = Hash.new do |h,k|
|
|
2628
|
+
h[k] = tuple_collect(@aggregators) do |a,agg|
|
|
2629
|
+
[a, agg.least]
|
|
2630
|
+
end
|
|
2631
|
+
end
|
|
2632
|
+
each_input_tuple do |tuple|
|
|
2633
|
+
key, rest = by_key.split(tuple)
|
|
2634
|
+
index[key] = tuple_collect(@aggregators) do |a,agg|
|
|
2635
|
+
[a, agg.happens(index[key][a], tuple)]
|
|
2636
|
+
end
|
|
2637
|
+
end
|
|
2638
|
+
index.each_pair do |key,aggs|
|
|
2639
|
+
aggs = tuple_collect(@aggregators) do |a,agg|
|
|
2640
|
+
[a, agg.finalize(aggs[a])]
|
|
2641
|
+
end
|
|
2642
|
+
yield key.merge(aggs)
|
|
2643
|
+
end
|
|
2644
|
+
end
|
|
2645
|
+
|
|
2646
|
+
end
|
|
2647
|
+
|
|
2617
2648
|
protected
|
|
2618
2649
|
|
|
2619
2650
|
# (see Operator::CommandMethods#set_args)
|
|
@@ -2625,10 +2656,16 @@ module Alf
|
|
|
2625
2656
|
end
|
|
2626
2657
|
|
|
2627
2658
|
def longexpr
|
|
2628
|
-
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
|
|
2659
|
+
if @allbut
|
|
2660
|
+
by_key = Tools::ProjectionKey.new(@by, @allbut)
|
|
2661
|
+
chain HashBased.new(by_key, @aggregators),
|
|
2662
|
+
datasets
|
|
2663
|
+
else
|
|
2664
|
+
by_key = Tools::ProjectionKey.new(@by, @allbut)
|
|
2665
|
+
chain SortBased.new(by_key, @aggregators),
|
|
2666
|
+
Operator::NonRelational::Sort.new(by_key.to_ordering_key),
|
|
2667
|
+
datasets
|
|
2668
|
+
end
|
|
2632
2669
|
end
|
|
2633
2670
|
|
|
2634
2671
|
end # class Summarize
|
|
@@ -2908,12 +2945,12 @@ module Alf
|
|
|
2908
2945
|
Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
|
|
2909
2946
|
}
|
|
2910
2947
|
end
|
|
2911
|
-
def least();
|
|
2948
|
+
def least(); Set.new; end
|
|
2912
2949
|
def _happens(memo, val)
|
|
2913
2950
|
memo << val
|
|
2914
2951
|
end
|
|
2915
2952
|
def finalize(memo)
|
|
2916
|
-
memo
|
|
2953
|
+
Relation.coerce memo
|
|
2917
2954
|
end
|
|
2918
2955
|
end
|
|
2919
2956
|
|
|
@@ -2944,7 +2981,6 @@ module Alf
|
|
|
2944
2981
|
end
|
|
2945
2982
|
end
|
|
2946
2983
|
|
|
2947
|
-
Lispy::Agg = Aggregator
|
|
2948
2984
|
end # class Aggregator
|
|
2949
2985
|
|
|
2950
2986
|
#
|
|
@@ -2981,4 +3017,119 @@ module Alf
|
|
|
2981
3017
|
|
|
2982
3018
|
end # class Buffer
|
|
2983
3019
|
|
|
3020
|
+
#
|
|
3021
|
+
# Implements a small LISP-like DSL on top of Alf.
|
|
3022
|
+
#
|
|
3023
|
+
# The lispy dialect is the functional one used in .alf files and in compiled
|
|
3024
|
+
# expressions as below:
|
|
3025
|
+
#
|
|
3026
|
+
# Alf.lispy.compile do
|
|
3027
|
+
# (restrict :suppliers, lambda{ city == 'London' })
|
|
3028
|
+
# end
|
|
3029
|
+
#
|
|
3030
|
+
# The DSL this module provides is part of Alf's public API and won't be broken
|
|
3031
|
+
# without a major version change. The module itself and its inclusion pre-
|
|
3032
|
+
# conditions are not part of the DSL itself, thus not considered as part of
|
|
3033
|
+
# the API, and may therefore evolve at any time. In other words, this module
|
|
3034
|
+
# is not intended to be directly included by third-party classes.
|
|
3035
|
+
#
|
|
3036
|
+
module Lispy
|
|
3037
|
+
|
|
3038
|
+
alias :ruby_extend :extend
|
|
3039
|
+
|
|
3040
|
+
# The environment
|
|
3041
|
+
attr_accessor :environment
|
|
3042
|
+
|
|
3043
|
+
#
|
|
3044
|
+
# Compiles a query expression given by a String or a block and returns
|
|
3045
|
+
# the result (typically a tuple iterator)
|
|
3046
|
+
#
|
|
3047
|
+
# Example
|
|
3048
|
+
#
|
|
3049
|
+
# # with a string
|
|
3050
|
+
# op = compile "(restrict :suppliers, lambda{ city == 'London' })"
|
|
3051
|
+
#
|
|
3052
|
+
# # or with a block
|
|
3053
|
+
# op = compile {
|
|
3054
|
+
# (restrict :suppliers, lambda{ city == 'London' })
|
|
3055
|
+
# }
|
|
3056
|
+
#
|
|
3057
|
+
# @param [String] expr a Lispy expression to compile
|
|
3058
|
+
# @return [Iterator] the iterator resulting from compilation
|
|
3059
|
+
#
|
|
3060
|
+
def compile(expr = nil, path = nil, &block)
|
|
3061
|
+
if expr.nil?
|
|
3062
|
+
instance_eval(&block)
|
|
3063
|
+
else
|
|
3064
|
+
(path ? Kernel.eval(expr, binding, path) : Kernel.eval(expr, binding))
|
|
3065
|
+
end
|
|
3066
|
+
end
|
|
3067
|
+
|
|
3068
|
+
#
|
|
3069
|
+
# Evaluates a query expression given by a String or a block and returns
|
|
3070
|
+
# the result as an in-memory relation (Alf::Relation)
|
|
3071
|
+
#
|
|
3072
|
+
# Example:
|
|
3073
|
+
#
|
|
3074
|
+
# # with a string
|
|
3075
|
+
# rel = evaluate "(restrict :suppliers, lambda{ city == 'London' })"
|
|
3076
|
+
#
|
|
3077
|
+
# # or with a block
|
|
3078
|
+
# rel = evaluate {
|
|
3079
|
+
# (restrict :suppliers, lambda{ city == 'London' })
|
|
3080
|
+
# }
|
|
3081
|
+
#
|
|
3082
|
+
def evaluate(expr = nil, path = nil, &block)
|
|
3083
|
+
compile(expr, path, &block).to_rel
|
|
3084
|
+
end
|
|
3085
|
+
|
|
3086
|
+
#
|
|
3087
|
+
# Delegated to the current environment
|
|
3088
|
+
#
|
|
3089
|
+
# This method returns the dataset associated to a given name. The result
|
|
3090
|
+
# may depend on the current environment, but is generally an Iterator,
|
|
3091
|
+
# often a Reader instance.
|
|
3092
|
+
#
|
|
3093
|
+
# @param [Symbol] name name of the dataset to retrieve
|
|
3094
|
+
# @return [Iterator] the dataset as an iterator
|
|
3095
|
+
# @see Environment#dataset
|
|
3096
|
+
#
|
|
3097
|
+
def dataset(name)
|
|
3098
|
+
raise "Environment not set" unless @environment
|
|
3099
|
+
@environment.dataset(name)
|
|
3100
|
+
end
|
|
3101
|
+
|
|
3102
|
+
# Functional equivalent to Alf::Relation[...]
|
|
3103
|
+
def relation(*tuples)
|
|
3104
|
+
Relation.coerce(tuples)
|
|
3105
|
+
end
|
|
3106
|
+
|
|
3107
|
+
#
|
|
3108
|
+
# Install the DSL through iteration over defined operators
|
|
3109
|
+
#
|
|
3110
|
+
Operator::each do |op_class|
|
|
3111
|
+
meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
|
|
3112
|
+
if op_class.unary?
|
|
3113
|
+
define_method(meth_name) do |child, *args|
|
|
3114
|
+
child = Iterator.coerce(child, environment)
|
|
3115
|
+
op_class.new(*args).pipe(child, environment)
|
|
3116
|
+
end
|
|
3117
|
+
elsif op_class.binary?
|
|
3118
|
+
define_method(meth_name) do |left, right, *args|
|
|
3119
|
+
operands = [left, right].collect{|x| Iterator.coerce(x, environment)}
|
|
3120
|
+
op_class.new(*args).pipe(operands, environment)
|
|
3121
|
+
end
|
|
3122
|
+
else
|
|
3123
|
+
raise "Unexpected operator #{op_class}"
|
|
3124
|
+
end
|
|
3125
|
+
end # Operators::each
|
|
3126
|
+
|
|
3127
|
+
def allbut(child, attributes)
|
|
3128
|
+
(project child, attributes, true)
|
|
3129
|
+
end
|
|
3130
|
+
|
|
3131
|
+
Agg = Alf::Aggregator
|
|
3132
|
+
end # module Lispy
|
|
3133
|
+
|
|
2984
3134
|
end # module Alf
|
|
3135
|
+
require "alf/relation"
|