alf 0.9.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. data/CHANGELOG.md +255 -129
  2. data/Gemfile +31 -1
  3. data/Gemfile.lock +17 -20
  4. data/LICENCE.md +1 -1
  5. data/Manifest.txt +2 -0
  6. data/README.md +37 -43
  7. data/TODO.md +1 -1
  8. data/alf.gemspec +10 -7
  9. data/alf.noespec +24 -13
  10. data/bin/alf +2 -2
  11. data/doc/commands/exec.md +16 -0
  12. data/doc/commands/help.md +11 -0
  13. data/doc/commands/main.md +33 -0
  14. data/doc/commands/show.md +19 -0
  15. data/doc/operators/non_relational/autonum.md +23 -0
  16. data/doc/operators/non_relational/clip.md +31 -0
  17. data/doc/operators/non_relational/coerce.md +15 -0
  18. data/doc/operators/non_relational/compact.md +20 -0
  19. data/doc/operators/non_relational/defaults.md +32 -0
  20. data/doc/operators/non_relational/generator.md +20 -0
  21. data/doc/operators/non_relational/sort.md +24 -0
  22. data/doc/operators/relational/extend.md +18 -0
  23. data/doc/operators/relational/group.md +27 -0
  24. data/doc/operators/relational/intersect.md +13 -0
  25. data/doc/operators/relational/join.md +27 -0
  26. data/doc/operators/relational/matching.md +20 -0
  27. data/doc/operators/relational/minus.md +12 -0
  28. data/doc/operators/relational/not-matching.md +20 -0
  29. data/doc/operators/relational/project.md +28 -0
  30. data/doc/operators/relational/quota.md +21 -0
  31. data/doc/operators/relational/rank.md +27 -0
  32. data/doc/operators/relational/rename.md +17 -0
  33. data/doc/operators/relational/restrict.md +25 -0
  34. data/doc/operators/relational/summarize.md +25 -0
  35. data/doc/operators/relational/ungroup.md +20 -0
  36. data/doc/operators/relational/union.md +14 -0
  37. data/doc/operators/relational/unwrap.md +20 -0
  38. data/doc/operators/relational/wrap.md +24 -0
  39. data/examples/csv/suppliers.csv +6 -0
  40. data/examples/logs/access.log +1000 -0
  41. data/examples/logs/combined.alf +2 -0
  42. data/examples/logs/hits.alf +14 -0
  43. data/examples/logs/not_found.alf +7 -0
  44. data/examples/logs/robots-cheating.alf +11 -0
  45. data/examples/logs/robots.alf +8 -0
  46. data/examples/northwind/customers.csv +92 -0
  47. data/examples/northwind/northwind.db +0 -0
  48. data/examples/northwind/orders.csv +831 -0
  49. data/examples/operators/clip.alf +1 -1
  50. data/examples/operators/database.alf +5 -6
  51. data/examples/operators/defaults.alf +1 -1
  52. data/examples/operators/group.alf +1 -1
  53. data/examples/operators/project.alf +2 -1
  54. data/examples/operators/pseudo-with.alf +2 -2
  55. data/examples/operators/quota.alf +2 -2
  56. data/examples/operators/summarize.alf +2 -2
  57. data/lib/alf/aggregator/aggregators.rb +77 -0
  58. data/lib/alf/aggregator/base.rb +95 -0
  59. data/lib/alf/aggregator/class_methods.rb +57 -0
  60. data/lib/alf/buffer/sorted.rb +48 -0
  61. data/lib/alf/command/class_methods.rb +27 -0
  62. data/lib/alf/command/doc_manager.rb +72 -0
  63. data/lib/alf/command/exec.rb +12 -0
  64. data/lib/alf/command/help.rb +31 -0
  65. data/lib/alf/command/main.rb +146 -0
  66. data/lib/alf/command/show.rb +33 -0
  67. data/lib/alf/environment/base.rb +37 -0
  68. data/lib/alf/environment/class_methods.rb +93 -0
  69. data/lib/alf/environment/explicit.rb +38 -0
  70. data/lib/alf/environment/folder.rb +62 -0
  71. data/lib/alf/extra/csv.rb +104 -0
  72. data/lib/alf/extra/logs.rb +100 -0
  73. data/lib/alf/extra/sequel.rb +77 -0
  74. data/lib/alf/{yaml.rb → extra/yaml.rb} +0 -0
  75. data/lib/alf/extra.rb +5 -0
  76. data/lib/alf/iterator/base.rb +38 -0
  77. data/lib/alf/iterator/class_methods.rb +22 -0
  78. data/lib/alf/iterator/proxy.rb +33 -0
  79. data/lib/alf/lispy/instance_methods.rb +157 -0
  80. data/lib/alf/operator/base.rb +74 -0
  81. data/lib/alf/operator/binary.rb +32 -0
  82. data/lib/alf/operator/cesure.rb +45 -0
  83. data/lib/alf/operator/class_methods.rb +132 -0
  84. data/lib/alf/operator/experimental.rb +9 -0
  85. data/lib/alf/operator/non_relational/autonum.rb +24 -0
  86. data/lib/alf/operator/non_relational/clip.rb +20 -0
  87. data/lib/alf/operator/non_relational/coerce.rb +21 -0
  88. data/lib/alf/operator/non_relational/compact.rb +62 -0
  89. data/lib/alf/operator/non_relational/defaults.rb +25 -0
  90. data/lib/alf/operator/non_relational/generator.rb +38 -0
  91. data/lib/alf/operator/non_relational/sort.rb +23 -0
  92. data/lib/alf/operator/nullary.rb +20 -0
  93. data/lib/alf/operator/relational/extend.rb +24 -0
  94. data/lib/alf/operator/relational/group.rb +32 -0
  95. data/lib/alf/operator/relational/intersect.rb +37 -0
  96. data/lib/alf/operator/relational/join.rb +106 -0
  97. data/lib/alf/operator/relational/matching.rb +45 -0
  98. data/lib/alf/operator/relational/minus.rb +37 -0
  99. data/lib/alf/operator/relational/not_matching.rb +45 -0
  100. data/lib/alf/operator/relational/project.rb +22 -0
  101. data/lib/alf/operator/relational/quota.rb +51 -0
  102. data/lib/alf/operator/relational/rank.rb +55 -0
  103. data/lib/alf/operator/relational/rename.rb +19 -0
  104. data/lib/alf/operator/relational/restrict.rb +20 -0
  105. data/lib/alf/operator/relational/summarize.rb +83 -0
  106. data/lib/alf/operator/relational/ungroup.rb +25 -0
  107. data/lib/alf/operator/relational/union.rb +32 -0
  108. data/lib/alf/operator/relational/unwrap.rb +21 -0
  109. data/lib/alf/operator/relational/wrap.rb +22 -0
  110. data/lib/alf/operator/shortcut.rb +53 -0
  111. data/lib/alf/operator/signature.rb +262 -0
  112. data/lib/alf/operator/transform.rb +27 -0
  113. data/lib/alf/operator/unary.rb +38 -0
  114. data/lib/alf/reader/alf_file.rb +24 -0
  115. data/lib/alf/reader/base.rb +119 -0
  116. data/lib/alf/reader/class_methods.rb +82 -0
  117. data/lib/alf/reader/rash.rb +28 -0
  118. data/lib/alf/relation/class_methods.rb +37 -0
  119. data/lib/alf/relation/instance_methods.rb +127 -0
  120. data/lib/alf/renderer/base.rb +72 -0
  121. data/lib/alf/renderer/class_methods.rb +58 -0
  122. data/lib/alf/renderer/rash.rb +19 -0
  123. data/lib/alf/{text.rb → renderer/text.rb} +1 -1
  124. data/lib/alf/tools/coerce.rb +14 -0
  125. data/lib/alf/tools/miscellaneous.rb +77 -0
  126. data/lib/alf/tools/to_lispy.rb +99 -0
  127. data/lib/alf/tools/to_ruby_literal.rb +14 -0
  128. data/lib/alf/tools/tuple_handle.rb +50 -0
  129. data/lib/alf/types/attr_list.rb +56 -0
  130. data/lib/alf/types/attr_name.rb +28 -0
  131. data/lib/alf/types/boolean.rb +12 -0
  132. data/lib/alf/types/heading.rb +96 -0
  133. data/lib/alf/types/ordering.rb +93 -0
  134. data/lib/alf/types/renaming.rb +57 -0
  135. data/lib/alf/types/summarization.rb +76 -0
  136. data/lib/alf/types/tuple_computation.rb +61 -0
  137. data/lib/alf/types/tuple_expression.rb +61 -0
  138. data/lib/alf/types/tuple_predicate.rb +49 -0
  139. data/lib/alf/version.rb +2 -2
  140. data/lib/alf.rb +193 -3714
  141. data/spec/integration/__database__/group.alf +1 -1
  142. data/spec/integration/__database__/suppliers_csv.csv +6 -0
  143. data/spec/integration/command/alf/alf.db +0 -0
  144. data/spec/integration/command/alf/alf_env_sqlite.cmd +1 -0
  145. data/spec/integration/command/alf/alf_env_sqlite.stdout +9 -0
  146. data/spec/integration/command/alf/alf_help.cmd +1 -0
  147. data/spec/integration/command/alf/alf_help.stdout +67 -0
  148. data/spec/integration/command/autonum/autonum_0.cmd +1 -1
  149. data/spec/integration/command/coerce/coerce_1.cmd +1 -0
  150. data/spec/integration/command/coerce/coerce_1.stdout +5 -0
  151. data/spec/integration/command/defaults/defaults_0.cmd +1 -1
  152. data/spec/integration/command/defaults/defaults_0.stdout +9 -9
  153. data/spec/integration/command/defaults/defaults_2.cmd +1 -0
  154. data/spec/integration/command/defaults/defaults_2.stdout +9 -0
  155. data/spec/integration/command/generator/generator_1.cmd +1 -0
  156. data/spec/integration/command/generator/generator_1.stdout +10 -0
  157. data/spec/integration/command/generator/generator_2.cmd +1 -0
  158. data/spec/integration/command/generator/generator_2.stdout +5 -0
  159. data/spec/integration/command/generator/generator_3.cmd +1 -0
  160. data/spec/integration/command/generator/generator_3.stdout +5 -0
  161. data/spec/integration/command/group/group_0.cmd +1 -1
  162. data/spec/integration/command/group/group_1.cmd +1 -1
  163. data/spec/integration/command/help/help_1.cmd +1 -0
  164. data/spec/integration/command/help/help_1.stdout +22 -0
  165. data/spec/integration/command/quota/quota_0.cmd +1 -1
  166. data/spec/integration/command/rank/rank_1.cmd +1 -1
  167. data/spec/integration/command/rank/rank_1.stdout +10 -10
  168. data/spec/integration/command/rank/rank_2.cmd +1 -1
  169. data/spec/integration/command/rank/rank_2.stdout +10 -10
  170. data/spec/integration/command/rank/rank_3.cmd +1 -1
  171. data/spec/integration/command/rank/rank_3.stdout +10 -10
  172. data/spec/integration/command/rank/rank_4.cmd +1 -1
  173. data/spec/integration/command/rank/rank_5.cmd +1 -1
  174. data/spec/integration/command/show/show_csv.cmd +1 -0
  175. data/spec/integration/command/show/show_csv.stdout +6 -0
  176. data/spec/integration/command/show/show_rash_2.cmd +1 -1
  177. data/spec/integration/command/show/show_rash_2.stdout +5 -5
  178. data/spec/integration/command/sort/sort_0.cmd +1 -1
  179. data/spec/integration/command/sort/sort_1.cmd +1 -1
  180. data/spec/integration/command/sort/sort_1.stdout +2 -2
  181. data/spec/integration/command/sort/sort_2.cmd +1 -0
  182. data/spec/integration/command/sort/sort_2.stdout +9 -0
  183. data/spec/integration/command/sort/sort_3.cmd +1 -0
  184. data/spec/integration/command/sort/sort_3.stdout +9 -0
  185. data/spec/integration/command/summarize/summarize_0.cmd +1 -1
  186. data/spec/integration/command/ungroup/ungroup_0.cmd +1 -1
  187. data/spec/integration/command/wrap/wrap_0.cmd +1 -1
  188. data/spec/integration/semantics/test_project.alf +5 -6
  189. data/spec/integration/semantics/test_rank.alf +16 -16
  190. data/spec/integration/test_command.rb +17 -6
  191. data/spec/integration/test_examples.rb +1 -1
  192. data/spec/regression/logs/apache_combined.log +5 -0
  193. data/spec/regression/logs/test_path_attribute.rb +25 -0
  194. data/spec/regression/relation/test_relation_allbut_all.rb +14 -0
  195. data/spec/shared/an_operator_class.rb +10 -5
  196. data/spec/spec_helper.rb +1 -7
  197. data/spec/unit/assumptions/test_set.rb +64 -0
  198. data/spec/unit/command/doc_manager/dynamic.md +1 -0
  199. data/spec/unit/command/doc_manager/example.md +1 -0
  200. data/spec/unit/command/doc_manager/example_1.txt +11 -0
  201. data/spec/unit/command/doc_manager/static.md +1 -0
  202. data/spec/unit/command/doc_manager/test_call.rb +49 -0
  203. data/spec/unit/csv/input.csv +3 -0
  204. data/spec/unit/csv/test_reader.rb +66 -0
  205. data/spec/unit/csv/test_renderer.rb +73 -0
  206. data/spec/unit/lispy/test_relation.rb +37 -0
  207. data/spec/unit/lispy/test_run.rb +40 -0
  208. data/spec/unit/lispy/test_tuple.rb +36 -0
  209. data/spec/unit/logs/apache_combined.log +5 -0
  210. data/spec/unit/logs/postgresql.log +29 -0
  211. data/spec/unit/logs/test_reader.rb +56 -0
  212. data/spec/unit/operator/non_relational/compact/{buffer_based.rb → test_buffer_based.rb} +0 -0
  213. data/spec/unit/operator/non_relational/test_clip.rb +1 -1
  214. data/spec/unit/operator/non_relational/test_coerce.rb +35 -0
  215. data/spec/unit/operator/non_relational/test_defaults.rb +15 -2
  216. data/spec/unit/operator/non_relational/test_generator.rb +78 -0
  217. data/spec/unit/operator/relational/join/test_hash_based.rb +4 -4
  218. data/spec/unit/operator/relational/matching/test_hash_based.rb +6 -6
  219. data/spec/unit/operator/relational/not_matching/test_hash_based.rb +4 -4
  220. data/spec/unit/operator/relational/summarize/test_hash_based.rb +10 -6
  221. data/spec/unit/operator/relational/summarize/test_sort_based.rb +18 -7
  222. data/spec/unit/operator/relational/test_group.rb +8 -8
  223. data/spec/unit/operator/relational/test_intersect.rb +3 -3
  224. data/spec/unit/operator/relational/test_minus.rb +3 -3
  225. data/spec/unit/operator/relational/test_project.rb +12 -2
  226. data/spec/unit/operator/relational/test_quota.rb +5 -6
  227. data/spec/unit/operator/relational/test_summarize.rb +9 -11
  228. data/spec/unit/operator/relational/test_union.rb +1 -1
  229. data/spec/unit/operator/relational/test_wrap.rb +1 -1
  230. data/spec/unit/operator/signature/test_collect_on.rb +45 -0
  231. data/spec/unit/operator/signature/test_initialize.rb +17 -0
  232. data/spec/unit/operator/signature/test_install.rb +56 -0
  233. data/spec/unit/operator/signature/test_option_parser.rb +36 -0
  234. data/spec/unit/operator/signature/test_parse_args.rb +60 -0
  235. data/spec/unit/operator/signature/test_parse_argv.rb +87 -0
  236. data/spec/unit/operator/signature/test_to_lispy.rb +102 -0
  237. data/spec/unit/operator/signature/test_to_shell.rb +103 -0
  238. data/spec/unit/operator/test_non_relational.rb +3 -1
  239. data/spec/unit/relation/test_relops.rb +20 -15
  240. data/spec/unit/sequel/alf.db +0 -0
  241. data/spec/unit/sequel/test_environment.rb +54 -0
  242. data/spec/unit/test_aggregator.rb +32 -22
  243. data/spec/unit/test_environment.rb +5 -0
  244. data/spec/unit/test_lispy.rb +4 -0
  245. data/spec/unit/test_relation.rb +5 -0
  246. data/spec/unit/text/test_cell.rb +6 -6
  247. data/spec/unit/text/test_row.rb +3 -3
  248. data/spec/unit/text/test_table.rb +6 -6
  249. data/spec/unit/tools/test_coalesce.rb +15 -0
  250. data/spec/unit/tools/test_coerce.rb +10 -0
  251. data/spec/unit/tools/test_to_lispy.rb +138 -0
  252. data/spec/unit/tools/test_to_ruby_literal.rb +10 -0
  253. data/spec/unit/tools/test_tuple_handle.rb +1 -59
  254. data/spec/unit/types/test_attr_list.rb +106 -0
  255. data/spec/unit/types/test_attr_name.rb +52 -0
  256. data/spec/unit/{test_heading.rb → types/test_heading.rb} +10 -0
  257. data/spec/unit/types/test_ordering.rb +127 -0
  258. data/spec/unit/types/test_renaming.rb +55 -0
  259. data/spec/unit/types/test_summarization.rb +63 -0
  260. data/spec/unit/types/test_tuple_computation.rb +60 -0
  261. data/spec/unit/types/test_tuple_expression.rb +64 -0
  262. data/spec/unit/types/test_tuple_predicate.rb +79 -0
  263. data/tasks/debug_mail.rake +1 -1
  264. data/tasks/debug_mail.txt +5 -0
  265. data/tasks/gh-pages.rake +63 -0
  266. metadata +325 -52
  267. data/spec/unit/operator/test_command_methods.rb +0 -38
  268. data/spec/unit/tools/test_ordering_key.rb +0 -94
  269. data/spec/unit/tools/test_parse_commandline_args.rb +0 -47
  270. data/spec/unit/tools/test_projection_key.rb +0 -83
data/lib/alf.rb CHANGED
@@ -12,338 +12,37 @@ require 'myrrha/coerce'
12
12
  # Classy data-manipulation dressed in a DSL (+ commandline)
13
13
  #
14
14
  module Alf
15
-
16
- #
17
- # Provides tooling methods that are used here and there in Alf.
18
- #
19
- module Tools
20
-
21
- #
22
- # Parse a string with commandline arguments and returns an array.
23
- #
24
- # Example:
25
- #
26
- # parse_commandline_args("--text --size=10") # => ['--text', '--size=10']
27
- #
28
- def parse_commandline_args(args)
29
- args = args.split(/\s+/)
30
- result = []
31
- until args.empty?
32
- if args.first[0,1] == '"'
33
- if args.first[-1,1] == '"'
34
- result << args.shift[1...-1]
35
- else
36
- block = [ args.shift[1..-1] ]
37
- while args.first[-1,1] != '"'
38
- block << args.shift
39
- end
40
- block << args.shift[0...-1]
41
- result << block.join(" ")
42
- end
43
- elsif args.first[0,1] == "'"
44
- if args.first[-1,1] == "'"
45
- result << args.shift[1...-1]
46
- else
47
- block = [ args.shift[1..-1] ]
48
- while args.first[-1,1] != "'"
49
- block << args.shift
50
- end
51
- block << args.shift[0...-1]
52
- result << block.join(" ")
53
- end
54
- else
55
- result << args.shift
56
- end
57
- end
58
- result
59
- end
60
15
 
61
- # Helper to define methods with multiple signatures.
62
- #
63
- # Example:
64
- #
65
- # varargs([1, "hello"], [Integer, String]) # => [1, "hello"]
66
- # varargs(["hello"], [Integer, String]) # => [nil, "hello"]
67
- #
68
- def varargs(args, types)
69
- types.collect{|t| t===args.first ? args.shift : nil}
70
- end
71
-
72
- #
73
- # Attempt to require(who) the most friendly way as possible.
74
- #
75
- def friendly_require(who, dep = nil, retried = false)
76
- gem(who, dep) if dep && defined?(Gem)
77
- require who
78
- rescue LoadError => ex
79
- if retried
80
- raise "Unable to require #{who}, which is now needed\n"\
81
- "Try 'gem install #{who}'"
82
- else
83
- require 'rubygems' unless defined?(Gem)
84
- friendly_require(who, dep, true)
85
- end
86
- end
16
+ #
17
+ # Encapsulates all types
18
+ #
19
+ module Types
20
+ require 'alf/types/attr_name'
21
+ require 'alf/types/boolean'
22
+ require 'alf/types/heading'
23
+ require 'alf/types/ordering'
24
+ require 'alf/types/attr_list'
25
+ require 'alf/types/renaming'
26
+ require 'alf/types/tuple_expression'
27
+ require 'alf/types/tuple_predicate'
28
+ require 'alf/types/summarization'
29
+ require 'alf/types/tuple_computation'
87
30
 
88
- # Returns the unqualified name of a ruby class or module
89
- #
90
- # Example
91
- #
92
- # class_name(Alf::Tools) -> :Tools
93
- #
94
- def class_name(clazz)
95
- clazz.name.to_s =~ /([A-Za-z0-9_]+)$/
96
- $1.to_sym
97
- end
98
-
99
- #
100
- # Converts an unqualified class or module name to a ruby case method name.
101
- #
102
- # Example
103
- #
104
- # ruby_case(:Alf) -> "alf"
105
- # ruby_case(:HelloWorld) -> "hello_world"
106
- #
107
- def ruby_case(s)
108
- s.to_s.gsub(/[A-Z]/){|x| "_#{x.downcase}"}[1..-1]
109
- end
110
-
111
- #
112
- # Returns the first non nil values from arguments
113
- #
114
- # Example
115
- #
116
- # coalesce(nil, 1, "abc") -> 1
117
- #
118
- def coalesce(*args)
119
- args.find{|x| !x.nil?}
120
- end
121
-
122
- #
123
- # Iterates over enum and yields the block on each element.
124
- # Collect block results as key/value pairs returns them as
125
- # a Hash.
126
- #
127
- def tuple_collect(enum)
128
- tuple = {}
129
- enum.each do |elm|
130
- k, v = yield(elm)
131
- tuple[k] = v
132
- end
133
- tuple
31
+ # Install all types on Alf now
32
+ constants.each do |s|
33
+ Alf.const_set(s, const_get(s))
134
34
  end
35
+ end
135
36
 
136
- #
137
- # Provides a handle, implementing a flyweight design pattern on tuples.
138
- #
139
- class TupleHandle
140
-
141
- # Creates an handle instance
142
- def initialize
143
- @tuple = nil
144
- end
145
-
146
- #
147
- # Sets the next tuple to use.
148
- #
149
- # This method installs the handle as a side effect
150
- # on first call.
151
- #
152
- def set(tuple)
153
- build(tuple) if @tuple.nil?
154
- @tuple = tuple
155
- self
156
- end
157
-
158
- #
159
- # Compiles a tuple expression and returns a lambda
160
- # instance that can be passed to evaluate later.
161
- #
162
- def self.compile(expr)
163
- case expr
164
- when Proc
165
- expr
166
- when NilClass
167
- compile('true')
168
- when Hash
169
- if expr.empty?
170
- compile(nil)
171
- else
172
- compile expr.each_pair.collect{|k,v|
173
- "(self.#{k} == #{Myrrha.to_ruby_literal(v)})"
174
- }.join(" && ")
175
- end
176
- when Array
177
- compile(Hash[*expr])
178
- when String, Symbol
179
- eval("lambda{ #{expr} }")
180
- else
181
- raise ArgumentError, "Unable to compile #{expr} to a TupleHandle"
182
- end
183
- end
184
-
185
- #
186
- # Evaluates an expression on the current tuple. Expression
187
- # can be a lambda or a string (immediately compiled in the
188
- # later case).
189
- #
190
- def evaluate(expr)
191
- if RUBY_VERSION < "1.9"
192
- instance_eval(&TupleHandle.compile(expr))
193
- else
194
- instance_exec(&TupleHandle.compile(expr))
195
- end
196
- end
197
-
198
- private
199
-
200
- #
201
- # Builds this handle with a tuple.
202
- #
203
- # This method should be called only once and installs
204
- # instance methods on the handle with keys of _tuple_.
205
- #
206
- def build(tuple)
207
- tuple.keys.each do |k|
208
- (class << self; self; end).send(:define_method, k) do
209
- @tuple[k]
210
- end
211
- end
212
- end
213
-
214
- end # class TupleHandle
215
-
216
- #
217
- # Defines a projection key
218
- #
219
- class ProjectionKey
220
- include Tools
221
-
222
- # Projection attributes
223
- attr_accessor :attributes
224
-
225
- # Allbut projection?
226
- attr_accessor :allbut
227
-
228
- def initialize(attributes, allbut = false)
229
- @attributes = attributes
230
- @allbut = allbut
231
- end
232
-
233
- def self.coerce(arg)
234
- case arg
235
- when Array
236
- ProjectionKey.new(arg, false)
237
- when OrderingKey
238
- ProjectionKey.new(arg.attributes, false)
239
- when ProjectionKey
240
- arg
241
- else
242
- raise ArgumentError, "Unable to coerce #{arg} to a projection key"
243
- end
244
- end
245
-
246
- def to_ordering_key
247
- OrderingKey.new attributes.collect{|arg|
248
- [arg, :asc]
249
- }
250
- end
251
-
252
- def project(tuple)
253
- split(tuple).first
254
- end
255
-
256
- def split(tuple)
257
- projection, rest = {}, tuple.dup
258
- attributes.each do |a|
259
- projection[a] = tuple[a]
260
- rest.delete(a)
261
- end
262
- @allbut ? [rest, projection] : [projection, rest]
263
- end
264
-
265
- end # class ProjectionKey
266
-
267
- #
268
- # Encapsulates tools for computing orders on tuples
269
- #
270
- class OrderingKey
271
-
272
- attr_reader :ordering
273
-
274
- def initialize(ordering = [])
275
- @ordering = ordering
276
- @sorter = nil
277
- end
278
-
279
- #
280
- # Coerces `arg` to an ordering key.
281
- #
282
- # Implemented coercions are:
283
- # * Array of symbols (all attributes in ascending order)
284
- # * Array of [Symbol, :asc|:desc] pairs (obvious semantics)
285
- # * ProjectionKey (all its attributes in ascending order)
286
- # * OrderingKey (self)
287
- #
288
- # @return [OrderingKey]
289
- # @raises [ArgumentError] when `arg` is not recognized
290
- #
291
- def self.coerce(arg)
292
- case arg
293
- when Array
294
- if arg.all?{|a| a.is_a?(Array)}
295
- OrderingKey.new(arg)
296
- elsif arg.all?{|a| a.is_a?(Symbol)}
297
- sliced = arg.each_slice(2)
298
- if sliced.all?{|a,o| [:asc,:desc].include?(o)}
299
- OrderingKey.new sliced.to_a
300
- else
301
- OrderingKey.new arg.collect{|a| [a, :asc]}
302
- end
303
- end
304
- when ProjectionKey
305
- arg.to_ordering_key
306
- when OrderingKey
307
- arg
308
- else
309
- raise ArgumentError, "Unable to coerce #{arg} to an ordering key"
310
- end
311
- end
312
-
313
- def attributes
314
- @ordering.collect{|arg| arg.first}
315
- end
316
-
317
- def order_by(attr, order = :asc)
318
- @ordering << [attr, order]
319
- @sorter = nil
320
- self
321
- end
322
-
323
- def order_of(attr)
324
- @ordering.find{|arg| arg.first == attr}.last
325
- end
326
-
327
- def compare(t1,t2)
328
- @ordering.each do |attr,order|
329
- x, y = t1[attr], t2[attr]
330
- comp = x.respond_to?(:<=>) ? (x <=> y) : (x.to_s <=> y.to_s)
331
- comp *= -1 if order == :desc
332
- return comp unless comp == 0
333
- end
334
- return 0
335
- end
336
-
337
- def sorter
338
- @sorter ||= lambda{|t1,t2| compare(t1, t2)}
339
- end
340
-
341
- def +(other)
342
- other = OrderingKey.coerce(other)
343
- OrderingKey.new(@ordering + other.ordering)
344
- end
345
-
346
- end # class OrderingKey
37
+ #
38
+ # Provides tooling methods that are used here and there in Alf.
39
+ #
40
+ module Tools
41
+ require 'alf/tools/coerce'
42
+ require 'alf/tools/to_ruby_literal'
43
+ require 'alf/tools/to_lispy'
44
+ require 'alf/tools/tuple_handle'
45
+ require 'alf/tools/miscellaneous'
347
46
 
348
47
  extend Tools
349
48
  end # module Tools
@@ -375,211 +74,11 @@ module Alf
375
74
  # for details.
376
75
  #
377
76
  class Environment
378
-
379
- # Registered environments
380
- @@environments = []
381
-
382
- #
383
- # Register an environment class under a specific name.
384
- #
385
- # Registered class must implement a recognizes? method that takes an array
386
- # of arguments; it must returns true if an environment instance can be built
387
- # using those arguments, false otherwise. Please be very specific in the
388
- # implementation for returning true. See also autodetect and recognizes?
389
- #
390
- # @param [Symbol] name name of the environment kind
391
- # @param [Class] clazz class that implemented the environment
392
- #
393
- def self.register(name, clazz)
394
- @@environments << [name, clazz]
395
- (class << self; self; end).
396
- send(:define_method, name) do |*args|
397
- clazz.new(*args)
398
- end
399
- end
400
-
401
- #
402
- # Auto-detect the environment to use for specific arguments.
403
- #
404
- # This method returns an instance of the first registered Environment class
405
- # that returns true to an invocation of recognizes?(args). It raises an
406
- # ArgumentError if no such class can be found.
407
- #
408
- # @return [Environment] an environment instance
409
- # @raise [ArgumentError] when no registered class recognizes the arguments
410
- #
411
- def self.autodetect(*args)
412
- if (args.size == 1) && args.first.is_a?(Environment)
413
- return args.first
414
- else
415
- @@environments.each do |name,clazz|
416
- return clazz.new(*args) if clazz.recognizes?(args)
417
- end
418
- end
419
- raise ArgumentError, "Unable to auto-detect Environment with #{args.inspect}"
420
- end
421
-
422
- #
423
- # (see Environment.autodetect)
424
- #
425
- def self.coerce(*args)
426
- autodetect(*args)
427
- end
428
-
429
- #
430
- # Returns true _args_ can be used for building an environment instance,
431
- # false otherwise.
432
- #
433
- # When returning true, an immediate invocation of new(*args) should
434
- # succeed. While runtime exception are admitted (no such database, for
435
- # example), argument errors should not occur (missing argument, wrong
436
- # typing, etc.).
437
- #
438
- # Please be specific in the implementation of this extension point, as
439
- # registered environments for a chain and each of them should have a
440
- # chance of being selected.
441
- #
442
- def self.recognizes?(args)
443
- false
444
- end
445
-
446
- #
447
- # Returns a dataset whose name is provided.
448
- #
449
- # This method resolves named datasets to tuple enumerables. When the
450
- # dataset exists, this method must return an Iterator, typically a
451
- # Reader instance. Otherwise, it must throw a NoSuchDatasetError.
452
- #
453
- # @param [Symbol] name the name of a dataset
454
- # @return [Iterator] an iterator, typically a Reader instance
455
- # @raise [NoSuchDatasetError] when the dataset does not exists
456
- #
457
- def dataset(name)
458
- end
459
- undef :dataset
460
-
461
- #
462
- # Branches this environment and puts some additional explicit
463
- # definitions.
464
- #
465
- # This method is provided for (with ...) expressions and should not
466
- # be overriden by subclasses.
467
- #
468
- # @param [Hash] a set of (name, Iterator) pairs.
469
- # @return [Environment] an environment instance with new definitions set
470
- #
471
- def branch(defs)
472
- Explicit.new(defs, self)
473
- end
474
-
475
- #
476
- # Specialization of Environment that works with explicitely defined
477
- # datasources and allow branching and unbranching.
478
- #
479
- class Explicit < Environment
480
-
481
- #
482
- # Creates a new environment instance with initial definitions
483
- # and optional child environment.
484
- #
485
- def initialize(defs = {}, child = nil)
486
- @defs = defs
487
- @child = child
488
- end
489
-
490
- #
491
- # Unbranches this environment and returns its child
492
- #
493
- def unbranch
494
- @child
495
- end
496
-
497
- # (see Environment#dataset)
498
- def dataset(name)
499
- if @defs.has_key?(name)
500
- @defs[name]
501
- elsif @child
502
- @child.dataset(name)
503
- else
504
- raise "No such dataset #{name}"
505
- end
506
- end
507
-
508
- end # class Explicit
509
-
510
- #
511
- # Specialization of Environment to work on files of a given folder.
512
- #
513
- # This kind of environment resolves datasets by simply looking at
514
- # recognized files in a specific folder. "Recognized" files are simply
515
- # those for which a Reader subclass has been previously registered.
516
- # This environment then serves reader instances.
517
- #
518
- class Folder < Environment
519
-
520
- #
521
- # (see Environment.recognizes?)
522
- #
523
- # Returns true if args contains onely a String which is an existing
524
- # folder.
525
- #
526
- def self.recognizes?(args)
527
- (args.size == 1) &&
528
- args.first.is_a?(String) &&
529
- File.directory?(args.first.to_s)
530
- end
531
-
532
- #
533
- # Creates an environment instance, wired to the specified folder.
534
- #
535
- # @param [String] folder path to the folder to use as dataset source
536
- #
537
- def initialize(folder)
538
- @folder = folder
539
- end
540
-
541
- # (see Environment#dataset)
542
- def dataset(name)
543
- if file = find_file(name)
544
- Reader.reader(file, self)
545
- else
546
- raise "No such dataset #{name} (#{@folder})"
547
- end
548
- end
549
-
550
- protected
551
-
552
- def find_file(name)
553
- # TODO: refactor this, because it allows getting out of the folder
554
- if File.exists?(name.to_s)
555
- name.to_s
556
- elsif File.exists?(explicit = File.join(@folder, name.to_s)) &&
557
- File.file?(explicit)
558
- explicit
559
- else
560
- Dir[File.join(@folder, "#{name}.*")].find do |f|
561
- File.file?(f)
562
- end
563
- end
564
- end
565
-
566
- Environment.register(:folder, self)
567
- end # class Folder
568
-
569
- #
570
- # Returns the default environment
571
- #
572
- def self.default
573
- examples
574
- end
575
-
576
- #
577
- # Returns the examples environment
578
- #
579
- def self.examples
580
- folder File.expand_path('../../examples/operators', __FILE__)
581
- end
582
-
77
+ require 'alf/environment/class_methods'
78
+ require 'alf/environment/base'
79
+ require 'alf/environment/explicit'
80
+ require 'alf/environment/folder'
81
+
583
82
  end # class Environment
584
83
 
585
84
  #
@@ -600,48 +99,9 @@ module Alf
600
99
  module Iterator
601
100
  include Enumerable
602
101
 
603
- #
604
- # Wire the iterator input and an optional execution environment.
605
- #
606
- # Iterators (typically Reader and Operator instances) work from input data
607
- # that come from files, or other operators, and so on. This method wires
608
- # this input data to the iterator. Wiring is required before any attempt
609
- # to call each, unless autowiring occurs at construction. The exact kind of
610
- # input object is left at discretion of Iterator implementations.
611
- #
612
- # @param [Object] input the iterator input, at discretion of the Iterator
613
- # implementation.
614
- # @param [Environment] environment an optional environment for resolving
615
- # named datasets if needed.
616
- # @return [Object] self
617
- #
618
- def pipe(input, environment = nil)
619
- self
620
- end
621
- undef :pipe
622
-
623
- #
624
- # Coerces something to an iterator
625
- #
626
- def self.coerce(arg, environment = nil)
627
- case arg
628
- when Iterator, Array
629
- arg
630
- else
631
- Reader.coerce(arg, environment)
632
- end
633
- end
634
-
635
- #
636
- # Converts this iterator to an in-memory Relation.
637
- #
638
- # @return [Relation] a relation instance, as the set of tuples
639
- # that would be yield by this iterator.
640
- #
641
- def to_rel
642
- Relation::coerce(self)
643
- end
644
-
102
+ require 'alf/iterator/class_methods'
103
+ require 'alf/iterator/base'
104
+ require 'alf/iterator/proxy'
645
105
  end # module Iterator
646
106
 
647
107
  #
@@ -673,2914 +133,187 @@ module Alf
673
133
  #
674
134
  class Reader
675
135
  include Iterator
676
-
677
- # Registered readers
678
- @@readers = []
679
-
680
- #
681
- # Registers a reader class associated with specific file extensions
136
+
137
+ require 'alf/reader/class_methods'
138
+ require 'alf/reader/base'
139
+ require 'alf/reader/rash'
140
+ require 'alf/reader/alf_file'
141
+ end # class Reader
142
+
143
+ #
144
+ # Renders a relation (given by any Iterator) in a specific format.
145
+ #
146
+ # A renderer takes an Iterator instance as input and renders it on an output
147
+ # stream. Renderers are **not** iterators themselves, even if they mimic the
148
+ # {#pipe} method. Their usage is made via the {#execute} method.
149
+ #
150
+ # Similarly to the {Reader} class, this one provides a registration mechanism
151
+ # for specific output formats. The common scenario is as follows:
152
+ #
153
+ # # Register a new renderer for :foo format (automatically provides the
154
+ # # '--foo Render output as a foo stream' option of 'alf show') and with
155
+ # # the FooRenderer class for handling rendering.
156
+ # Renderer.register(:foo, "as a foo stream", FooRenderer)
157
+ #
158
+ # # Later on, you can request a renderer instance for a specific format
159
+ # # as follows (wiring input is optional)
160
+ # r = Renderer.renderer(:foo, [an Iterator])
161
+ #
162
+ # # Also, a factory method is automatically installed on the Renderer class
163
+ # # itself.
164
+ # r = Renderer.foo([an Iterator])
165
+ #
166
+ class Renderer
167
+ require 'alf/renderer/class_methods'
168
+ require 'alf/renderer/base'
169
+ require 'alf/renderer/rash'
170
+ require 'alf/renderer/text'
171
+
172
+ end # class Renderer
173
+
174
+ #
175
+ # Marker module and namespace for Alf main commands, those that are **not**
176
+ # operators at all.
177
+ #
178
+ module Command
179
+ require 'alf/command/class_methods'
180
+ require 'alf/command/doc_manager'
181
+
182
+ # This is the main documentation extractor
183
+ DOC_EXTRACTOR = DocManager.new
184
+
682
185
  #
683
- # Registered class must provide a constructor with the following signature
684
- # <code>new(path_or_io, environment = nil)</code>. The name must be a symbol
685
- # which can safely be used as a ruby method name. A factory class method of
686
- # that name and same signature is automatically installed on the Reader
687
- # class.
186
+ # Delegator command factory
688
187
  #
689
- # @param [Symbol] name a name for the kind of data decoded
690
- # @param [Array] extensions file extensions mapped to the registered reader
691
- # class (should include the '.', e.g. '.foo')
692
- # @param [Class] class Reader subclass used to decode this kind of files
693
- #
694
- def self.register(name, extensions, clazz)
695
- @@readers << [name, extensions, clazz]
696
- (class << self; self; end).
697
- send(:define_method, name) do |*args|
698
- clazz.new(*args)
699
- end
188
+ def Alf.Delegator()
189
+ Quickl::Delegator(){|builder|
190
+ builder.doc_extractor = DOC_EXTRACTOR
191
+ yield(builder) if block_given?
192
+ }
700
193
  end
701
-
194
+
702
195
  #
703
- # When filepath is a String, returns a reader instance for a specific file
704
- # whose path is given as argument. Otherwise, delegate the call to
705
- # <code>coerce(filepath)</code>
196
+ # Command factory
706
197
  #
707
- # @param [String] filepath path to a file for which extension is recognized
708
- # @param [Array] args optional additional arguments that must be passed at
709
- # reader's class new method.
710
- # @return [Reader] a reader instance
711
- #
712
- def self.reader(filepath, *args)
713
- if filepath.is_a?(String)
714
- ext = File.extname(filepath)
715
- if registered = @@readers.find{|r| r[1].include?(ext)}
716
- registered[2].new(filepath, *args)
717
- else
718
- raise "No registered reader for #{ext} (#{filepath})"
719
- end
720
- elsif args.empty?
721
- coerce(filepath)
722
- else
723
- raise ArgumentError, "Unable to return a reader for #{filepath} and #{args}"
724
- end
725
- end
726
-
727
- #
728
- # Coerces an argument to a reader, using an optional environment to convert
729
- # named datasets.
730
- #
731
- # This method automatically provides readers for Strings and Symbols through
732
- # passed environment (**not** through the reader factory) and for IO objects
733
- # (through Rash reader). It is part of Alf's internals and should be used
734
- # with care.
735
- #
736
- def self.coerce(arg, environment = nil)
737
- case arg
738
- when Reader
739
- arg
740
- when IO
741
- rash(arg, environment)
742
- when String, Symbol
743
- if environment
744
- environment.dataset(arg.to_sym)
745
- else
746
- raise "No environment set"
747
- end
748
- else
749
- raise ArgumentError, "Unable to coerce #{arg.inspect} to a reader"
750
- end
751
- end
752
-
753
- # Default reader options
754
- DEFAULT_OPTIONS = {}
755
-
756
- # @return [Environment] Wired environment
757
- attr_accessor :environment
758
-
759
- # @return [String or IO] Input IO, or file name
760
- attr_accessor :input
761
-
762
- # @return [Hash] Reader's options
763
- attr_accessor :options
764
-
765
- #
766
- # Creates a reader instance.
767
- #
768
- # @param [String or IO] path to a file or IO object for input
769
- # @param [Environment] environment wired environment, serving this reader
770
- # @param [Hash] options Reader's options (see doc of subclasses)
771
- #
772
- def initialize(*args)
773
- @input, @environment, @options = case args.first
774
- when String, IO, StringIO
775
- Tools.varargs(args, [args.first.class, Environment, Hash])
776
- else
777
- Tools.varargs(args, [String, Environment, Hash])
778
- end
779
- @options = self.class.const_get(:DEFAULT_OPTIONS).merge(@options || {})
780
- end
781
-
782
- #
783
- # (see Iterator#pipe)
784
- #
785
- def pipe(input, env = environment)
786
- @input = input
787
- self
788
- end
789
-
790
- #
791
- # (see Iterator#each)
792
- #
793
- # @private the default implementation reads lines of the input stream and
794
- # yields the block with <code>line2tuple(line)</code> on each of them. This
795
- # method may be overridden if this behavior does not fit reader's needs.
796
- #
797
- def each
798
- each_input_line do |line|
799
- tuple = line2tuple(line)
800
- yield tuple unless tuple.nil?
801
- end
802
- end
803
-
804
- protected
805
-
806
- #
807
- # Returns the input file path, or nil if this Reader is bound to an IO
808
- # directly.
809
- #
810
- def input_path
811
- input.is_a?(String) ? input : nil
812
- end
813
-
814
- #
815
- # Coerces the input object to an IO and yields the block with it.
816
- #
817
- # StringIO and IO input are yielded directly while file paths are first
818
- # opened in read mode and then yielded.
819
- #
820
- def with_input_io
821
- case input
822
- when IO, StringIO
823
- yield input
824
- when String
825
- File.open(input, 'r'){|io| yield io}
826
- else
827
- raise "Unable to convert #{input} to an IO object"
828
- end
829
- end
830
-
831
- #
832
- # Returns the whole input text.
833
- #
834
- # This feature should only be used by subclasses on inputs that are
835
- # small enough to fit in memory. Consider implementing readers without this
836
- # feature on files that could be larger.
837
- #
838
- def input_text
839
- with_input_io{|io| io.readlines.join}
840
- end
841
-
842
- #
843
- # Yields the block with each line of the input text in turn.
844
- #
845
- # This method is a helper for files that capture one tuple on each input
846
- # line. It should be used in those cases, as the resulting reader will not
847
- # load all input in memory but serve tuples on demand.
848
- #
849
- def each_input_line
850
- with_input_io{|io| io.each_line(&Proc.new)}
851
- end
852
-
853
- #
854
- # Converts a line previously read from the input stream to a tuple.
855
- #
856
- # The line is simply ignored if this method returns nil. Errors should be
857
- # properly handled by raising exceptions. This method MUST be implemented
858
- # by subclasses unless each is overridden.
859
- #
860
- def line2tuple(line)
861
- end
862
- undef :line2tuple
863
-
864
- #
865
- # Specialization of the Reader contract for .rash files.
866
- #
867
- # A .rash file/stream contains one ruby hash literal on each line. This
868
- # reader simply decodes each of them in turn with Kernel.eval, providing a
869
- # state-less reader (that is, tuples are not all loaded in memory at once).
870
- #
871
- class Rash < Reader
872
-
873
- # (see Reader#line2tuple)
874
- def line2tuple(line)
875
- begin
876
- h = Kernel.eval(line)
877
- raise "hash expected, got #{h}" unless h.is_a?(Hash)
878
- rescue Exception => ex
879
- $stderr << "Skipping #{line.strip}: #{ex.message}\n"
880
- nil
881
- else
882
- return h
883
- end
884
- end
885
-
886
- Reader.register(:rash, [".rash"], self)
887
- end # class Rash
888
-
889
- #
890
- # Specialization of the Reader contract for .alf files.
891
- #
892
- # A .alf file simply contains a query expression in the Lispy DSL. This
893
- # reader decodes and compiles the expression and delegates the enumeration
894
- # to the obtained operator.
895
- #
896
- # Note that an Environment must be wired at creation or piping time.
897
- # NoSuchDatasetError will certainly occur otherwise.
898
- #
899
- class AlfFile < Reader
900
-
901
- # (see Reader#each)
902
- def each
903
- op = Alf.lispy(environment).compile(input_text, input_path)
904
- op.each(&Proc.new)
905
- end
906
-
907
- Reader.register(:alf, [".alf"], self)
908
- end # module AlfFile
909
-
910
- end # module Reader
911
-
912
- #
913
- # Renders a relation (given by any Iterator) in a specific format.
914
- #
915
- # A renderer takes an Iterator instance as input and renders it on an output
916
- # stream. Renderers are **not** iterators themselves, even if they mimic the
917
- # {#pipe} method. Their usage is made via the {#execute} method.
918
- #
919
- # Similarly to the {Reader} class, this one provides a registration mechanism
920
- # for specific output formats. The common scenario is as follows:
921
- #
922
- # # Register a new renderer for :foo format (automatically provides the
923
- # # '--foo Render output as a foo stream' option of 'alf show') and with
924
- # # the FooRenderer class for handling rendering.
925
- # Renderer.register(:foo, "as a foo stream", FooRenderer)
926
- #
927
- # # Later on, you can request a renderer instance for a specific format
928
- # # as follows (wiring input is optional)
929
- # r = Renderer.renderer(:foo, [an Iterator])
930
- #
931
- # # Also, a factory method is automatically installed on the Renderer class
932
- # # itself.
933
- # r = Renderer.foo([an Iterator])
934
- #
935
- class Renderer
936
-
937
- # Registered renderers
938
- @@renderers = []
939
-
940
- #
941
- # Register a rendering class with a given name and description.
942
- #
943
- # Registered class must at least provide a constructor with an empty
944
- # signature. The name must be a symbol which can safely be used as a ruby
945
- # method name. A factory class method of that name and delegation signature
946
- # is automatically installed on the Renderer class.
947
- #
948
- # @param [Symbol] name a name for the output format
949
- # @param [String] description an output format description (for 'alf show')
950
- # @param [Class] clazz Renderer subclass used to render in this format
951
- #
952
- def self.register(name, description, clazz)
953
- @@renderers << [name, description, clazz]
954
- (class << self; self; end).
955
- send(:define_method, name) do |*args|
956
- clazz.new(*args)
957
- end
958
- end
959
-
960
- #
961
- # Returns a Renderer instance for the given output format name.
962
- #
963
- # @param [Symbol] name name of an output format previously registered
964
- # @param [...] args other arguments to pass to the renderer constructor
965
- # @return [Renderer] a Renderer instance, already wired if args are
966
- # provided
967
- #
968
- def self.renderer(name, *args)
969
- if r = @@renderers.find{|triple| triple[0] == name}
970
- r[2].new(*args)
971
- else
972
- raise "No renderer registered for #{name}"
973
- end
974
- end
975
-
976
- #
977
- # Yields each (name,description,clazz) previously registered in turn
978
- #
979
- def self.each_renderer
980
- @@renderers.each(&Proc.new)
981
- end
982
-
983
- # Default renderer options
984
- DEFAULT_OPTIONS = {}
985
-
986
- # Renderer input (typically an Iterator)
987
- attr_accessor :input
988
-
989
- # @return [Environment] Optional wired environment
990
- attr_accessor :environment
991
-
992
- # @return [Hash] Renderer's options
993
- attr_accessor :options
994
-
995
- #
996
- # Creates a renderer instance.
997
- #
998
- # @param [Iterator] iterator an Iterator of tuples to render
999
- # @param [Environment] environment wired environment, serving this reader
1000
- # @param [Hash] options Reader's options (see doc of subclasses)
1001
- #
1002
- def initialize(*args)
1003
- @input, @environment, @options = case args.first
1004
- when Array
1005
- Tools.varargs(args, [Array, Environment, Hash])
1006
- else
1007
- Tools.varargs(args, [Iterator, Environment, Hash])
1008
- end
1009
- @options = self.class.const_get(:DEFAULT_OPTIONS).merge(@options || {})
1010
- end
1011
-
1012
- #
1013
- # Sets the renderer input.
1014
- #
1015
- # This method mimics {Iterator#pipe} and has the same contract.
1016
- #
1017
- def pipe(input, env = environment)
1018
- self.environment = env
1019
- self.input = input
1020
- self
1021
- end
1022
-
1023
- #
1024
- # Executes the rendering, outputting the resulting tuples on the provided
1025
- # output buffer.
1026
- #
1027
- # The default implementation simply coerces the input as an Iterator and
1028
- # delegates the call to {#render}.
1029
- #
1030
- def execute(output = $stdout)
1031
- render(Iterator.coerce(input, environment), output)
1032
- end
1033
-
1034
- protected
1035
-
1036
- #
1037
- # Renders tuples served by the iterator to the output buffer provided and
1038
- # returns the latter.
1039
- #
1040
- # This method must be implemented by subclasses unless {#execute} is
1041
- # overridden.
1042
- #
1043
- def render(iterator, output)
1044
- end
1045
- undef :render
1046
-
1047
- #
1048
- # Implements the Renderer contract through inspect
1049
- #
1050
- class Rash < Renderer
1051
-
1052
- # (see Renderer#render)
1053
- def render(input, output)
1054
- input.each do |tuple|
1055
- output << Myrrha.to_ruby_literal(tuple) << "\n"
1056
- end
1057
- output
1058
- end
1059
-
1060
- Renderer.register(:rash, "as ruby hashes", self)
1061
- end # class Rash
1062
-
1063
- end # module Renderer
1064
-
1065
- #
1066
- # Provides a factory over Alf operators and handles the interface with
1067
- # Quickl for commandline support.
1068
- #
1069
- # This module is part of Alf's internal architecture and should not be used
1070
- # at all by third-party projects.
1071
- #
1072
- module Factory
1073
-
1074
- # @see Quickl::Command
1075
- def Command(file, line)
1076
- Quickl::Command(file, line){|builder|
1077
- builder.command_parent = Alf::Command::Main
1078
- yield(builder) if block_given?
1079
- }
1080
- end
1081
-
1082
- # @see Operator
1083
- def Operator(file, line)
1084
- Command(file, line) do |b|
1085
- b.instance_module Alf::Operator
1086
- end
1087
- end
1088
-
1089
- extend Factory
1090
- end # module Factory
1091
-
1092
- #
1093
- # Marker module and namespace for Alf main commands, those that are **not**
1094
- # operators at all.
1095
- #
1096
- module Command
1097
-
1098
- #
1099
- # alf - Classy data-manipulation dressed in a DSL (+ commandline)
1100
- #
1101
- # SYNOPSIS
1102
- # alf [--version] [--help]
1103
- # alf -e '(lispy command)'
1104
- # alf [FILE.alf]
1105
- # alf [alf opts] OPERATOR [operator opts] ARGS ...
1106
- # alf help OPERATOR
1107
- #
1108
- # OPTIONS
1109
- # #{summarized_options}
1110
- #
1111
- # RELATIONAL COMMANDS
1112
- # #{summarized_subcommands subcommands.select{|cmd|
1113
- # cmd.include?(Alf::Operator::Relational) &&
1114
- # !cmd.include?(Alf::Operator::Experimental)
1115
- # }}
1116
- #
1117
- # EXPERIMENTAL OPERATORS
1118
- # #{summarized_subcommands subcommands.select{|cmd|
1119
- # cmd.include?(Alf::Operator::Relational) &&
1120
- # cmd.include?(Alf::Operator::Experimental)
1121
- # }}
1122
- #
1123
- # NON-RELATIONAL COMMANDS
1124
- # #{summarized_subcommands subcommands.select{|cmd|
1125
- # cmd.include?(Alf::Operator::NonRelational)
1126
- # }}
1127
- #
1128
- # OTHER NON-RELATIONAL COMMANDS
1129
- # #{summarized_subcommands subcommands.select{|cmd|
1130
- # cmd.include?(Alf::Command)
1131
- # }}
1132
- #
1133
- # See '#{program_name} help COMMAND' for details about a specific command.
1134
- #
1135
- class Main < Quickl::Delegator(__FILE__, __LINE__)
1136
- include Command
1137
-
1138
- # Environment instance to use to get base iterators
1139
- attr_accessor :environment
1140
-
1141
- # Output renderer
1142
- attr_accessor :renderer
1143
-
1144
- # Creates a command instance
1145
- def initialize(env = Environment.default)
1146
- @environment = env
1147
- end
1148
-
1149
- # Install options
1150
- options do |opt|
1151
- @execute = false
1152
- opt.on("-e", "--execute", "Execute one line of script (Lispy API)") do
1153
- @execute = true
1154
- end
1155
-
1156
- @renderer = nil
1157
- Renderer.each_renderer do |name,descr,clazz|
1158
- opt.on("--#{name}", "Render output #{descr}"){
1159
- @renderer = clazz.new
1160
- }
1161
- end
1162
-
1163
- opt.on('--env=ENV',
1164
- "Set the environment to use") do |value|
1165
- @environment = Environment.autodetect(value)
1166
- end
1167
-
1168
- opt.on('-rlibrary', "require the library, before executing alf") do |value|
1169
- require(value)
1170
- end
1171
-
1172
- opt.on_tail('-h', "--help", "Show help") do
1173
- raise Quickl::Help
1174
- end
1175
-
1176
- opt.on_tail('-v', "--version", "Show version") do
1177
- raise Quickl::Exit, "alf #{Alf::VERSION}"\
1178
- " (c) 2011, Bernard Lambeau"
1179
- end
1180
- end # Alf's options
1181
-
1182
- #
1183
- def _normalize(args)
1184
- opts = []
1185
- while !args.empty? && (args.first =~ /^\-/)
1186
- opts << args.shift
1187
- end
1188
- if args.empty? or (args.size == 1 && File.exists?(args.first))
1189
- opts << "exec"
1190
- end
1191
- opts += args
1192
- end
1193
-
1194
- #
1195
- # Overridden because Quickl only keeps --options but modifying it there
1196
- # should probably be considered a broken API.
1197
- #
1198
- def _run(argv = [])
1199
- argv = _normalize(argv)
1200
-
1201
- # 1) Extract my options and parse them
1202
- my_argv = []
1203
- while argv.first =~ /^-/
1204
- my_argv << argv.shift
1205
- end
1206
- parse_options(my_argv)
1207
-
1208
- # 2) build the operator according to -e option
1209
- operator = if @execute
1210
- Alf.lispy(environment).compile(argv.first)
1211
- else
1212
- super
1213
- end
1214
-
1215
- # 3) if there is a requester, then we do the job (assuming bin/alf)
1216
- # with the renderer to use. Otherwise, we simply return built operator
1217
- if operator && requester
1218
- renderer = self.renderer ||= Renderer::Rash.new
1219
- renderer.pipe(operator, environment).execute($stdout)
1220
- else
1221
- operator
1222
- end
1223
- end
1224
-
1225
- end
1226
-
1227
- #
1228
- # Output input tuples through a specific renderer (text, yaml, ...)
1229
- #
1230
- # SYNOPSIS
1231
- # #{program_name} #{command_name} DATASET
1232
- #
1233
- # OPTIONS
1234
- # #{summarized_options}
1235
- #
1236
- # DESCRIPTION
1237
- #
1238
- # When a dataset name is specified as commandline arg, request the
1239
- # environment to provide this dataset and prints it. Otherwise, take what
1240
- # comes on standard input.
1241
- #
1242
- # Note that this command is not an operator and should not be piped anymore.
1243
- #
1244
- class Show < Factory::Command(__FILE__, __LINE__)
1245
- include Command
1246
-
1247
- options do |opt|
1248
- @renderer = nil
1249
- Renderer.each_renderer do |name,descr,clazz|
1250
- opt.on("--#{name}", "Render output #{descr}"){
1251
- @renderer = clazz.new
1252
- }
1253
- end
1254
- end
1255
-
1256
- def execute(args)
1257
- requester.renderer = (@renderer || requester.renderer || Text::Renderer.new)
1258
- args = [ $stdin ] if args.empty?
1259
- args.first
1260
- end
1261
-
1262
- end # class Show
1263
-
1264
- #
1265
- # Executes an .alf file on current environment
1266
- #
1267
- # SYNOPSIS
1268
- # #{program_name} #{command_name} [FILE]
1269
- #
1270
- # OPTIONS
1271
- # #{summarized_options}
1272
- #
1273
- # DESCRIPTION
1274
- #
1275
- # This command executes the .alf file passed as first argument (or what comes
1276
- # on standard input) as a alf query to be executed on the current environment.
1277
- #
1278
- class Exec < Factory::Command(__FILE__, __LINE__)
1279
- include Command
1280
-
1281
- def execute(args)
1282
- Reader.alf(args.first || $stdin, requester.environment)
1283
- end
1284
-
1285
- end # class Exec
1286
-
1287
- #
1288
- # Show help about a specific command
1289
- #
1290
- # SYNOPSIS
1291
- # #{program_name} #{command_name} COMMAND
1292
- #
1293
- class Help < Factory::Command(__FILE__, __LINE__)
1294
- include Command
1295
-
1296
- # Let NoSuchCommandError be passed to higher stage
1297
- no_react_to Quickl::NoSuchCommand
1298
-
1299
- # Command execution
1300
- def execute(args)
1301
- if args.size != 1
1302
- puts super_command.help
1303
- else
1304
- cmd = has_command!(args.first, super_command)
1305
- puts cmd.help
1306
- end
1307
- nil
1308
- end
1309
-
1310
- end # class Help
1311
-
1312
- end
1313
-
1314
- #
1315
- # Marker for all operators, relational and non-relational ones.
1316
- #
1317
- module Operator
1318
- include Iterator, Tools
1319
-
1320
- #
1321
- # Yields non-relational then relational operators, in turn.
1322
- #
1323
- def self.each
1324
- Operator::NonRelational.each{|x| yield(x)}
1325
- Operator::Relational.each{|x| yield(x)}
1326
- end
1327
-
1328
- #
1329
- # Encapsulates method that allows making operator introspection, that is,
1330
- # knowing operator cardinality and similar stuff.
1331
- #
1332
- module Introspection
1333
-
1334
- #
1335
- # Returns true if this operator is an unary operator, false otherwise
1336
- #
1337
- def unary?
1338
- ancestors.include?(Operator::Unary)
1339
- end
1340
-
1341
- #
1342
- # Returns true if this operator is a binary operator, false otherwise
1343
- #
1344
- def binary?
1345
- ancestors.include?(Operator::Binary)
1346
- end
1347
-
1348
- end # module Introspection
1349
-
1350
- # Ensures that the Introspection module is set on real operators
1351
- def self.included(mod)
1352
- mod.extend(Introspection) if mod.is_a?(Class)
1353
- end
1354
-
1355
- #
1356
- # Encapsulates method definitions that convert operators to Quickl
1357
- # commands
1358
- #
1359
- module CommandMethods
1360
-
1361
- protected
1362
-
1363
- #
1364
- # Configures the operator from arguments taken from command line.
1365
- #
1366
- # This method is intended to be overridden by subclasses and must return the
1367
- # operator itself.
1368
- #
1369
- def set_args(args)
1370
- self
1371
- end
1372
-
1373
- #
1374
- # Overrides Quickl::Command::Single#_run to handle the '--' separator
1375
- # correctly.
1376
- #
1377
- # This is because parse_options tends to eat the '--' separator... This
1378
- # could be handled in Quickl itself, but it should be considered a broken
1379
- # API and will only be available in quickl >= 0.3.0 (probably)
1380
- #
1381
- def _run(argv = [])
1382
- operands, args = split_command_args(argv).collect do |arr|
1383
- parse_options(arr)
1384
- end
1385
- self.set_args(args)
1386
- if operands = command_line_operands(operands)
1387
- env = environment || (requester ? requester.environment : nil)
1388
- self.pipe(operands, env)
1389
- end
1390
- self
1391
- end
1392
-
1393
- def split_command_args(args)
1394
- case (i = args.index("--"))
1395
- when NilClass
1396
- [args, []]
1397
- when 0
1398
- [[ $stdin ], args[1..-1]]
1399
- else
1400
- [args[0...i], args[i+1..-1]]
1401
- end
1402
- end
1403
-
1404
- def command_line_operands(operands)
1405
- operands
1406
- end
1407
-
1408
- end # module CommandMethods
1409
- include CommandMethods
1410
-
1411
- # Operators input datasets
1412
- attr_accessor :datasets
1413
-
1414
- # Optional environment
1415
- attr_reader :environment
1416
-
1417
- # Sets the environment on this operator and propagate on
1418
- # datasets
1419
- def environment=(env)
1420
- # this is to avoid infinite loop (TODO: why is there infinite loops??)
1421
- return if @environment == env
1422
-
1423
- # set and propagate on children
1424
- @environment = env
1425
- datasets.each do |dataset|
1426
- if dataset.respond_to?(:environment)
1427
- dataset.environment = env
1428
- end
1429
- end if datasets
1430
-
1431
- env
1432
- end
1433
-
1434
- #
1435
- # Sets the operator input
1436
- #
1437
- def pipe(input, env = environment)
1438
- raise NotImplementedError, "Operator#pipe should be overriden"
1439
- end
1440
-
1441
- #
1442
- # Yields each tuple in turn
1443
- #
1444
- # This method is implemented in a way that ensures that all operators are
1445
- # thread safe. It is not intended to be overridden, use _each instead.
1446
- #
1447
- def each
1448
- op = self.dup
1449
- op._prepare
1450
- op._each(&Proc.new)
1451
- end
1452
-
1453
- protected
1454
-
1455
- #
1456
- # Prepares the iterator before subsequent call to _each.
1457
- #
1458
- # This method is intended to be overridden by subclasses to install what's
1459
- # needed for successful iteration. The default implementation does nothing.
1460
- #
1461
- def _prepare
1462
- end
1463
-
1464
- # Internal implementation of the iterator.
1465
- #
1466
- # This method must be implemented by subclasses. It is safe to use instance
1467
- # variables (typically initialized in _prepare) here.
1468
- #
1469
- def _each
1470
- end
1471
-
1472
- #
1473
- # Specialization of Operator for operators that work on a unary input
1474
- #
1475
- module Unary
1476
- include Operator
1477
-
1478
- #
1479
- # Sets the operator input
1480
- #
1481
- def pipe(input, env = environment)
1482
- self.environment = env
1483
- self.datasets = [ input ]
1484
- self
1485
- end
1486
-
1487
- protected
1488
-
1489
- def command_line_operands(operands)
1490
- operands.first || $stdin
1491
- end
1492
-
1493
- #
1494
- # Simply returns the first dataset
1495
- #
1496
- def input
1497
- Iterator.coerce(datasets.first, environment)
1498
- end
1499
-
1500
- #
1501
- # Yields the block with each input tuple.
1502
- #
1503
- # This method should be preferred to <code>input.each</code> when possible.
1504
- #
1505
- def each_input_tuple
1506
- input.each(&Proc.new)
1507
- end
1508
-
1509
- end # module Unary
1510
-
1511
- #
1512
- # Specialization of Operator for operators that work on a binary input
1513
- #
1514
- module Binary
1515
- include Operator
1516
-
1517
- #
1518
- # Sets the operator input
1519
- #
1520
- def pipe(input, env = environment)
1521
- self.environment = env
1522
- self.datasets = input
1523
- self
1524
- end
1525
-
1526
- protected
1527
-
1528
- def command_line_operands(operands)
1529
- (operands.size < 2) ? ([$stdin] + operands) : operands
1530
- end
1531
-
1532
- # Returns the left operand
1533
- def left
1534
- Iterator.coerce(datasets.first, environment)
1535
- end
1536
-
1537
- # Returns the right operand
1538
- def right
1539
- Iterator.coerce(datasets.last, environment)
1540
- end
1541
-
1542
- end # module Binary
1543
-
1544
- #
1545
- # Specialization of Operator for operators that simply convert single tuples
1546
- # to single tuples.
1547
- #
1548
- module Transform
1549
- include Unary
1550
-
1551
- protected
1552
-
1553
- # (see Operator#_each)
1554
- def _each
1555
- each_input_tuple do |tuple|
1556
- yield _tuple2tuple(tuple)
1557
- end
1558
- end
1559
-
1560
- #
1561
- # Transforms an input tuple to an output tuple
1562
- #
1563
- def _tuple2tuple(tuple)
1564
- end
1565
-
1566
- end # module Transform
1567
-
1568
- #
1569
- # Specialization of Operator for implementing operators that rely on a
1570
- # cesure algorithm.
1571
- #
1572
- module Cesure
1573
- include Unary
1574
-
1575
- protected
1576
-
1577
- # (see Operator#_each)
1578
- def _each
1579
- receiver, proj_key, prev_key = Proc.new, cesure_key, nil
1580
- each_input_tuple do |tuple|
1581
- cur_key = proj_key.project(tuple)
1582
- if cur_key != prev_key
1583
- flush_cesure(prev_key, receiver) unless prev_key.nil?
1584
- start_cesure(cur_key, receiver)
1585
- prev_key = cur_key
1586
- end
1587
- accumulate_cesure(tuple, receiver)
1588
- end
1589
- flush_cesure(prev_key, receiver) unless prev_key.nil?
1590
- end
1591
-
1592
- def cesure_key
1593
- end
1594
-
1595
- def start_cesure(key, receiver)
1596
- end
1597
-
1598
- def accumulate_cesure(tuple, receiver)
1599
- end
1600
-
1601
- def flush_cesure(key, receiver)
1602
- end
1603
-
1604
- end # module Cesure
1605
-
1606
- #
1607
- # Specialization of Operator for operators that are shortcuts for longer
1608
- # expressions.
1609
- #
1610
- module Shortcut
1611
- include Operator
1612
-
1613
- #
1614
- # Sets the operator input
1615
- #
1616
- def pipe(input, env = environment)
1617
- self.environment = env
1618
- self.datasets = input
1619
- self
1620
- end
1621
-
1622
- protected
1623
-
1624
- # (see Operator#_each)
1625
- def _each
1626
- longexpr.each(&Proc.new)
1627
- end
1628
-
1629
- #
1630
- # Compiles the longer expression and returns it.
1631
- #
1632
- # @return (Iterator) the compiled longer expression, typically another
1633
- # Operator instance
1634
- #
1635
- def longexpr
1636
- end
1637
- undef :longexpr
1638
-
1639
- #
1640
- # This is a helper ala Lispy#chain for implementing (#longexpr).
1641
- #
1642
- # @param [Array] elements a list of Iterator-able
1643
- # @return [Operator] the first element of the list, but piped with the
1644
- # next one, and so on.
1645
- #
1646
- def chain(*elements)
1647
- elements = elements.reverse
1648
- elements[1..-1].inject(elements.first) do |c, elm|
1649
- elm.pipe(c, environment)
1650
- elm
1651
- end
1652
- end
1653
-
1654
- end # module Shortcut
1655
-
1656
- # Marker for experimental operators
1657
- module Experimental; end
1658
-
1659
- end # module Operator
1660
-
1661
- #
1662
- # Marker module and namespace for non relational operators
1663
- #
1664
- module Operator::NonRelational
1665
-
1666
- #
1667
- # Yields the block with each operator module in turn
1668
- #
1669
- def self.each
1670
- constants.each do |c|
1671
- val = const_get(c)
1672
- yield(val) if val.ancestors.include?(Operator::NonRelational)
1673
- end
1674
- end
1675
-
1676
- #
1677
- # Extend its operand with an unique autonumber attribute
1678
- #
1679
- # SYNOPSIS
1680
- #
1681
- # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1682
- #
1683
- # DESCRIPTION
1684
- #
1685
- # This non-relational operator guarantees uniqueness of output tuples by
1686
- # adding an attribute called 'ATTRNAME' whose value is an Integer. No
1687
- # guarantee is given about ordering of output tuples, nor to the fact
1688
- # that this autonumber is sequential. Only that all values are different.
1689
- # If the presence of duplicates was the only "non-relational" aspect of
1690
- # input tuples, the result may be considered a valid relation representation.
1691
- #
1692
- # IN RUBY
1693
- #
1694
- # (autonum OPERAND, ATTRNAME = :autonum)
1695
- #
1696
- # (autonum :suppliers)
1697
- # (autonum :suppliers, :unique_id)
1698
- #
1699
- # IN SHELL
1700
- #
1701
- # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1702
- #
1703
- # alf autonum suppliers
1704
- # alf autonum suppliers -- unique_id
1705
- #
1706
- class Autonum < Factory::Operator(__FILE__, __LINE__)
1707
- include Operator::NonRelational, Operator::Transform
1708
-
1709
- # Names of the new attribute to add
1710
- attr_accessor :attrname
1711
-
1712
- def initialize(attrname = :autonum)
1713
- @attrname = attrname
1714
- end
1715
-
1716
- protected
1717
-
1718
- # (see Operator::CommandMethods#set_args)
1719
- def set_args(args)
1720
- @attrname = args.last.to_sym unless args.empty?
1721
- end
1722
-
1723
- # (see Operator#_prepare)
1724
- def _prepare
1725
- @autonum = -1
1726
- end
1727
-
1728
- # (see Operator::Transform#_tuple2tuple)
1729
- def _tuple2tuple(tuple)
1730
- tuple.merge(@attrname => (@autonum += 1))
1731
- end
1732
-
1733
- end # class Autonum
1734
-
1735
- #
1736
- # Force default values on missing/nil attributes
1737
- #
1738
- # SYNOPSIS
1739
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
1740
- #
1741
- # OPTIONS
1742
- # #{summarized_options}
1743
- #
1744
- # API & EXAMPLE
1745
- #
1746
- # # Non strict mode
1747
- # (defaults :suppliers, :country => 'Belgium')
1748
- #
1749
- # # Strict mode (--strict)
1750
- # (defaults :suppliers, {:country => 'Belgium'}, true)
1751
- #
1752
- # DESCRIPTION
1753
- #
1754
- # This operator rewrites tuples so as to ensure that all values for specified
1755
- # attributes ATTRx are defined and not nil. Missing or nil attributes are
1756
- # replaced by the associated default value VALx.
1757
- #
1758
- # When used in shell, the hash of default values is built from commandline
1759
- # arguments ala Hash[...]. However, to keep type safety VALx are interpreted
1760
- # as ruby literals and built with Kernel.eval. This means that strings must
1761
- # be doubly quoted. For the example of the API section:
1762
- #
1763
- # alf defaults suppliers -- country "'Belgium'"
1764
- #
1765
- # When used in --strict mode, the operator simply project resulting tuples on
1766
- # attributes for which a default value has been specified. Using the strict
1767
- # mode guarantess that the heading of all tuples is the same, and that no nil
1768
- # value ever remains. However, this operator never remove duplicates.
1769
- #
1770
- class Defaults < Factory::Operator(__FILE__, __LINE__)
1771
- include Operator::NonRelational, Operator::Transform
1772
-
1773
- # Default values as a ATTR -> VAL hash
1774
- attr_accessor :defaults
1775
-
1776
- # Strict mode?
1777
- attr_accessor :strict
1778
-
1779
- # Builds a Defaults operator instance
1780
- def initialize(defaults = {}, strict = false)
1781
- @defaults = defaults
1782
- @strict = strict
1783
- end
1784
-
1785
- options do |opt|
1786
- opt.on('-s', '--strict', 'Strictly restrict to default attributes'){
1787
- self.strict = true
1788
- }
1789
- end
1790
-
1791
- protected
1792
-
1793
- # (see Operator::CommandMethods#set_args)
1794
- def set_args(args)
1795
- @defaults = tuple_collect(args.each_slice(2)) do |k,v|
1796
- [k.to_sym, Kernel.eval(v)]
1797
- end
1798
- self
1799
- end
1800
-
1801
- # (see Operator::Transform#_tuple2tuple)
1802
- def _tuple2tuple(tuple)
1803
- if strict
1804
- tuple_collect(@defaults){|k,v|
1805
- [k, coalesce(tuple[k], v)]
1806
- }
1807
- else
1808
- @defaults.merge tuple_collect(tuple){|k,v|
1809
- [k, coalesce(v, @defaults[k])]
1810
- }
1811
- end
1812
- end
1813
-
1814
- end # class Defaults
1815
-
1816
- #
1817
- # Remove tuple duplicates
1818
- #
1819
- # SYNOPSIS
1820
- # #{program_name} #{command_name} [OPERAND]
1821
- #
1822
- # API & EXAMPLE
1823
- #
1824
- # # clip, unlike project, typically leave duplicates
1825
- # (compact (clip :suppliers, [ :city ]))
1826
- #
1827
- # DESCRIPTION
1828
- #
1829
- # This operator remove duplicates from input tuples. As defaults, it is a non
1830
- # relational operator that helps normalizing input for implementing relational
1831
- # operators. This one is centric in converting bags of tuples to sets of
1832
- # tuples, as required by true relations.
1833
- #
1834
- # alf compact ...
1835
- #
1836
- class Compact < Factory::Operator(__FILE__, __LINE__)
1837
- include Operator::NonRelational, Operator::Shortcut, Operator::Unary
1838
-
1839
- # Removes duplicates according to a complete order
1840
- class SortBased
1841
- include Operator::Cesure
1842
-
1843
- def cesure_key
1844
- @cesure_key ||= ProjectionKey.new([],true)
1845
- end
1846
-
1847
- def accumulate_cesure(tuple, receiver)
1848
- @tuple = tuple
1849
- end
1850
-
1851
- def flush_cesure(key, receiver)
1852
- receiver.call(@tuple)
1853
- end
1854
-
1855
- end # class SortBased
1856
-
1857
- # Removes duplicates by loading all in memory and filtering
1858
- # them there
1859
- class BufferBased
1860
- include Operator::Unary
1861
-
1862
- def _prepare
1863
- @tuples = input.to_a.uniq
1864
- end
1865
-
1866
- def _each
1867
- @tuples.each(&Proc.new)
1868
- end
1869
-
1870
- end # class BufferBased
1871
-
1872
- protected
1873
-
1874
- def longexpr
1875
- chain BufferBased.new,
1876
- datasets
1877
- end
1878
-
1879
- end # class Compact
1880
-
1881
- #
1882
- # Sort input tuples according to an order relation
1883
- #
1884
- # SYNOPSIS
1885
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ORDER1 ATTR2 ORDER2...
1886
- #
1887
- # API & EXAMPLE
1888
- #
1889
- # # sort on supplier name in ascending order
1890
- # (sort :suppliers, [:name])
1891
- #
1892
- # # sort on city then on name
1893
- # (sort :suppliers, [:city, :name])
1894
- #
1895
- # # sort on city DESC then on name ASC
1896
- # (sort :suppliers, [[:city, :desc], [:name, :asc]])
1897
- #
1898
- # => See OrderingKey about specifying orderings
1899
- #
1900
- # DESCRIPTION
1901
- #
1902
- # This operator sorts input tuples on ATTR1 then ATTR2, etc. and outputs
1903
- # them sorted after that. This is, of course, a non relational operator as
1904
- # relations are unordered sets. It is provided to implement operators that
1905
- # need tuples to be sorted to work correctly. When used in shell, the key
1906
- # ordering must be specified in its longest form:
1907
- #
1908
- # alf sort suppliers -- name asc
1909
- # alf sort suppliers -- city desc name asc
1910
- #
1911
- # LIMITATIONS
1912
- #
1913
- # The fact that the ordering must be completely specified with commandline
1914
- # arguments is a limitation, shortcuts could be provided in the future.
1915
- #
1916
- class Sort < Factory::Operator(__FILE__, __LINE__)
1917
- include Operator::NonRelational, Operator::Unary
1918
-
1919
- def initialize(ordering_key = [])
1920
- @ordering_key = OrderingKey.coerce(ordering_key)
1921
- yield self if block_given?
1922
- end
1923
-
1924
- def ordering=(ordering)
1925
- @ordering_key = OrderingKey.coerce(ordering)
1926
- end
1927
-
1928
- protected
1929
-
1930
- def set_args(args)
1931
- self.ordering = args.collect{|c| c.to_sym}.each_slice(2).to_a
1932
- self
1933
- end
1934
-
1935
- def _prepare
1936
- @buffer = Buffer::Sorted.new(@ordering_key)
1937
- @buffer.add_all(input)
1938
- end
1939
-
1940
- def _each
1941
- @buffer.each(&Proc.new)
1942
- end
1943
-
1944
- end # class Sort
1945
-
1946
- #
1947
- # Clip input tuples to a subset of attributes
1948
- #
1949
- # SYNOPSIS
1950
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
1951
- #
1952
- # OPTIONS
1953
- # #{summarized_options}
1954
- #
1955
- # API & EXAMPLE
1956
- #
1957
- # # Keep only name and city attributes
1958
- # (clip :suppliers, [:name, :city])
1959
- #
1960
- # # Keep all but name and city attributes
1961
- # (clip :suppliers, [:name, :city], true)
1962
- #
1963
- # DESCRIPTION
1964
- #
1965
- # This operator clips tuples on attributes whose names are specified as
1966
- # arguments. This is similar to the relational PROJECT operator, expect
1967
- # that this one does not removed duplicates that can occur from clipping.
1968
- # In other words, clipping may lead to bags of tuples instead of sets.
1969
- #
1970
- # When used in shell, the clipping/projection key is simply taken from
1971
- # commandline arguments:
1972
- #
1973
- # alf clip suppliers -- name city
1974
- # alf clip suppliers --allbut -- name city
1975
- #
1976
- class Clip < Factory::Operator(__FILE__, __LINE__)
1977
- include Operator::NonRelational, Operator::Transform
1978
-
1979
- # Builds a Clip operator instance
1980
- def initialize(attributes = [], allbut = false)
1981
- @projection_key = ProjectionKey.new(attributes, allbut)
1982
- yield self if block_given?
1983
- end
1984
-
1985
- def attributes=(attrs)
1986
- @projection_key.attributes = attrs
1987
- end
1988
-
1989
- def allbut=(allbut)
1990
- @projection_key.allbut = allbut
1991
- end
1992
-
1993
- # Installs the options
1994
- options do |opt|
1995
- opt.on('-a', '--allbut', 'Apply a ALLBUT clipping') do
1996
- self.allbut = true
1997
- end
1998
- end
1999
-
2000
- protected
2001
-
2002
- # (see Operator::CommandMethods#set_args)
2003
- def set_args(args)
2004
- self.attributes = args.collect{|a| a.to_sym}
2005
- self
2006
- end
2007
-
2008
- # (see Operator::Transform#_tuple2tuple)
2009
- def _tuple2tuple(tuple)
2010
- @projection_key.project(tuple)
2011
- end
2012
-
2013
- end # class Clip
2014
-
2015
- end # Operator::NonRelational
2016
-
2017
- #
2018
- # Marker module and namespace for relational operators
2019
- #
2020
- module Operator::Relational
2021
-
2022
- #
2023
- # Yields the block with each operator module in turn
2024
- #
2025
- def self.each
2026
- constants.each do |c|
2027
- val = const_get(c)
2028
- yield(val) if val.ancestors.include?(Operator::Relational)
2029
- end
2030
- end
2031
-
2032
- # Relational projection (clip + compact)
2033
- #
2034
- # SYNOPSIS
2035
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
2036
- #
2037
- # OPTIONS
2038
- # #{summarized_options}
2039
- #
2040
- # API & EXAMPLE
2041
- #
2042
- # # Project on name and city attributes
2043
- # (project :suppliers, [:name, :city])
2044
- #
2045
- # # Project on all but name and city attributes
2046
- # (allbut :suppliers, [:name, :city])
2047
- #
2048
- # DESCRIPTION
2049
- #
2050
- # This operator projects tuples on attributes whose names are specified as
2051
- # arguments. This is similar to clip, except that this ones is a truly
2052
- # relational one, that is, it also removes duplicates tuples.
2053
- #
2054
- # When used in shell, the clipping/projection key is simply taken from
2055
- # commandline arguments:
2056
- #
2057
- # alf project suppliers -- name city
2058
- # alf project --allbut suppliers -- name city
2059
- #
2060
- class Project < Factory::Operator(__FILE__, __LINE__)
2061
- include Operator::Relational, Operator::Shortcut, Operator::Unary
2062
-
2063
- # Builds a Project operator instance
2064
- def initialize(attributes = [], allbut = false)
2065
- @projection_key = ProjectionKey.new(attributes, allbut)
2066
- yield self if block_given?
2067
- end
2068
-
2069
- def attributes=(attrs)
2070
- @projection_key.attributes = attrs
2071
- end
2072
-
2073
- def allbut=(allbut)
2074
- @projection_key.allbut = allbut
2075
- end
2076
-
2077
- # Installs the options
2078
- options do |opt|
2079
- opt.on('-a', '--allbut', 'Apply a ALLBUT projection') do
2080
- self.allbut = true
2081
- end
2082
- end
2083
-
2084
- protected
2085
-
2086
- # (see Operator::CommandMethods#set_args)
2087
- def set_args(args)
2088
- self.attributes = args.collect{|a| a.to_sym}
2089
- self
2090
- end
2091
-
2092
- # (see Operator::Shortcut#longexpr)
2093
- def longexpr
2094
- chain Operator::NonRelational::Compact.new,
2095
- Operator::NonRelational::Clip.new(@projection_key.attributes,
2096
- @projection_key.allbut),
2097
- datasets
2098
- end
2099
-
2100
- end # class Project
2101
-
2102
- #
2103
- # Relational extension (additional, computed attributes)
2104
- #
2105
- # SYNOPSIS
2106
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 EXPR1 ATTR2 EXPR2...
2107
- #
2108
- # API & EXAMPLE
2109
- #
2110
- # (extend :supplies, :sp => lambda{ sid + "/" + pid },
2111
- # :big => lambda{ qty > 100 ? true : false })
2112
- #
2113
- # DESCRIPTION
2114
- #
2115
- # This command extend input tuples with new attributes (named ATTR1, ...)
2116
- # whose value is the result of evaluating tuple expressions (i.e. EXPR1, ...).
2117
- # See main documentation about the semantics of tuple expressions. When used
2118
- # in shell, the hash of extensions is built from commandline arguments ala
2119
- # Hash[...]. Tuple expressions must be specified as code literals there:
2120
- #
2121
- # alf extend supplies -- sp 'sid + "/" + pid' big "qty > 100 ? true : false"
2122
- #
2123
- # Attributes ATTRx should not already exist, no behavior is guaranteed if
2124
- # this precondition is not respected.
2125
- #
2126
- class Extend < Factory::Operator(__FILE__, __LINE__)
2127
- include Operator::Relational, Operator::Transform
2128
-
2129
- # Extensions as a Hash attr => lambda{...}
2130
- attr_accessor :extensions
2131
-
2132
- # Builds an Extend operator instance
2133
- def initialize(extensions = {})
2134
- @extensions = extensions
2135
- end
2136
-
2137
- protected
2138
-
2139
- # (see Operator::CommandMethods#set_args)
2140
- def set_args(args)
2141
- @extensions = tuple_collect(args.each_slice(2)){|k,v|
2142
- [k.to_sym, TupleHandle.compile(v)]
2143
- }
2144
- self
2145
- end
2146
-
2147
- # (see Operator#_prepare)
2148
- def _prepare
2149
- @handle = TupleHandle.new
2150
- end
2151
-
2152
- # (see Operator::Transform#_tuple2tuple)
2153
- def _tuple2tuple(tuple)
2154
- tuple.merge tuple_collect(@extensions){|k,v|
2155
- [k, @handle.set(tuple).evaluate(v)]
2156
- }
2157
- end
2158
-
2159
- end # class Extend
2160
-
2161
- #
2162
- # Relational renaming (rename some attributes)
2163
- #
2164
- # SYNOPSIS
2165
- # #{program_name} #{command_name} [OPERAND] -- OLD1 NEW1 ...
2166
- #
2167
- # OPTIONS
2168
- # #{summarized_options}
2169
- #
2170
- # API & EXAMPLE
2171
- #
2172
- # (rename :suppliers, :name => :supplier_name, :city => :supplier_city)
2173
- #
2174
- # DESCRIPTION
2175
- #
2176
- # This command renames OLD attributes as NEW as specified by arguments.
2177
- # Attributes OLD should exist in source tuples while attributes NEW should
2178
- # not. When used in shell, renaming attributes are built ala Hash[...] from
2179
- # commandline arguments:
2180
- #
2181
- # alf rename suppliers -- name supplier_name city supplier_city
2182
- #
2183
- class Rename < Factory::Operator(__FILE__, __LINE__)
2184
- include Operator::Relational, Operator::Transform
2185
-
2186
- # Hash of source -> target attribute renamings
2187
- attr_accessor :renaming
2188
-
2189
- # Builds a Rename operator instance
2190
- def initialize(renaming = {})
2191
- @renaming = renaming
2192
- end
2193
-
2194
- protected
2195
-
2196
- # (see Operator::CommandMethods#set_args)
2197
- def set_args(args)
2198
- @renaming = Hash[*args.collect{|c| c.to_sym}]
2199
- self
2200
- end
2201
-
2202
- # (see Operator::Transform#_tuple2tuple)
2203
- def _tuple2tuple(tuple)
2204
- tuple_collect(tuple){|k,v| [@renaming[k] || k, v]}
2205
- end
2206
-
2207
- end # class Rename
2208
-
2209
- #
2210
- # Relational restriction (aka where, predicate filtering)
2211
- #
2212
- # SYNOPSIS
2213
- # #{program_name} #{command_name} [OPERAND] -- EXPR
2214
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
2215
- #
2216
- # API & EXAMPLE
2217
- #
2218
- # # Restrict to suppliers with status greater than 20
2219
- # (restrict :suppliers, lambda{ status > 20 })
2220
- #
2221
- # # Restrict to suppliers that live in London
2222
- # (restrict :suppliers, lambda{ city == 'London' })
2223
- #
2224
- # DESCRIPTION
2225
- #
2226
- # This command restricts tuples to those for which EXPR evaluates to true.
2227
- # EXPR must be a valid tuple expression that should return a truth-value.
2228
- # When used in shell, the predicate is taken as a string and compiled with
2229
- # TupleHandle.compile. We also provide a shortcut for equality expressions.
2230
- # Note that, in that case, values are expected to be ruby code literals,
2231
- # evaluated with Kernel.eval. Therefore, strings must be doubly quoted.
2232
- #
2233
- # alf restrict suppliers -- "status > 20"
2234
- # alf restrict suppliers -- city "'London'"
2235
- #
2236
- class Restrict < Factory::Operator(__FILE__, __LINE__)
2237
- include Operator::Relational, Operator::Unary
2238
-
2239
- # Restriction predicate
2240
- attr_accessor :predicate
2241
-
2242
- # Builds a Restrict operator instance
2243
- def initialize(predicate = "true")
2244
- @predicate = TupleHandle.compile(predicate)
2245
- yield self if block_given?
2246
- end
2247
-
2248
- protected
2249
-
2250
- # (see Operator::CommandMethods#set_args)
2251
- def set_args(args)
2252
- @predicate = if args.size > 1
2253
- TupleHandle.compile tuple_collect(args.each_slice(2)){|a,expr|
2254
- [a, Kernel.eval(expr)]
2255
- }
2256
- else
2257
- TupleHandle.compile(args.first)
2258
- end
2259
- self
2260
- end
2261
-
2262
- # (see Operator#_each)
2263
- def _each
2264
- handle = TupleHandle.new
2265
- each_input_tuple{|t| yield(t) if handle.set(t).evaluate(@predicate) }
2266
- end
2267
-
2268
- end # class Restrict
2269
-
2270
- #
2271
- # Relational join (and cross-join)
2272
- #
2273
- # SYNOPSIS
2274
- # #{program_name} #{command_name} [LEFT] RIGHT
2275
- #
2276
- # API & EXAMPLE
2277
- #
2278
- # (join :suppliers, :parts)
2279
- #
2280
- # DESCRIPTION
2281
- #
2282
- # This operator computes the (natural) join of two input iterators. Natural
2283
- # join means that, unlike what is commonly used in SQL, the default behavior
2284
- # is to join on common attributes. You can use the rename operator if this
2285
- # behavior does not fit your needs.
2286
- #
2287
- # alf join suppliers supplies
2288
- #
2289
- class Join < Factory::Operator(__FILE__, __LINE__)
2290
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2291
-
2292
- #
2293
- # Performs a Join of two relations through a Hash buffer on the right
2294
- # one.
2295
- #
2296
- class HashBased
2297
- include Operator::Binary
2298
-
2299
- #
2300
- # Implements a special Buffer for join-based relational operators.
2301
- #
2302
- # Example:
2303
- #
2304
- # buffer = Buffer::Join.new(...) # pass the right part of the join
2305
- # left.each do |left_tuple|
2306
- # key, rest = buffer.split(tuple)
2307
- # buffer.each(key) do |right_tuple|
2308
- # #
2309
- # # do whatever you want with left and right tuples
2310
- # #
2311
- # end
2312
- # end
2313
- #
2314
- class JoinBuffer
2315
-
2316
- #
2317
- # Creates a buffer instance with the right part of the join.
2318
- #
2319
- # @param [Iterator] enum a tuple iterator, right part of the join.
2320
- #
2321
- def initialize(enum)
2322
- @buffer = nil
2323
- @key = nil
2324
- @enum = enum
2325
- end
2326
-
2327
- #
2328
- # Splits a left tuple according to the common key.
2329
- #
2330
- # @param [Hash] tuple a left tuple of the join
2331
- # @return [Array] an array of two elements, the key and the rest
2332
- # @see ProjectionKey#split
2333
- #
2334
- def split(tuple)
2335
- _init(tuple) unless @key
2336
- @key.split(tuple)
2337
- end
2338
-
2339
- #
2340
- # Yields each right tuple that matches a given key value.
2341
- #
2342
- # @param [Hash] key a tuple that matches elements of the common key
2343
- # (typically the first element returned by #split)
2344
- #
2345
- def each(key)
2346
- @buffer[key].each(&Proc.new) if @buffer.has_key?(key)
2347
- end
2348
-
2349
- private
2350
-
2351
- # Initialize the buffer with a right tuple
2352
- def _init(right)
2353
- @buffer = Hash.new{|h,k| h[k] = []}
2354
- @enum.each do |left|
2355
- @key = Tools::ProjectionKey.coerce(left.keys & right.keys) unless @key
2356
- @buffer[@key.project(left)] << left
2357
- end
2358
- @key = Tools::ProjectionKey.coerce([]) unless @key
2359
- end
2360
-
2361
- end # class JoinBuffer
2362
-
2363
- protected
2364
-
2365
- # (see Operator#_each)
2366
- def _each
2367
- buffer = JoinBuffer.new(right)
2368
- left.each do |left_tuple|
2369
- key, rest = buffer.split(left_tuple)
2370
- buffer.each(key) do |right|
2371
- yield(left_tuple.merge(right))
2372
- end
2373
- end
2374
- end
2375
-
2376
- end
2377
-
2378
- protected
2379
-
2380
- # (see Shortcut#longexpr)
2381
- def longexpr
2382
- chain HashBased.new,
2383
- datasets
2384
- end
2385
-
2386
- end # class Join
2387
-
2388
- #
2389
- # Relational intersection (aka a logical and)
2390
- #
2391
- # SYNOPSIS
2392
- # #{program_name} #{command_name} [LEFT] RIGHT
2393
- #
2394
- # API & EXAMPLE
2395
- #
2396
- # # Give suppliers that live in Paris and have status >= 20
2397
- # (intersect \\
2398
- # (restrict :suppliers, lambda{ status >= 20 }),
2399
- # (restrict :suppliers, lambda{ city == 'Paris' }))
2400
- #
2401
- # DESCRIPTION
2402
- #
2403
- # This operator computes the intersection between its two operands. The
2404
- # intersection is simply the set of common tuples between them. Both operands
2405
- # must have the same heading.
2406
- #
2407
- # alf intersect ... ...
2408
- #
2409
- class Intersect < Factory::Operator(__FILE__, __LINE__)
2410
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2411
-
2412
- class HashBased
2413
- include Operator::Binary
2414
-
2415
- protected
2416
-
2417
- def _prepare
2418
- @index = Hash.new
2419
- right.each{|t| @index[t] = true}
2420
- end
2421
-
2422
- def _each
2423
- left.each do |left_tuple|
2424
- yield(left_tuple) if @index.has_key?(left_tuple)
2425
- end
2426
- end
2427
-
2428
- end
2429
-
2430
- protected
2431
-
2432
- # (see Shortcut#longexpr)
2433
- def longexpr
2434
- chain HashBased.new,
2435
- datasets
2436
- end
2437
-
2438
- end # class Intersect
2439
-
2440
- #
2441
- # Relational minus (aka difference)
2442
- #
2443
- # SYNOPSIS
2444
- # #{program_name} #{command_name} [LEFT] RIGHT
2445
- #
2446
- # API & EXAMPLE
2447
- #
2448
- # # Give all suppliers but those living in Paris
2449
- # (minus :suppliers,
2450
- # (restrict :suppliers, lambda{ city == 'Paris' }))
2451
- #
2452
- # DESCRIPTION
2453
- #
2454
- # This operator computes the difference between its two operands. The
2455
- # difference is simply the set of tuples in left operands non shared by
2456
- # the right one.
2457
- #
2458
- # alf minus ... ...
2459
- #
2460
- class Minus < Factory::Operator(__FILE__, __LINE__)
2461
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2462
-
2463
- class HashBased
2464
- include Operator::Binary
2465
-
2466
- protected
2467
-
2468
- def _prepare
2469
- @index = Hash.new
2470
- right.each{|t| @index[t] = true}
2471
- end
2472
-
2473
- def _each
2474
- left.each do |left_tuple|
2475
- yield(left_tuple) unless @index.has_key?(left_tuple)
2476
- end
2477
- end
2478
-
2479
- end
2480
-
2481
- protected
2482
-
2483
- # (see Shortcut#longexpr)
2484
- def longexpr
2485
- chain HashBased.new,
2486
- datasets
2487
- end
2488
-
2489
- end # class Minus
2490
-
2491
- #
2492
- # Relational union
2493
- #
2494
- # SYNOPSIS
2495
- # #{program_name} #{command_name} [LEFT] RIGHT
2496
- #
2497
- # API & EXAMPLE
2498
- #
2499
- # (union (project :suppliers, [:city]),
2500
- # (project :parts, [:city]))
2501
- #
2502
- # DESCRIPTION
2503
- #
2504
- # This operator computes the union join of two input iterators. Input
2505
- # iterators should have the same heading. The result never contain duplicates.
2506
- #
2507
- # alf union ... ...
2508
- #
2509
- class Union < Factory::Operator(__FILE__, __LINE__)
2510
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2511
-
2512
- class DisjointBased
2513
- include Operator::Binary
2514
-
2515
- protected
2516
-
2517
- def _each
2518
- left.each(&Proc.new)
2519
- right.each(&Proc.new)
2520
- end
2521
-
2522
- end
2523
-
2524
- protected
2525
-
2526
- # (see Shortcut#longexpr)
2527
- def longexpr
2528
- chain Operator::NonRelational::Compact.new,
2529
- DisjointBased.new,
2530
- datasets
2531
- end
2532
-
2533
- end # class Union
2534
-
2535
- #
2536
- # Relational matching
2537
- #
2538
- # SYNOPSIS
2539
- # #{program_name} #{command_name} [LEFT] RIGHT
2540
- #
2541
- # API & EXAMPLE
2542
- #
2543
- # (matching :suppliers, :supplies)
2544
- #
2545
- # DESCRIPTION
2546
- #
2547
- # This operator restricts left tuples to those for which there exists at
2548
- # least one right tuple that joins. This is a shortcut operator for the
2549
- # longer expression:
2550
- #
2551
- # (project (join xxx, yyy), [xxx's attributes])
2552
- #
2553
- # In shell:
2554
- #
2555
- # alf matching suppliers supplies
2556
- #
2557
- class Matching < Factory::Operator(__FILE__, __LINE__)
2558
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2559
-
2560
- #
2561
- # Performs a Matching of two relations through a Hash buffer on the right
2562
- # one.
2563
- #
2564
- class HashBased
2565
- include Operator::Binary
2566
-
2567
- # (see Operator#_each)
2568
- def _each
2569
- seen, key = nil, nil
2570
- left.each do |left_tuple|
2571
- seen ||= begin
2572
- h = Hash.new
2573
- right.each do |right_tuple|
2574
- key ||= Tools::ProjectionKey.coerce(left_tuple.keys & right_tuple.keys)
2575
- h[key.project(right_tuple)] = true
2576
- end
2577
- key ||= Tools::ProjectionKey.coerce([])
2578
- h
2579
- end
2580
- yield(left_tuple) if seen.has_key?(key.project(left_tuple))
2581
- end
2582
- end
2583
-
2584
- end # class HashBased
2585
-
2586
- protected
2587
-
2588
- # (see Shortcut#longexpr)
2589
- def longexpr
2590
- chain HashBased.new,
2591
- datasets
2592
- end
2593
-
2594
- end # class Matching
2595
-
2596
- #
2597
- # Relational not matching
2598
- #
2599
- # SYNOPSIS
2600
- # #{program_name} #{command_name} [LEFT] RIGHT
2601
- #
2602
- # API & EXAMPLE
2603
- #
2604
- # (not_matching :suppliers, :supplies)
2605
- #
2606
- # DESCRIPTION
2607
- #
2608
- # This operator restricts left tuples to those for which there does not
2609
- # exist any right tuple that joins. This is a shortcut operator for the
2610
- # longer expression:
2611
- #
2612
- # (minus xxx, (matching xxx, yyy))
2613
- #
2614
- # In shell:
2615
- #
2616
- # alf not-matching suppliers supplies
2617
- #
2618
- class NotMatching < Factory::Operator(__FILE__, __LINE__)
2619
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2620
-
2621
- #
2622
- # Performs a NotMatching of two relations through a Hash buffer on the
2623
- # right one.
2624
- #
2625
- class HashBased
2626
- include Operator::Binary
2627
-
2628
- # (see Operator#_each)
2629
- def _each
2630
- seen, key = nil, nil
2631
- left.each do |left_tuple|
2632
- seen ||= begin
2633
- h = Hash.new
2634
- right.each do |right_tuple|
2635
- key ||= Tools::ProjectionKey.coerce(left_tuple.keys & right_tuple.keys)
2636
- h[key.project(right_tuple)] = true
2637
- end
2638
- key ||= Tools::ProjectionKey.coerce([])
2639
- h
2640
- end
2641
- yield(left_tuple) unless seen.has_key?(key.project(left_tuple))
2642
- end
2643
- end
2644
-
2645
- end # class HashBased
2646
-
2647
- protected
2648
-
2649
- # (see Shortcut#longexpr)
2650
- def longexpr
2651
- chain HashBased.new,
2652
- datasets
2653
- end
2654
-
2655
- end # class NotMatching
2656
-
2657
- #
2658
- # Relational wraping (tuple-valued attributes)
2659
- #
2660
- # SYNOPSIS
2661
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2662
- #
2663
- # API & EXAMPLE
2664
- #
2665
- # (wrap :suppliers, [:city, :status], :loc_and_status)
2666
- #
2667
- # DESCRIPTION
2668
- #
2669
- # This operator wraps attributes ATTR1 to ATTRN as a new, tuple-based
2670
- # attribute whose name is NEWNAME. When used in shell, names of wrapped
2671
- # attributes are taken from commandline arguments, expected the last one
2672
- # which defines the new name to use:
2673
- #
2674
- # alf wrap suppliers -- city status loc_and_status
2675
- #
2676
- class Wrap < Factory::Operator(__FILE__, __LINE__)
2677
- include Operator::Relational, Operator::Transform
2678
-
2679
- # Array of wraping attributes
2680
- attr_accessor :attributes
2681
-
2682
- # New name for the wrapped attribute
2683
- attr_accessor :as
2684
-
2685
- # Builds a Wrap operator instance
2686
- def initialize(attributes = [], as = :wrapped)
2687
- @attributes = attributes
2688
- @as = as
2689
- end
2690
-
2691
- protected
2692
-
2693
- # (see Operator::CommandMethods#set_args)
2694
- def set_args(args)
2695
- @as = args.pop.to_sym
2696
- @attributes = args.collect{|a| a.to_sym}
2697
- self
2698
- end
2699
-
2700
- # (see Operator::Transform#_tuple2tuple)
2701
- def _tuple2tuple(tuple)
2702
- others = tuple_collect(tuple.keys - @attributes){|k| [k,tuple[k]] }
2703
- others[as] = tuple_collect(attributes){|k| [k, tuple[k]] }
2704
- others
2705
- end
2706
-
2707
- end # class Wrap
2708
-
2709
- #
2710
- # Relational un-wraping (inverse of wrap)
2711
- #
2712
- # SYNOPSIS
2713
- # #{program_name} #{command_name} [OPERAND] -- ATTR
2714
- #
2715
- # API & EXAMPLE
2716
- #
2717
- # # Assuming wrapped = (wrap :suppliers, [:city, :status], :loc_and_status)
2718
- # (unwrap wrapped, :loc_and_status)
2719
- #
2720
- # DESCRIPTION
2721
- #
2722
- # This operator unwraps the tuple-valued attribute named ATTR so as to
2723
- # flatten its pairs with 'upstream' tuple. The latter should be such so that
2724
- # no name collision occurs. When used in shell, the name of the attribute to
2725
- # unwrap is taken as the first commandline argument:
2726
- #
2727
- # alf unwrap wrap -- loc_and_status
2728
- #
2729
- class Unwrap < Factory::Operator(__FILE__, __LINE__)
2730
- include Operator::Relational, Operator::Transform
2731
-
2732
- # Name of the attribute to unwrap
2733
- attr_accessor :attribute
2734
-
2735
- # Builds a Rename operator instance
2736
- def initialize(attribute = :wrapped)
2737
- @attribute = attribute
2738
- end
2739
-
2740
- protected
2741
-
2742
- # (see Operator::CommandMethods#set_args)
2743
- def set_args(args)
2744
- @attribute = args.first.to_sym
2745
- self
2746
- end
2747
-
2748
- # (see Operator::Transform#_tuple2tuple)
2749
- def _tuple2tuple(tuple)
2750
- tuple = tuple.dup
2751
- wrapped = tuple.delete(@attribute) || {}
2752
- tuple.merge(wrapped)
2753
- end
2754
-
2755
- end # class Unwrap
2756
-
2757
- #
2758
- # Relational grouping (relation-valued attributes)
2759
- #
2760
- # SYNOPSIS
2761
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2762
- #
2763
- # API & EXAMPLE
2764
- #
2765
- # (group :supplies, [:pid, :qty], :supplying)
2766
- # (group :supplies, [:sid], :supplying, true)
2767
- #
2768
- # DESCRIPTION
2769
- #
2770
- # This operator groups attributes ATTR1 to ATTRN as a new, relation-valued
2771
- # attribute whose name is NEWNAME. When used in shell, names of grouped
2772
- # attributes are taken from commandline arguments, expected the last one
2773
- # which defines the new name to use:
2774
- #
2775
- # alf group supplies -- pid qty supplying
2776
- # alf group supplies --allbut -- sid supplying
2777
- #
2778
- class Group < Factory::Operator(__FILE__, __LINE__)
2779
- include Operator::Relational, Operator::Unary
2780
-
2781
- # Attributes on which grouping applies
2782
- attr_accessor :attributes
2783
-
2784
- # Attribute name for grouping tuple
2785
- attr_accessor :as
2786
-
2787
- # Group all but attributes?
2788
- attr_accessor :allbut
2789
-
2790
- # Creates a Group instance
2791
- def initialize(attributes = [], as = :group, allbut = false)
2792
- @attributes = attributes
2793
- @as = as
2794
- @allbut = allbut
2795
- end
2796
-
2797
- options do |opt|
2798
- opt.on('--allbut', "Group all but specified attributes"){ @allbut = true }
2799
- end
2800
-
2801
- protected
2802
-
2803
- # (see Operator::CommandMethods#set_args)
2804
- def set_args(args)
2805
- @as = args.pop.to_sym
2806
- @attributes = args.collect{|a| a.to_sym}
2807
- self
2808
- end
2809
-
2810
- # See Operator#_prepare
2811
- def _prepare
2812
- pkey = ProjectionKey.new(attributes, !allbut)
2813
- @index = Hash.new{|h,k| h[k] = Set.new}
2814
- each_input_tuple do |tuple|
2815
- key, rest = pkey.split(tuple)
2816
- @index[key] << rest
2817
- end
2818
- end
2819
-
2820
- # See Operator#_each
2821
- def _each
2822
- @index.each_pair do |k,v|
2823
- yield(k.merge(@as => Relation.coerce(v)))
2824
- end
2825
- end
2826
-
2827
- end # class Group
2828
-
2829
- #
2830
- # Relational un-grouping (inverse of group)
2831
- #
2832
- # SYNOPSIS
2833
- # #{program_name} #{command_name} [OPERAND] -- ATTR
2834
- #
2835
- # API & EXAMPLE
2836
- #
2837
- # # Assuming grouped = (group enum, [:pid, :qty], :supplying)
2838
- # (ungroup grouped, :supplying)
2839
- #
2840
- # DESCRIPTION
2841
- #
2842
- # This operator ungroups the relation-valued attribute named ATTR and outputs
2843
- # tuples as the flattening of each of of its tuples merged with the upstream
2844
- # one. Sub relation should be such so that no name collision occurs. When
2845
- # used in shell, the name of the attribute to ungroup is taken as the first
2846
- # commandline argument:
2847
- #
2848
- # alf ungroup group -- supplying
2849
- #
2850
- class Ungroup < Factory::Operator(__FILE__, __LINE__)
2851
- include Operator::Relational, Operator::Unary
2852
-
2853
- # Relation-value attribute to ungroup
2854
- attr_accessor :attribute
2855
-
2856
- # Creates a Group instance
2857
- def initialize(attribute = :grouped)
2858
- @attribute = attribute
2859
- end
2860
-
2861
- protected
2862
-
2863
- # (see Operator::CommandMethods#set_args)
2864
- def set_args(args)
2865
- @attribute = args.pop.to_sym
2866
- self
2867
- end
2868
-
2869
- # See Operator#_each
2870
- def _each
2871
- each_input_tuple do |tuple|
2872
- tuple = tuple.dup
2873
- subrel = tuple.delete(@attribute)
2874
- subrel.each do |subtuple|
2875
- yield(tuple.merge(subtuple))
2876
- end
2877
- end
2878
- end
2879
-
2880
- end # class Ungroup
2881
-
2882
- #
2883
- # Relational summarization (group-by + aggregate ops)
2884
- #
2885
- # SYNOPSIS
2886
- # #{program_name} #{command_name} [OPERAND] [--allbut] --by=KEY1,KEY2... -- AGG1 EXPR1...
2887
- #
2888
- # OPTIONS
2889
- # #{summarized_options}
2890
- #
2891
- # API & EXAMPLE
2892
- #
2893
- # (summarize :supplies, [:sid],
2894
- # :total_qty => Aggregator.sum(:qty))
2895
- #
2896
- # # Or, to specify an allbut projection
2897
- # (summarize :supplies, [:qty, :pid],
2898
- # :total_qty => Aggregator.sum(:qty), true)
2899
- #
2900
- # DESCRIPTION
2901
- #
2902
- # This operator summarizes input tuples on the projection on KEY1,KEY2,...
2903
- # attributes and applies aggregate operators on sets of matching tuples.
2904
- # Introduced names AGG should be disjoint from KEY attributes.
2905
- #
2906
- # When used in shell, the aggregations are taken from commandline arguments
2907
- # AGG and EXPR, where AGG is the name of a new attribute and EXPR is an
2908
- # aggregation expression evaluated on Aggregator:
2909
- #
2910
- # alf summarize supplies --by=sid -- total_qty "sum(:qty)"
2911
- # alf summarize supplies --allbut --by=pid,qty -- total_qty "sum(:qty)"
2912
- #
2913
- class Summarize < Factory::Operator(__FILE__, __LINE__)
2914
- include Operator::Relational, Operator::Shortcut, Operator::Unary
2915
-
2916
- # By attributes
2917
- attr_accessor :by
2918
-
2919
- # Allbut on by?
2920
- attr_accessor :allbut
2921
-
2922
- # Aggregations as a AGG => Aggregator(EXPR) hash
2923
- attr_accessor :aggregators
2924
-
2925
- def initialize(by = [], aggregators = {}, allbut = false)
2926
- @by = by
2927
- @allbut = allbut
2928
- @aggregators = aggregators
2929
- end
2930
-
2931
- # Installs the options
2932
- options do |opt|
2933
- opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
2934
- @by = args.collect{|a| a.to_sym}
2935
- end
2936
- opt.on('--allbut', 'Make an allbut projection/summarization') do
2937
- @allbut = true
2938
- end
2939
- end
2940
-
2941
- # Summarizes according to a complete order
2942
- class SortBased
2943
- include Alf::Operator::Cesure
2944
-
2945
- attr_reader :cesure_key
2946
- attr_reader :aggregators
2947
-
2948
- def initialize(by_key, aggregators)
2949
- @cesure_key, @aggregators = by_key, aggregators
2950
- end
2951
-
2952
- protected
2953
-
2954
- def start_cesure(key, receiver)
2955
- @aggs = tuple_collect(@aggregators) do |a,agg|
2956
- [a, agg.least]
2957
- end
2958
- end
2959
-
2960
- def accumulate_cesure(tuple, receiver)
2961
- @aggs = tuple_collect(@aggregators) do |a,agg|
2962
- [a, agg.happens(@aggs[a], tuple)]
2963
- end
2964
- end
2965
-
2966
- def flush_cesure(key, receiver)
2967
- @aggs = tuple_collect(@aggregators) do |a,agg|
2968
- [a, agg.finalize(@aggs[a])]
2969
- end
2970
- receiver.call key.merge(@aggs)
2971
- end
2972
-
2973
- end # class SortBased
2974
-
2975
- # Summarizes in-memory with a hash
2976
- class HashBased
2977
- include Operator::Relational, Operator::Unary
2978
-
2979
- attr_reader :by_key
2980
- attr_reader :aggregators
2981
-
2982
- def initialize(by_key, aggregators)
2983
- @by_key, @aggregators = by_key, aggregators
2984
- end
2985
-
2986
- protected
2987
-
2988
- def _each
2989
- index = Hash.new do |h,k|
2990
- h[k] = tuple_collect(@aggregators) do |a,agg|
2991
- [a, agg.least]
2992
- end
2993
- end
2994
- each_input_tuple do |tuple|
2995
- key, rest = by_key.split(tuple)
2996
- index[key] = tuple_collect(@aggregators) do |a,agg|
2997
- [a, agg.happens(index[key][a], tuple)]
2998
- end
2999
- end
3000
- index.each_pair do |key,aggs|
3001
- aggs = tuple_collect(@aggregators) do |a,agg|
3002
- [a, agg.finalize(aggs[a])]
3003
- end
3004
- yield key.merge(aggs)
3005
- end
3006
- end
3007
-
3008
- end
3009
-
3010
- protected
3011
-
3012
- # (see Operator::CommandMethods#set_args)
3013
- def set_args(args)
3014
- @aggregators = tuple_collect(args.each_slice(2)) do |a,expr|
3015
- [a.to_sym, Aggregator.compile(expr)]
3016
- end
3017
- self
3018
- end
3019
-
3020
- def longexpr
3021
- if @allbut
3022
- by_key = Tools::ProjectionKey.new(@by, @allbut)
3023
- chain HashBased.new(by_key, @aggregators),
3024
- datasets
3025
- else
3026
- by_key = Tools::ProjectionKey.new(@by, @allbut)
3027
- chain SortBased.new(by_key, @aggregators),
3028
- Operator::NonRelational::Sort.new(by_key.to_ordering_key),
3029
- datasets
3030
- end
3031
- end
3032
-
3033
- end # class Summarize
3034
-
3035
- #
3036
- # Relational ranking (explicit tuple positions)
3037
- #
3038
- # SYNOPSIS
3039
- # #{program_name} #{command_name} [OPERAND] --order=OR1... -- [RANKNAME]
3040
- #
3041
- # OPTIONS
3042
- # #{summarized_options}
3043
- #
3044
- # API & EXAMPLE
3045
- #
3046
- # # Position attribute => # of tuples with smaller weight
3047
- # (rank :parts, [:weight], :position)
3048
- #
3049
- # # Position attribute => # of tuples with greater weight
3050
- # (rank :parts, [[:weight, :desc]], :position)
3051
- #
3052
- # DESCRIPTION
3053
- #
3054
- # This operator computes the ranking of input tuples, according to an order
3055
- # relation. Precisely, it extends the input tuples with a RANKNAME attribute
3056
- # whose value is the number of tuples which are considered strictly less
3057
- # according to the specified order. For the two examples above:
3058
- #
3059
- # alf rank parts --order=weight -- position
3060
- # alf rank parts --order=weight,desc -- position
3061
- #
3062
- # Note that, unless the ordering key includes a candidate key for the input
3063
- # relation, the newly RANKNAME attribute is not necessarily a candidate key
3064
- # for the output one. In the example above, adding the :pid attribute
3065
- # ensured that position will contain all different values:
3066
- #
3067
- # alf rank parts --order=weight,pid -- position
3068
- #
3069
- # Or even:
3070
- #
3071
- # alf rank parts --order=weight,desc,pid,asc -- position
3072
- #
3073
- class Rank < Factory::Operator(__FILE__, __LINE__)
3074
- include Operator::Relational, Operator::Shortcut, Operator::Unary
3075
-
3076
- # Ranking order
3077
- attr_accessor :order
3078
-
3079
- # Ranking attribute name
3080
- attr_accessor :ranking_name
3081
-
3082
- def initialize(order = [], ranking_name = :rank)
3083
- @order, @ranking_name = order, ranking_name
3084
- end
3085
-
3086
- options do |opt|
3087
- opt.on('--order=x,y,z', 'Specify ranking order', Array) do |args|
3088
- @order = args.collect{|a| a.to_sym}
3089
- end
3090
- end
3091
-
3092
- class SortBased
3093
- include Operator::Cesure
3094
-
3095
- def initialize(order, ranking_name)
3096
- @order, @ranking_name = order, ranking_name
3097
- end
3098
-
3099
- def ordering_key
3100
- OrderingKey.coerce @order
3101
- end
3102
-
3103
- def cesure_key
3104
- ProjectionKey.coerce(ordering_key)
3105
- end
3106
-
3107
- def start_cesure(key, receiver)
3108
- @rank ||= 0
3109
- @last_block = 0
3110
- end
3111
-
3112
- def accumulate_cesure(tuple, receiver)
3113
- receiver.call tuple.merge(@ranking_name => @rank)
3114
- @last_block += 1
3115
- end
3116
-
3117
- def flush_cesure(key, receiver)
3118
- @rank += @last_block
3119
- end
3120
-
3121
- end # class SortBased
3122
-
3123
- protected
3124
-
3125
- # (see Operator::CommandMethods#set_args)
3126
- def set_args(args)
3127
- unless args.empty?
3128
- self.ranking_name = args.first.to_sym
3129
- end
3130
- self
3131
- end
3132
-
3133
- def ordering_key
3134
- OrderingKey.coerce @order
3135
- end
3136
-
3137
- def longexpr
3138
- sort_key = ordering_key
3139
- chain SortBased.new(sort_key, @ranking_name),
3140
- Operator::NonRelational::Sort.new(sort_key),
3141
- datasets
3142
- end
3143
-
3144
- end # class Rank
3145
-
3146
- #
3147
- # Relational quota-queries (position, sum progression, etc.)
3148
- #
3149
- # SYNOPSIS
3150
- # #{program_name} #{command_name} [OPERAND] --by=KEY1,... --order=OR1... AGG1 EXPR1...
3151
- #
3152
- # OPTIONS
3153
- # #{summarized_options}
3154
- #
3155
- # API & EXAMPLE
3156
- #
3157
- # (quota :supplies, [:sid], [:qty],
3158
- # :position => Aggregator.count,
3159
- # :sum_qty => Aggregator.sum(:qty))
3160
- #
3161
- # DESCRIPTION
3162
- #
3163
- # This operator computes quota values on input tuples.
3164
- #
3165
- # alf quota supplies --by=sid --order=qty -- position count sum_qty "sum(:qty)"
3166
- #
3167
- class Quota < Factory::Operator(__FILE__, __LINE__)
3168
- include Operator::Relational, Operator::Experimental,
3169
- Operator::Shortcut, Operator::Unary
3170
-
3171
- # Quota by
3172
- attr_accessor :by
3173
-
3174
- # Quota order
3175
- attr_accessor :order
3176
-
3177
- # Quota aggregations
3178
- attr_accessor :aggregators
3179
-
3180
- def initialize(by = [], order = [], aggregators = {})
3181
- @by, @order, @aggregators = by, order, aggregators
3182
- end
3183
-
3184
- options do |opt|
3185
- opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
3186
- @by = args.collect{|a| a.to_sym}
3187
- end
3188
- opt.on('--order=x,y,z', 'Specify order attributes', Array) do |args|
3189
- @order = args.collect{|a| a.to_sym}
3190
- end
3191
- end
3192
-
3193
- class SortBased
3194
- include Operator::Cesure
3195
-
3196
- def initialize(by, order, aggregators)
3197
- @by, @order, @aggregators = by, order, aggregators
3198
- end
3199
-
3200
- def cesure_key
3201
- ProjectionKey.coerce @by
3202
- end
3203
-
3204
- def ordering_key
3205
- OrderingKey.coerce @order
3206
- end
3207
-
3208
- def start_cesure(key, receiver)
3209
- @aggs = tuple_collect(@aggregators) do |a,agg|
3210
- [a, agg.least]
3211
- end
3212
- end
3213
-
3214
- def accumulate_cesure(tuple, receiver)
3215
- @aggs = tuple_collect(@aggregators) do |a,agg|
3216
- [a, agg.happens(@aggs[a], tuple)]
3217
- end
3218
- thisone = tuple_collect(@aggregators) do |a,agg|
3219
- [a, agg.finalize(@aggs[a])]
3220
- end
3221
- receiver.call tuple.merge(thisone)
3222
- end
3223
-
3224
- end # class SortBased
3225
-
3226
- protected
3227
-
3228
- # (see Operator::CommandMethods#set_args)
3229
- def set_args(args)
3230
- @aggregators = tuple_collect(args.each_slice(2)) do |a,expr|
3231
- [a.to_sym, Aggregator.compile(expr)]
3232
- end
3233
- self
3234
- end
3235
-
3236
- def cesure_key
3237
- ProjectionKey.coerce @by
3238
- end
3239
-
3240
- def ordering_key
3241
- OrderingKey.coerce @order
3242
- end
3243
-
3244
- def longexpr
3245
- sort_key = cesure_key.to_ordering_key + ordering_key
3246
- chain SortBased.new(@by, @order, @aggregators),
3247
- Operator::NonRelational::Sort.new(sort_key),
3248
- datasets
3249
- end
3250
-
3251
- end # class Quota
3252
-
3253
- end
3254
-
3255
- #
3256
- # Aggregation operator.
3257
- #
3258
- class Aggregator
3259
-
3260
- # Aggregate options
3261
- attr_reader :options
3262
-
3263
- #
3264
- # Automatically installs factory methods for inherited classes.
3265
- #
3266
- # Example:
3267
- # class Sum < Aggregate # will give a method Aggregator.sum
3268
- # ...
3269
- # end
3270
- # Aggregator.sum(:size) # factor an Sum aggregator on tuple[:size]
3271
- # Aggregator.sum{ size } # idem but works on any tuple expression
3272
- #
3273
- def self.inherited(clazz)
3274
- basename = Tools.ruby_case(Tools.class_name(clazz))
3275
- instance_eval <<-EOF
3276
- def #{basename}(*args, &block)
3277
- #{clazz}.new(*args, &block)
3278
- end
3279
- EOF
3280
- end
3281
-
3282
- def self.compile(expr, &block)
3283
- instance_eval(expr, &block)
3284
- end
3285
-
3286
- #
3287
- # Creates an Aggregator instance.
3288
- #
3289
- # This constructor can be used either by passing an attribute
3290
- # argument or a block that will be evaluated on a TupleHandle
3291
- # instance set on each aggregated tuple.
3292
- #
3293
- # Aggregator.new(:size) # will aggregate on tuple[:size]
3294
- # Aggregator.new{ size * price } # ... on tuple[:size] * tuple[:price]
3295
- #
3296
- def initialize(attribute = nil, options = {}, &block)
3297
- attribute, options = nil, attribute if attribute.is_a?(Hash)
3298
- @handle = Tools::TupleHandle.new
3299
- @options = default_options.merge(options)
3300
- @functor = Tools::TupleHandle.compile(attribute || block)
3301
- end
3302
-
3303
- #
3304
- # Returns the default options to use
3305
- #
3306
- def default_options
3307
- {}
3308
- end
3309
-
3310
- #
3311
- # Returns the least value, which is the one to use on an empty
3312
- # set.
3313
- #
3314
- # This method is intended to be overriden by subclasses; default
3315
- # implementation returns nil.
3316
- #
3317
- def least
3318
- nil
3319
- end
3320
-
3321
- #
3322
- # This method is called on each aggregated tuple and must return
3323
- # an updated _memo_ value. It can be seen as the block typically
3324
- # given to Enumerable.inject.
3325
- #
3326
- # The default implementation collects the pre-value on the tuple
3327
- # and delegates to _happens.
3328
- #
3329
- def happens(memo, tuple)
3330
- _happens(memo, @handle.set(tuple).evaluate(@functor))
3331
- end
3332
-
3333
- #
3334
- # This method finalizes a computation.
3335
- #
3336
- # Argument _memo_ is either _least_ or the result of aggregating
3337
- # through _happens_. The default implementation simply returns
3338
- # _memo_. The method is intended to be overriden for complex
3339
- # aggregations that need statefull information. See Avg for an
3340
- # example
3341
- #
3342
- def finalize(memo)
3343
- memo
3344
- end
3345
-
3346
- #
3347
- # Aggregates over an enumeration of tuples.
3348
- #
3349
- def aggregate(enum)
3350
- finalize(
3351
- enum.inject(least){|memo,tuple|
3352
- happens(memo, tuple)
3353
- })
3354
- end
3355
-
3356
- protected
3357
-
3358
- #
3359
- # @see happens.
3360
- #
3361
- # This method is intended to be overriden and returns _value_
3362
- # by default, making this aggregator a "Last" one...
3363
- #
3364
- def _happens(memo, value)
3365
- value
198
+ def Alf.Command()
199
+ Quickl::Command(){|builder|
200
+ builder.command_parent = Alf::Command::Main
201
+ builder.doc_extractor = DOC_EXTRACTOR
202
+ yield(builder) if block_given?
203
+ }
3366
204
  end
205
+
206
+ require 'alf/command/main'
207
+ require 'alf/command/exec'
208
+ require 'alf/command/help'
209
+ require 'alf/command/show'
210
+ end # module Command
3367
211
 
3368
- #
3369
- # Defines a COUNT aggregation operator
3370
- #
3371
- class Count < Aggregator
3372
- def least(); 0; end
3373
- def happens(memo, tuple) memo + 1; end
3374
- end # class Count
3375
-
3376
- #
3377
- # Defines a SUM aggregation operator
3378
- #
3379
- class Sum < Aggregator
3380
- def least(); 0; end
3381
- def _happens(memo, val) memo + val; end
3382
- end # class Sum
3383
-
3384
- #
3385
- # Defines an AVG aggregation operator
3386
- #
3387
- class Avg < Aggregator
3388
- def least(); [0.0, 0.0]; end
3389
- def _happens(memo, val) [memo.first + val, memo.last + 1]; end
3390
- def finalize(memo) memo.first / memo.last end
3391
- end # class Sum
3392
-
3393
- #
3394
- # Defines a MIN aggregation operator
3395
- #
3396
- class Min < Aggregator
3397
- def least(); nil; end
3398
- def _happens(memo, val)
3399
- memo.nil? ? val : (memo < val ? memo : val)
3400
- end
3401
- end # class Min
3402
-
3403
- #
3404
- # Defines a MAX aggregation operator
3405
- #
3406
- class Max < Aggregator
3407
- def least(); nil; end
3408
- def _happens(memo, val)
3409
- memo.nil? ? val : (memo > val ? memo : val)
3410
- end
3411
- end # class Max
3412
-
212
+ #
213
+ # Marker for all operators, relational and non-relational ones.
214
+ #
215
+ module Operator
216
+ include Iterator, Tools
217
+
3413
218
  #
3414
- # Defines a COLLECT aggregation operator
219
+ # Operator factory
3415
220
  #
3416
- class Group < Aggregator
3417
- def initialize(*attrs)
3418
- super(nil, {}){
3419
- Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
3420
- }
3421
- end
3422
- def least(); Set.new; end
3423
- def _happens(memo, val)
3424
- memo << val
3425
- end
3426
- def finalize(memo)
3427
- Relation.coerce memo
221
+ def Alf.Operator()
222
+ Alf.Command() do |b|
223
+ b.instance_module Alf::Operator
3428
224
  end
3429
225
  end
226
+
227
+ require 'alf/operator/class_methods'
228
+ require 'alf/operator/signature'
229
+ require 'alf/operator/base'
230
+ require 'alf/operator/nullary'
231
+ require 'alf/operator/unary'
232
+ require 'alf/operator/binary'
233
+ require 'alf/operator/cesure'
234
+ require 'alf/operator/transform'
235
+ require 'alf/operator/shortcut'
236
+ require 'alf/operator/experimental'
237
+
3430
238
 
3431
239
  #
3432
- # Defines a COLLECT aggregation operator
3433
- #
3434
- class Collect < Aggregator
3435
- def least(); []; end
3436
- def _happens(memo, val)
3437
- memo << val
3438
- end
3439
- end
3440
-
3441
- #
3442
- # Defines a CONCAT aggregation operator
3443
- #
3444
- class Concat < Aggregator
3445
- def least(); ""; end
3446
- def default_options
3447
- {:before => "", :after => "", :between => ""}
3448
- end
3449
- def _happens(memo, val)
3450
- memo << options[:between].to_s unless memo.empty?
3451
- memo << val.to_s
3452
- end
3453
- def finalize(memo)
3454
- options[:before].to_s + memo + options[:after].to_s
3455
- end
3456
- end
3457
-
3458
- end # class Aggregator
3459
-
3460
- #
3461
- # Base class for implementing buffers.
3462
- #
3463
- class Buffer
3464
-
3465
- #
3466
- # Keeps tuples ordered on a specific key
3467
- #
3468
- # Example:
3469
- #
3470
- # sorted = Buffer::Sorted.new OrderingKey.new(...)
3471
- # sorted.add_all(...)
3472
- # sorted.each do |tuple|
3473
- # # tuples are ordered here
3474
- # end
240
+ # Marker module and namespace for non relational operators
3475
241
  #
3476
- class Sorted < Buffer
242
+ module NonRelational
243
+ require 'alf/operator/non_relational/autonum'
244
+ require 'alf/operator/non_relational/defaults'
245
+ require 'alf/operator/non_relational/compact'
246
+ require 'alf/operator/non_relational/sort'
247
+ require 'alf/operator/non_relational/clip'
248
+ require 'alf/operator/non_relational/coerce'
249
+ require 'alf/operator/non_relational/generator'
3477
250
 
3478
251
  #
3479
- # Creates a buffer instance with an ordering key
252
+ # Yields the block with each operator module in turn
3480
253
  #
3481
- def initialize(ordering_key)
3482
- @ordering_key = ordering_key
3483
- @buffer = []
3484
- end
3485
-
3486
- #
3487
- # Adds all elements of an iterator to the buffer
3488
- #
3489
- def add_all(enum)
3490
- sorter = @ordering_key.sorter
3491
- @buffer = merge_sort(@buffer, enum.to_a.sort(&sorter), sorter)
254
+ def self.each
255
+ constants.each do |c|
256
+ val = const_get(c)
257
+ yield(val) if val.ancestors.include?(Operator::NonRelational)
258
+ end
3492
259
  end
260
+
261
+ end # NonRelational
262
+
263
+ #
264
+ # Marker module and namespace for relational operators
265
+ #
266
+ module Relational
267
+ require 'alf/operator/relational/project'
268
+ require 'alf/operator/relational/extend'
269
+ require 'alf/operator/relational/rename'
270
+ require 'alf/operator/relational/restrict'
271
+ require 'alf/operator/relational/join'
272
+ require 'alf/operator/relational/intersect'
273
+ require 'alf/operator/relational/minus'
274
+ require 'alf/operator/relational/union'
275
+ require 'alf/operator/relational/matching'
276
+ require 'alf/operator/relational/not_matching'
277
+ require 'alf/operator/relational/wrap'
278
+ require 'alf/operator/relational/unwrap'
279
+ require 'alf/operator/relational/group'
280
+ require 'alf/operator/relational/ungroup'
281
+ require 'alf/operator/relational/summarize'
282
+ require 'alf/operator/relational/rank'
283
+ require 'alf/operator/relational/quota'
3493
284
 
285
+ #
286
+ # Yields the block with each operator module in turn
3494
287
  #
3495
- # (see Buffer#each)
3496
- #
3497
- def each
3498
- @buffer.each(&Proc.new)
288
+ def self.each
289
+ constants.each do |c|
290
+ val = const_get(c)
291
+ yield(val) if val.ancestors.include?(Operator::Relational)
292
+ end
3499
293
  end
3500
-
3501
- private
3502
294
 
3503
- # Implements a merge sort between two iterators s1 and s2
3504
- def merge_sort(s1, s2, sorter)
3505
- (s1 + s2).sort(&sorter)
3506
- end
3507
-
3508
- end # class Buffer::Sorted
295
+ end # module Relational
3509
296
 
3510
- end # class Buffer
297
+ end # module Operator
3511
298
 
3512
299
  #
3513
- # Defines a Heading, that is, a set of attribute (name,domain) pairs.
300
+ # Aggregation operator.
3514
301
  #
3515
- class Heading
3516
-
3517
- #
3518
- # Creates a Heading instance
3519
- #
3520
- # @param [Hash] a hash of attribute (name, type) pairs where name is
3521
- # a Symbol and type is a Class
3522
- #
3523
- def self.[](attributes)
3524
- Heading.new(attributes)
3525
- end
302
+ class Aggregator
303
+ require 'alf/aggregator/class_methods'
304
+ require 'alf/aggregator/base'
305
+ require 'alf/aggregator/aggregators'
3526
306
 
3527
- # @return [Hash] a (freezed) hash of (name, type) pairs
3528
- attr_reader :attributes
3529
-
3530
- #
3531
- # Creates a Heading instance
3532
- #
3533
- # @param [Hash] a hash of attribute (name, type) pairs where name is
3534
- # a Symbol and type is a Class
3535
- #
3536
- def initialize(attributes)
3537
- @attributes = attributes.dup.freeze
3538
- end
3539
-
3540
- #
3541
- # Returns heading's cardinality
3542
- #
3543
- def cardinality
3544
- attributes.size
3545
- end
3546
- alias :size :cardinality
3547
- alias :count :cardinality
3548
-
3549
- #
3550
- # Returns heading's hash code
3551
- #
3552
- def hash
3553
- @hash ||= attributes.hash
3554
- end
3555
-
3556
- #
3557
- # Checks equality with other heading
3558
- #
3559
- def ==(other)
3560
- other.is_a?(Heading) && (other.attributes == attributes)
3561
- end
3562
- alias :eql? :==
3563
-
3564
- #
3565
- # Converts this heading to a Hash of (name,type) pairs
3566
- #
3567
- def to_hash
3568
- attributes.dup
3569
- end
3570
-
3571
- #
3572
- # Returns a Heading literal
3573
- #
3574
- def to_ruby_literal
3575
- attributes.empty? ?
3576
- "Alf::Heading::EMPTY" :
3577
- "Alf::Heading[#{Myrrha.to_ruby_literal(attributes)[1...-1]}]"
3578
- end
3579
- alias :inspect :to_ruby_literal
3580
-
3581
- EMPTY = Alf::Heading.new({})
3582
- end # class Heading
307
+ end # class Aggregator
3583
308
 
309
+ #
310
+ # Base class for implementing buffers.
311
+ #
312
+ class Buffer
313
+ require 'alf/buffer/sorted'
314
+
315
+ end # class Buffer
316
+
3584
317
  #
3585
318
  # Defines an in-memory relation data structure.
3586
319
  #
@@ -3600,150 +333,10 @@ module Alf
3600
333
  #
3601
334
  class Relation
3602
335
  include Iterator
3603
-
3604
- protected
3605
-
3606
- # @return [Set] the set of tuples
3607
- attr_reader :tuples
3608
-
3609
- public
3610
-
3611
- #
3612
- # Creates a Relation instance.
3613
- #
3614
- # @param [Set] tuples a set of tuples
3615
- #
3616
- def initialize(tuples)
3617
- raise ArgumentError unless tuples.is_a?(Set)
3618
- @tuples = tuples
3619
- end
3620
-
3621
- #
3622
- # Coerces `val` to a relation.
3623
- #
3624
- # Recognized arguments are: Relation (identity coercion), Set of ruby hashes,
3625
- # Array of ruby hashes, Alf::Iterator.
3626
- #
3627
- # @return [Relation] a relation instance for the given set of tuples
3628
- # @raise [ArgumentError] when `val` is not recognized
3629
- #
3630
- def self.coerce(val)
3631
- case val
3632
- when Relation
3633
- val
3634
- when Set
3635
- Relation.new(val)
3636
- when Array
3637
- Relation.new val.to_set
3638
- when Iterator
3639
- Relation.new val.to_set
3640
- else
3641
- raise ArgumentError, "Unable to coerce #{val} to a Relation"
3642
- end
3643
- end
3644
-
3645
- # (see Relation.coerce)
3646
- def self.[](*tuples)
3647
- coerce(tuples)
3648
- end
3649
-
3650
- #
3651
- # (see Iterator#each)
3652
- #
3653
- def each(&block)
3654
- tuples.each(&block)
3655
- end
3656
-
3657
- #
3658
- # Returns relation's cardinality (number of tuples).
3659
- #
3660
- # @return [Integer] relation's cardinality
3661
- #
3662
- def cardinality
3663
- tuples.size
3664
- end
3665
- alias :size :cardinality
3666
- alias :count :cardinality
3667
-
3668
- # Returns true if this relation is empty
3669
- def empty?
3670
- cardinality == 0
3671
- end
3672
-
3673
- #
3674
- # Install the DSL through iteration over defined operators
3675
- #
3676
- Operator::each do |op_class|
3677
- meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
3678
- if op_class.unary?
3679
- define_method(meth_name) do |*args|
3680
- op = op_class.new(*args).pipe(self)
3681
- Relation.coerce(op)
3682
- end
3683
- elsif op_class.binary?
3684
- define_method(meth_name) do |right, *args|
3685
- op = op_class.new(*args).pipe([self, Iterator.coerce(right)])
3686
- Relation.coerce(op)
3687
- end
3688
- else
3689
- raise "Unexpected operator #{op_class}"
3690
- end
3691
- end # Operators::each
3692
-
3693
- alias :+ :union
3694
- alias :- :minus
3695
336
 
3696
- # Shortcut for project(attributes, true)
3697
- def allbut(attributes)
3698
- project(attributes, true)
3699
- end
3700
-
3701
- #
3702
- # (see Object#hash)
3703
- #
3704
- def hash
3705
- @tuples.hash
3706
- end
3707
-
3708
- #
3709
- # (see Object#==)
3710
- #
3711
- def ==(other)
3712
- return nil unless other.is_a?(Relation)
3713
- other.tuples == self.tuples
3714
- end
3715
- alias :eql? :==
3716
-
3717
- #
3718
- # Returns a textual representation of this relation
3719
- #
3720
- def to_s
3721
- Alf::Renderer.text(self).execute("")
3722
- end
3723
-
3724
- #
3725
- # Returns an array with all tuples in this relation.
3726
- #
3727
- # @param [Tools::OrderingKey] an optional ordering key (any argument
3728
- # recognized by OrderingKey.coerce is supported here).
3729
- # @return [Array] an array of hashes, in requested order (if specified)
3730
- #
3731
- def to_a(okey = nil)
3732
- okey = Tools::OrderingKey.coerce(okey) if okey
3733
- ary = tuples.to_a
3734
- ary.sort!(&okey.sorter) if okey
3735
- ary
3736
- end
3737
-
3738
- #
3739
- # Returns a literal representation of this relation
3740
- #
3741
- def to_ruby_literal
3742
- "Alf::Relation[" +
3743
- tuples.collect{|t| Myrrha.to_ruby_literal(t)}.join(', ') + "]"
3744
- end
3745
- alias :inspect :to_ruby_literal
3746
-
337
+ require "alf/relation/class_methods"
338
+ require "alf/relation/instance_methods"
339
+
3747
340
  DEE = Relation.coerce([{}])
3748
341
  DUM = Relation.coerce([])
3749
342
  end # class Relation
@@ -3764,123 +357,10 @@ module Alf
3764
357
  # is not intended to be directly included by third-party classes.
3765
358
  #
3766
359
  module Lispy
360
+ require 'alf/lispy/instance_methods'
3767
361
 
3768
- alias :ruby_extend :extend
3769
-
3770
- # The environment
3771
- attr_accessor :environment
3772
-
3773
- #
3774
- # Compiles a query expression given by a String or a block and returns
3775
- # the result (typically a tuple iterator)
3776
- #
3777
- # Example
3778
- #
3779
- # # with a string
3780
- # op = compile "(restrict :suppliers, lambda{ city == 'London' })"
3781
- #
3782
- # # or with a block
3783
- # op = compile {
3784
- # (restrict :suppliers, lambda{ city == 'London' })
3785
- # }
3786
- #
3787
- # @param [String] expr a Lispy expression to compile
3788
- # @return [Iterator] the iterator resulting from compilation
3789
- #
3790
- def compile(expr = nil, path = nil, &block)
3791
- if expr.nil?
3792
- instance_eval(&block)
3793
- else
3794
- b = _clean_binding
3795
- (path ? Kernel.eval(expr, b, path) : Kernel.eval(expr, b))
3796
- end
3797
- end
3798
-
3799
- #
3800
- # Evaluates a query expression given by a String or a block and returns
3801
- # the result as an in-memory relation (Alf::Relation)
3802
- #
3803
- # Example:
3804
- #
3805
- # # with a string
3806
- # rel = evaluate "(restrict :suppliers, lambda{ city == 'London' })"
3807
- #
3808
- # # or with a block
3809
- # rel = evaluate {
3810
- # (restrict :suppliers, lambda{ city == 'London' })
3811
- # }
3812
- #
3813
- def evaluate(expr = nil, path = nil, &block)
3814
- compile(expr, path, &block).to_rel
3815
- end
3816
-
3817
- #
3818
- # Delegated to the current environment
3819
- #
3820
- # This method returns the dataset associated to a given name. The result
3821
- # may depend on the current environment, but is generally an Iterator,
3822
- # often a Reader instance.
3823
- #
3824
- # @param [Symbol] name name of the dataset to retrieve
3825
- # @return [Iterator] the dataset as an iterator
3826
- # @see Environment#dataset
3827
- #
3828
- def dataset(name)
3829
- raise "Environment not set" unless @environment
3830
- @environment.dataset(name)
3831
- end
3832
-
3833
- # Functional equivalent to Alf::Relation[...]
3834
- def relation(*tuples)
3835
- Relation.coerce(tuples)
3836
- end
3837
-
3838
- #
3839
- # Install the DSL through iteration over defined operators
3840
- #
3841
- Operator::each do |op_class|
3842
- meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
3843
- if op_class.unary?
3844
- define_method(meth_name) do |child, *args|
3845
- child = Iterator.coerce(child, environment)
3846
- op_class.new(*args).pipe(child, environment)
3847
- end
3848
- elsif op_class.binary?
3849
- define_method(meth_name) do |left, right, *args|
3850
- operands = [left, right].collect{|x| Iterator.coerce(x, environment)}
3851
- op_class.new(*args).pipe(operands, environment)
3852
- end
3853
- else
3854
- raise "Unexpected operator #{op_class}"
3855
- end
3856
- end # Operators::each
3857
-
3858
- def allbut(child, attributes)
3859
- (project child, attributes, true)
3860
- end
3861
-
3862
- #
3863
- # Runs a command as in shell.
3864
- #
3865
- # Example:
3866
- #
3867
- # lispy = Alf.lispy(Alf::Environment.examples)
3868
- # op = lispy.run(['restrict', 'suppliers', '--', "city == 'Paris'"])
3869
- #
3870
- def run(argv, requester = nil)
3871
- Alf::Command::Main.new(environment).run(argv, requester)
3872
- end
3873
-
3874
- Agg = Alf::Aggregator
3875
362
  DUM = Relation::DUM
3876
363
  DEE = Relation::DEE
3877
-
3878
- private
3879
-
3880
- def _clean_binding
3881
- binding
3882
- end
3883
-
3884
364
  end # module Lispy
3885
365
 
3886
366
  #
@@ -3906,5 +386,4 @@ module Alf
3906
386
  end
3907
387
 
3908
388
  end # module Alf
3909
- require "alf/text"
3910
- require "alf/yaml"
389
+ require "alf/extra"