alf 0.9.3 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (270) hide show
  1. data/CHANGELOG.md +255 -129
  2. data/Gemfile +31 -1
  3. data/Gemfile.lock +17 -20
  4. data/LICENCE.md +1 -1
  5. data/Manifest.txt +2 -0
  6. data/README.md +37 -43
  7. data/TODO.md +1 -1
  8. data/alf.gemspec +10 -7
  9. data/alf.noespec +24 -13
  10. data/bin/alf +2 -2
  11. data/doc/commands/exec.md +16 -0
  12. data/doc/commands/help.md +11 -0
  13. data/doc/commands/main.md +33 -0
  14. data/doc/commands/show.md +19 -0
  15. data/doc/operators/non_relational/autonum.md +23 -0
  16. data/doc/operators/non_relational/clip.md +31 -0
  17. data/doc/operators/non_relational/coerce.md +15 -0
  18. data/doc/operators/non_relational/compact.md +20 -0
  19. data/doc/operators/non_relational/defaults.md +32 -0
  20. data/doc/operators/non_relational/generator.md +20 -0
  21. data/doc/operators/non_relational/sort.md +24 -0
  22. data/doc/operators/relational/extend.md +18 -0
  23. data/doc/operators/relational/group.md +27 -0
  24. data/doc/operators/relational/intersect.md +13 -0
  25. data/doc/operators/relational/join.md +27 -0
  26. data/doc/operators/relational/matching.md +20 -0
  27. data/doc/operators/relational/minus.md +12 -0
  28. data/doc/operators/relational/not-matching.md +20 -0
  29. data/doc/operators/relational/project.md +28 -0
  30. data/doc/operators/relational/quota.md +21 -0
  31. data/doc/operators/relational/rank.md +27 -0
  32. data/doc/operators/relational/rename.md +17 -0
  33. data/doc/operators/relational/restrict.md +25 -0
  34. data/doc/operators/relational/summarize.md +25 -0
  35. data/doc/operators/relational/ungroup.md +20 -0
  36. data/doc/operators/relational/union.md +14 -0
  37. data/doc/operators/relational/unwrap.md +20 -0
  38. data/doc/operators/relational/wrap.md +24 -0
  39. data/examples/csv/suppliers.csv +6 -0
  40. data/examples/logs/access.log +1000 -0
  41. data/examples/logs/combined.alf +2 -0
  42. data/examples/logs/hits.alf +14 -0
  43. data/examples/logs/not_found.alf +7 -0
  44. data/examples/logs/robots-cheating.alf +11 -0
  45. data/examples/logs/robots.alf +8 -0
  46. data/examples/northwind/customers.csv +92 -0
  47. data/examples/northwind/northwind.db +0 -0
  48. data/examples/northwind/orders.csv +831 -0
  49. data/examples/operators/clip.alf +1 -1
  50. data/examples/operators/database.alf +5 -6
  51. data/examples/operators/defaults.alf +1 -1
  52. data/examples/operators/group.alf +1 -1
  53. data/examples/operators/project.alf +2 -1
  54. data/examples/operators/pseudo-with.alf +2 -2
  55. data/examples/operators/quota.alf +2 -2
  56. data/examples/operators/summarize.alf +2 -2
  57. data/lib/alf/aggregator/aggregators.rb +77 -0
  58. data/lib/alf/aggregator/base.rb +95 -0
  59. data/lib/alf/aggregator/class_methods.rb +57 -0
  60. data/lib/alf/buffer/sorted.rb +48 -0
  61. data/lib/alf/command/class_methods.rb +27 -0
  62. data/lib/alf/command/doc_manager.rb +72 -0
  63. data/lib/alf/command/exec.rb +12 -0
  64. data/lib/alf/command/help.rb +31 -0
  65. data/lib/alf/command/main.rb +146 -0
  66. data/lib/alf/command/show.rb +33 -0
  67. data/lib/alf/environment/base.rb +37 -0
  68. data/lib/alf/environment/class_methods.rb +93 -0
  69. data/lib/alf/environment/explicit.rb +38 -0
  70. data/lib/alf/environment/folder.rb +62 -0
  71. data/lib/alf/extra/csv.rb +104 -0
  72. data/lib/alf/extra/logs.rb +100 -0
  73. data/lib/alf/extra/sequel.rb +77 -0
  74. data/lib/alf/{yaml.rb → extra/yaml.rb} +0 -0
  75. data/lib/alf/extra.rb +5 -0
  76. data/lib/alf/iterator/base.rb +38 -0
  77. data/lib/alf/iterator/class_methods.rb +22 -0
  78. data/lib/alf/iterator/proxy.rb +33 -0
  79. data/lib/alf/lispy/instance_methods.rb +157 -0
  80. data/lib/alf/operator/base.rb +74 -0
  81. data/lib/alf/operator/binary.rb +32 -0
  82. data/lib/alf/operator/cesure.rb +45 -0
  83. data/lib/alf/operator/class_methods.rb +132 -0
  84. data/lib/alf/operator/experimental.rb +9 -0
  85. data/lib/alf/operator/non_relational/autonum.rb +24 -0
  86. data/lib/alf/operator/non_relational/clip.rb +20 -0
  87. data/lib/alf/operator/non_relational/coerce.rb +21 -0
  88. data/lib/alf/operator/non_relational/compact.rb +62 -0
  89. data/lib/alf/operator/non_relational/defaults.rb +25 -0
  90. data/lib/alf/operator/non_relational/generator.rb +38 -0
  91. data/lib/alf/operator/non_relational/sort.rb +23 -0
  92. data/lib/alf/operator/nullary.rb +20 -0
  93. data/lib/alf/operator/relational/extend.rb +24 -0
  94. data/lib/alf/operator/relational/group.rb +32 -0
  95. data/lib/alf/operator/relational/intersect.rb +37 -0
  96. data/lib/alf/operator/relational/join.rb +106 -0
  97. data/lib/alf/operator/relational/matching.rb +45 -0
  98. data/lib/alf/operator/relational/minus.rb +37 -0
  99. data/lib/alf/operator/relational/not_matching.rb +45 -0
  100. data/lib/alf/operator/relational/project.rb +22 -0
  101. data/lib/alf/operator/relational/quota.rb +51 -0
  102. data/lib/alf/operator/relational/rank.rb +55 -0
  103. data/lib/alf/operator/relational/rename.rb +19 -0
  104. data/lib/alf/operator/relational/restrict.rb +20 -0
  105. data/lib/alf/operator/relational/summarize.rb +83 -0
  106. data/lib/alf/operator/relational/ungroup.rb +25 -0
  107. data/lib/alf/operator/relational/union.rb +32 -0
  108. data/lib/alf/operator/relational/unwrap.rb +21 -0
  109. data/lib/alf/operator/relational/wrap.rb +22 -0
  110. data/lib/alf/operator/shortcut.rb +53 -0
  111. data/lib/alf/operator/signature.rb +262 -0
  112. data/lib/alf/operator/transform.rb +27 -0
  113. data/lib/alf/operator/unary.rb +38 -0
  114. data/lib/alf/reader/alf_file.rb +24 -0
  115. data/lib/alf/reader/base.rb +119 -0
  116. data/lib/alf/reader/class_methods.rb +82 -0
  117. data/lib/alf/reader/rash.rb +28 -0
  118. data/lib/alf/relation/class_methods.rb +37 -0
  119. data/lib/alf/relation/instance_methods.rb +127 -0
  120. data/lib/alf/renderer/base.rb +72 -0
  121. data/lib/alf/renderer/class_methods.rb +58 -0
  122. data/lib/alf/renderer/rash.rb +19 -0
  123. data/lib/alf/{text.rb → renderer/text.rb} +1 -1
  124. data/lib/alf/tools/coerce.rb +14 -0
  125. data/lib/alf/tools/miscellaneous.rb +77 -0
  126. data/lib/alf/tools/to_lispy.rb +99 -0
  127. data/lib/alf/tools/to_ruby_literal.rb +14 -0
  128. data/lib/alf/tools/tuple_handle.rb +50 -0
  129. data/lib/alf/types/attr_list.rb +56 -0
  130. data/lib/alf/types/attr_name.rb +28 -0
  131. data/lib/alf/types/boolean.rb +12 -0
  132. data/lib/alf/types/heading.rb +96 -0
  133. data/lib/alf/types/ordering.rb +93 -0
  134. data/lib/alf/types/renaming.rb +57 -0
  135. data/lib/alf/types/summarization.rb +76 -0
  136. data/lib/alf/types/tuple_computation.rb +61 -0
  137. data/lib/alf/types/tuple_expression.rb +61 -0
  138. data/lib/alf/types/tuple_predicate.rb +49 -0
  139. data/lib/alf/version.rb +2 -2
  140. data/lib/alf.rb +193 -3714
  141. data/spec/integration/__database__/group.alf +1 -1
  142. data/spec/integration/__database__/suppliers_csv.csv +6 -0
  143. data/spec/integration/command/alf/alf.db +0 -0
  144. data/spec/integration/command/alf/alf_env_sqlite.cmd +1 -0
  145. data/spec/integration/command/alf/alf_env_sqlite.stdout +9 -0
  146. data/spec/integration/command/alf/alf_help.cmd +1 -0
  147. data/spec/integration/command/alf/alf_help.stdout +67 -0
  148. data/spec/integration/command/autonum/autonum_0.cmd +1 -1
  149. data/spec/integration/command/coerce/coerce_1.cmd +1 -0
  150. data/spec/integration/command/coerce/coerce_1.stdout +5 -0
  151. data/spec/integration/command/defaults/defaults_0.cmd +1 -1
  152. data/spec/integration/command/defaults/defaults_0.stdout +9 -9
  153. data/spec/integration/command/defaults/defaults_2.cmd +1 -0
  154. data/spec/integration/command/defaults/defaults_2.stdout +9 -0
  155. data/spec/integration/command/generator/generator_1.cmd +1 -0
  156. data/spec/integration/command/generator/generator_1.stdout +10 -0
  157. data/spec/integration/command/generator/generator_2.cmd +1 -0
  158. data/spec/integration/command/generator/generator_2.stdout +5 -0
  159. data/spec/integration/command/generator/generator_3.cmd +1 -0
  160. data/spec/integration/command/generator/generator_3.stdout +5 -0
  161. data/spec/integration/command/group/group_0.cmd +1 -1
  162. data/spec/integration/command/group/group_1.cmd +1 -1
  163. data/spec/integration/command/help/help_1.cmd +1 -0
  164. data/spec/integration/command/help/help_1.stdout +22 -0
  165. data/spec/integration/command/quota/quota_0.cmd +1 -1
  166. data/spec/integration/command/rank/rank_1.cmd +1 -1
  167. data/spec/integration/command/rank/rank_1.stdout +10 -10
  168. data/spec/integration/command/rank/rank_2.cmd +1 -1
  169. data/spec/integration/command/rank/rank_2.stdout +10 -10
  170. data/spec/integration/command/rank/rank_3.cmd +1 -1
  171. data/spec/integration/command/rank/rank_3.stdout +10 -10
  172. data/spec/integration/command/rank/rank_4.cmd +1 -1
  173. data/spec/integration/command/rank/rank_5.cmd +1 -1
  174. data/spec/integration/command/show/show_csv.cmd +1 -0
  175. data/spec/integration/command/show/show_csv.stdout +6 -0
  176. data/spec/integration/command/show/show_rash_2.cmd +1 -1
  177. data/spec/integration/command/show/show_rash_2.stdout +5 -5
  178. data/spec/integration/command/sort/sort_0.cmd +1 -1
  179. data/spec/integration/command/sort/sort_1.cmd +1 -1
  180. data/spec/integration/command/sort/sort_1.stdout +2 -2
  181. data/spec/integration/command/sort/sort_2.cmd +1 -0
  182. data/spec/integration/command/sort/sort_2.stdout +9 -0
  183. data/spec/integration/command/sort/sort_3.cmd +1 -0
  184. data/spec/integration/command/sort/sort_3.stdout +9 -0
  185. data/spec/integration/command/summarize/summarize_0.cmd +1 -1
  186. data/spec/integration/command/ungroup/ungroup_0.cmd +1 -1
  187. data/spec/integration/command/wrap/wrap_0.cmd +1 -1
  188. data/spec/integration/semantics/test_project.alf +5 -6
  189. data/spec/integration/semantics/test_rank.alf +16 -16
  190. data/spec/integration/test_command.rb +17 -6
  191. data/spec/integration/test_examples.rb +1 -1
  192. data/spec/regression/logs/apache_combined.log +5 -0
  193. data/spec/regression/logs/test_path_attribute.rb +25 -0
  194. data/spec/regression/relation/test_relation_allbut_all.rb +14 -0
  195. data/spec/shared/an_operator_class.rb +10 -5
  196. data/spec/spec_helper.rb +1 -7
  197. data/spec/unit/assumptions/test_set.rb +64 -0
  198. data/spec/unit/command/doc_manager/dynamic.md +1 -0
  199. data/spec/unit/command/doc_manager/example.md +1 -0
  200. data/spec/unit/command/doc_manager/example_1.txt +11 -0
  201. data/spec/unit/command/doc_manager/static.md +1 -0
  202. data/spec/unit/command/doc_manager/test_call.rb +49 -0
  203. data/spec/unit/csv/input.csv +3 -0
  204. data/spec/unit/csv/test_reader.rb +66 -0
  205. data/spec/unit/csv/test_renderer.rb +73 -0
  206. data/spec/unit/lispy/test_relation.rb +37 -0
  207. data/spec/unit/lispy/test_run.rb +40 -0
  208. data/spec/unit/lispy/test_tuple.rb +36 -0
  209. data/spec/unit/logs/apache_combined.log +5 -0
  210. data/spec/unit/logs/postgresql.log +29 -0
  211. data/spec/unit/logs/test_reader.rb +56 -0
  212. data/spec/unit/operator/non_relational/compact/{buffer_based.rb → test_buffer_based.rb} +0 -0
  213. data/spec/unit/operator/non_relational/test_clip.rb +1 -1
  214. data/spec/unit/operator/non_relational/test_coerce.rb +35 -0
  215. data/spec/unit/operator/non_relational/test_defaults.rb +15 -2
  216. data/spec/unit/operator/non_relational/test_generator.rb +78 -0
  217. data/spec/unit/operator/relational/join/test_hash_based.rb +4 -4
  218. data/spec/unit/operator/relational/matching/test_hash_based.rb +6 -6
  219. data/spec/unit/operator/relational/not_matching/test_hash_based.rb +4 -4
  220. data/spec/unit/operator/relational/summarize/test_hash_based.rb +10 -6
  221. data/spec/unit/operator/relational/summarize/test_sort_based.rb +18 -7
  222. data/spec/unit/operator/relational/test_group.rb +8 -8
  223. data/spec/unit/operator/relational/test_intersect.rb +3 -3
  224. data/spec/unit/operator/relational/test_minus.rb +3 -3
  225. data/spec/unit/operator/relational/test_project.rb +12 -2
  226. data/spec/unit/operator/relational/test_quota.rb +5 -6
  227. data/spec/unit/operator/relational/test_summarize.rb +9 -11
  228. data/spec/unit/operator/relational/test_union.rb +1 -1
  229. data/spec/unit/operator/relational/test_wrap.rb +1 -1
  230. data/spec/unit/operator/signature/test_collect_on.rb +45 -0
  231. data/spec/unit/operator/signature/test_initialize.rb +17 -0
  232. data/spec/unit/operator/signature/test_install.rb +56 -0
  233. data/spec/unit/operator/signature/test_option_parser.rb +36 -0
  234. data/spec/unit/operator/signature/test_parse_args.rb +60 -0
  235. data/spec/unit/operator/signature/test_parse_argv.rb +87 -0
  236. data/spec/unit/operator/signature/test_to_lispy.rb +102 -0
  237. data/spec/unit/operator/signature/test_to_shell.rb +103 -0
  238. data/spec/unit/operator/test_non_relational.rb +3 -1
  239. data/spec/unit/relation/test_relops.rb +20 -15
  240. data/spec/unit/sequel/alf.db +0 -0
  241. data/spec/unit/sequel/test_environment.rb +54 -0
  242. data/spec/unit/test_aggregator.rb +32 -22
  243. data/spec/unit/test_environment.rb +5 -0
  244. data/spec/unit/test_lispy.rb +4 -0
  245. data/spec/unit/test_relation.rb +5 -0
  246. data/spec/unit/text/test_cell.rb +6 -6
  247. data/spec/unit/text/test_row.rb +3 -3
  248. data/spec/unit/text/test_table.rb +6 -6
  249. data/spec/unit/tools/test_coalesce.rb +15 -0
  250. data/spec/unit/tools/test_coerce.rb +10 -0
  251. data/spec/unit/tools/test_to_lispy.rb +138 -0
  252. data/spec/unit/tools/test_to_ruby_literal.rb +10 -0
  253. data/spec/unit/tools/test_tuple_handle.rb +1 -59
  254. data/spec/unit/types/test_attr_list.rb +106 -0
  255. data/spec/unit/types/test_attr_name.rb +52 -0
  256. data/spec/unit/{test_heading.rb → types/test_heading.rb} +10 -0
  257. data/spec/unit/types/test_ordering.rb +127 -0
  258. data/spec/unit/types/test_renaming.rb +55 -0
  259. data/spec/unit/types/test_summarization.rb +63 -0
  260. data/spec/unit/types/test_tuple_computation.rb +60 -0
  261. data/spec/unit/types/test_tuple_expression.rb +64 -0
  262. data/spec/unit/types/test_tuple_predicate.rb +79 -0
  263. data/tasks/debug_mail.rake +1 -1
  264. data/tasks/debug_mail.txt +5 -0
  265. data/tasks/gh-pages.rake +63 -0
  266. metadata +325 -52
  267. data/spec/unit/operator/test_command_methods.rb +0 -38
  268. data/spec/unit/tools/test_ordering_key.rb +0 -94
  269. data/spec/unit/tools/test_parse_commandline_args.rb +0 -47
  270. data/spec/unit/tools/test_projection_key.rb +0 -83
data/lib/alf.rb CHANGED
@@ -12,338 +12,37 @@ require 'myrrha/coerce'
12
12
  # Classy data-manipulation dressed in a DSL (+ commandline)
13
13
  #
14
14
  module Alf
15
-
16
- #
17
- # Provides tooling methods that are used here and there in Alf.
18
- #
19
- module Tools
20
-
21
- #
22
- # Parse a string with commandline arguments and returns an array.
23
- #
24
- # Example:
25
- #
26
- # parse_commandline_args("--text --size=10") # => ['--text', '--size=10']
27
- #
28
- def parse_commandline_args(args)
29
- args = args.split(/\s+/)
30
- result = []
31
- until args.empty?
32
- if args.first[0,1] == '"'
33
- if args.first[-1,1] == '"'
34
- result << args.shift[1...-1]
35
- else
36
- block = [ args.shift[1..-1] ]
37
- while args.first[-1,1] != '"'
38
- block << args.shift
39
- end
40
- block << args.shift[0...-1]
41
- result << block.join(" ")
42
- end
43
- elsif args.first[0,1] == "'"
44
- if args.first[-1,1] == "'"
45
- result << args.shift[1...-1]
46
- else
47
- block = [ args.shift[1..-1] ]
48
- while args.first[-1,1] != "'"
49
- block << args.shift
50
- end
51
- block << args.shift[0...-1]
52
- result << block.join(" ")
53
- end
54
- else
55
- result << args.shift
56
- end
57
- end
58
- result
59
- end
60
15
 
61
- # Helper to define methods with multiple signatures.
62
- #
63
- # Example:
64
- #
65
- # varargs([1, "hello"], [Integer, String]) # => [1, "hello"]
66
- # varargs(["hello"], [Integer, String]) # => [nil, "hello"]
67
- #
68
- def varargs(args, types)
69
- types.collect{|t| t===args.first ? args.shift : nil}
70
- end
71
-
72
- #
73
- # Attempt to require(who) the most friendly way as possible.
74
- #
75
- def friendly_require(who, dep = nil, retried = false)
76
- gem(who, dep) if dep && defined?(Gem)
77
- require who
78
- rescue LoadError => ex
79
- if retried
80
- raise "Unable to require #{who}, which is now needed\n"\
81
- "Try 'gem install #{who}'"
82
- else
83
- require 'rubygems' unless defined?(Gem)
84
- friendly_require(who, dep, true)
85
- end
86
- end
16
+ #
17
+ # Encapsulates all types
18
+ #
19
+ module Types
20
+ require 'alf/types/attr_name'
21
+ require 'alf/types/boolean'
22
+ require 'alf/types/heading'
23
+ require 'alf/types/ordering'
24
+ require 'alf/types/attr_list'
25
+ require 'alf/types/renaming'
26
+ require 'alf/types/tuple_expression'
27
+ require 'alf/types/tuple_predicate'
28
+ require 'alf/types/summarization'
29
+ require 'alf/types/tuple_computation'
87
30
 
88
- # Returns the unqualified name of a ruby class or module
89
- #
90
- # Example
91
- #
92
- # class_name(Alf::Tools) -> :Tools
93
- #
94
- def class_name(clazz)
95
- clazz.name.to_s =~ /([A-Za-z0-9_]+)$/
96
- $1.to_sym
97
- end
98
-
99
- #
100
- # Converts an unqualified class or module name to a ruby case method name.
101
- #
102
- # Example
103
- #
104
- # ruby_case(:Alf) -> "alf"
105
- # ruby_case(:HelloWorld) -> "hello_world"
106
- #
107
- def ruby_case(s)
108
- s.to_s.gsub(/[A-Z]/){|x| "_#{x.downcase}"}[1..-1]
109
- end
110
-
111
- #
112
- # Returns the first non nil values from arguments
113
- #
114
- # Example
115
- #
116
- # coalesce(nil, 1, "abc") -> 1
117
- #
118
- def coalesce(*args)
119
- args.find{|x| !x.nil?}
120
- end
121
-
122
- #
123
- # Iterates over enum and yields the block on each element.
124
- # Collect block results as key/value pairs returns them as
125
- # a Hash.
126
- #
127
- def tuple_collect(enum)
128
- tuple = {}
129
- enum.each do |elm|
130
- k, v = yield(elm)
131
- tuple[k] = v
132
- end
133
- tuple
31
+ # Install all types on Alf now
32
+ constants.each do |s|
33
+ Alf.const_set(s, const_get(s))
134
34
  end
35
+ end
135
36
 
136
- #
137
- # Provides a handle, implementing a flyweight design pattern on tuples.
138
- #
139
- class TupleHandle
140
-
141
- # Creates an handle instance
142
- def initialize
143
- @tuple = nil
144
- end
145
-
146
- #
147
- # Sets the next tuple to use.
148
- #
149
- # This method installs the handle as a side effect
150
- # on first call.
151
- #
152
- def set(tuple)
153
- build(tuple) if @tuple.nil?
154
- @tuple = tuple
155
- self
156
- end
157
-
158
- #
159
- # Compiles a tuple expression and returns a lambda
160
- # instance that can be passed to evaluate later.
161
- #
162
- def self.compile(expr)
163
- case expr
164
- when Proc
165
- expr
166
- when NilClass
167
- compile('true')
168
- when Hash
169
- if expr.empty?
170
- compile(nil)
171
- else
172
- compile expr.each_pair.collect{|k,v|
173
- "(self.#{k} == #{Myrrha.to_ruby_literal(v)})"
174
- }.join(" && ")
175
- end
176
- when Array
177
- compile(Hash[*expr])
178
- when String, Symbol
179
- eval("lambda{ #{expr} }")
180
- else
181
- raise ArgumentError, "Unable to compile #{expr} to a TupleHandle"
182
- end
183
- end
184
-
185
- #
186
- # Evaluates an expression on the current tuple. Expression
187
- # can be a lambda or a string (immediately compiled in the
188
- # later case).
189
- #
190
- def evaluate(expr)
191
- if RUBY_VERSION < "1.9"
192
- instance_eval(&TupleHandle.compile(expr))
193
- else
194
- instance_exec(&TupleHandle.compile(expr))
195
- end
196
- end
197
-
198
- private
199
-
200
- #
201
- # Builds this handle with a tuple.
202
- #
203
- # This method should be called only once and installs
204
- # instance methods on the handle with keys of _tuple_.
205
- #
206
- def build(tuple)
207
- tuple.keys.each do |k|
208
- (class << self; self; end).send(:define_method, k) do
209
- @tuple[k]
210
- end
211
- end
212
- end
213
-
214
- end # class TupleHandle
215
-
216
- #
217
- # Defines a projection key
218
- #
219
- class ProjectionKey
220
- include Tools
221
-
222
- # Projection attributes
223
- attr_accessor :attributes
224
-
225
- # Allbut projection?
226
- attr_accessor :allbut
227
-
228
- def initialize(attributes, allbut = false)
229
- @attributes = attributes
230
- @allbut = allbut
231
- end
232
-
233
- def self.coerce(arg)
234
- case arg
235
- when Array
236
- ProjectionKey.new(arg, false)
237
- when OrderingKey
238
- ProjectionKey.new(arg.attributes, false)
239
- when ProjectionKey
240
- arg
241
- else
242
- raise ArgumentError, "Unable to coerce #{arg} to a projection key"
243
- end
244
- end
245
-
246
- def to_ordering_key
247
- OrderingKey.new attributes.collect{|arg|
248
- [arg, :asc]
249
- }
250
- end
251
-
252
- def project(tuple)
253
- split(tuple).first
254
- end
255
-
256
- def split(tuple)
257
- projection, rest = {}, tuple.dup
258
- attributes.each do |a|
259
- projection[a] = tuple[a]
260
- rest.delete(a)
261
- end
262
- @allbut ? [rest, projection] : [projection, rest]
263
- end
264
-
265
- end # class ProjectionKey
266
-
267
- #
268
- # Encapsulates tools for computing orders on tuples
269
- #
270
- class OrderingKey
271
-
272
- attr_reader :ordering
273
-
274
- def initialize(ordering = [])
275
- @ordering = ordering
276
- @sorter = nil
277
- end
278
-
279
- #
280
- # Coerces `arg` to an ordering key.
281
- #
282
- # Implemented coercions are:
283
- # * Array of symbols (all attributes in ascending order)
284
- # * Array of [Symbol, :asc|:desc] pairs (obvious semantics)
285
- # * ProjectionKey (all its attributes in ascending order)
286
- # * OrderingKey (self)
287
- #
288
- # @return [OrderingKey]
289
- # @raises [ArgumentError] when `arg` is not recognized
290
- #
291
- def self.coerce(arg)
292
- case arg
293
- when Array
294
- if arg.all?{|a| a.is_a?(Array)}
295
- OrderingKey.new(arg)
296
- elsif arg.all?{|a| a.is_a?(Symbol)}
297
- sliced = arg.each_slice(2)
298
- if sliced.all?{|a,o| [:asc,:desc].include?(o)}
299
- OrderingKey.new sliced.to_a
300
- else
301
- OrderingKey.new arg.collect{|a| [a, :asc]}
302
- end
303
- end
304
- when ProjectionKey
305
- arg.to_ordering_key
306
- when OrderingKey
307
- arg
308
- else
309
- raise ArgumentError, "Unable to coerce #{arg} to an ordering key"
310
- end
311
- end
312
-
313
- def attributes
314
- @ordering.collect{|arg| arg.first}
315
- end
316
-
317
- def order_by(attr, order = :asc)
318
- @ordering << [attr, order]
319
- @sorter = nil
320
- self
321
- end
322
-
323
- def order_of(attr)
324
- @ordering.find{|arg| arg.first == attr}.last
325
- end
326
-
327
- def compare(t1,t2)
328
- @ordering.each do |attr,order|
329
- x, y = t1[attr], t2[attr]
330
- comp = x.respond_to?(:<=>) ? (x <=> y) : (x.to_s <=> y.to_s)
331
- comp *= -1 if order == :desc
332
- return comp unless comp == 0
333
- end
334
- return 0
335
- end
336
-
337
- def sorter
338
- @sorter ||= lambda{|t1,t2| compare(t1, t2)}
339
- end
340
-
341
- def +(other)
342
- other = OrderingKey.coerce(other)
343
- OrderingKey.new(@ordering + other.ordering)
344
- end
345
-
346
- end # class OrderingKey
37
+ #
38
+ # Provides tooling methods that are used here and there in Alf.
39
+ #
40
+ module Tools
41
+ require 'alf/tools/coerce'
42
+ require 'alf/tools/to_ruby_literal'
43
+ require 'alf/tools/to_lispy'
44
+ require 'alf/tools/tuple_handle'
45
+ require 'alf/tools/miscellaneous'
347
46
 
348
47
  extend Tools
349
48
  end # module Tools
@@ -375,211 +74,11 @@ module Alf
375
74
  # for details.
376
75
  #
377
76
  class Environment
378
-
379
- # Registered environments
380
- @@environments = []
381
-
382
- #
383
- # Register an environment class under a specific name.
384
- #
385
- # Registered class must implement a recognizes? method that takes an array
386
- # of arguments; it must returns true if an environment instance can be built
387
- # using those arguments, false otherwise. Please be very specific in the
388
- # implementation for returning true. See also autodetect and recognizes?
389
- #
390
- # @param [Symbol] name name of the environment kind
391
- # @param [Class] clazz class that implemented the environment
392
- #
393
- def self.register(name, clazz)
394
- @@environments << [name, clazz]
395
- (class << self; self; end).
396
- send(:define_method, name) do |*args|
397
- clazz.new(*args)
398
- end
399
- end
400
-
401
- #
402
- # Auto-detect the environment to use for specific arguments.
403
- #
404
- # This method returns an instance of the first registered Environment class
405
- # that returns true to an invocation of recognizes?(args). It raises an
406
- # ArgumentError if no such class can be found.
407
- #
408
- # @return [Environment] an environment instance
409
- # @raise [ArgumentError] when no registered class recognizes the arguments
410
- #
411
- def self.autodetect(*args)
412
- if (args.size == 1) && args.first.is_a?(Environment)
413
- return args.first
414
- else
415
- @@environments.each do |name,clazz|
416
- return clazz.new(*args) if clazz.recognizes?(args)
417
- end
418
- end
419
- raise ArgumentError, "Unable to auto-detect Environment with #{args.inspect}"
420
- end
421
-
422
- #
423
- # (see Environment.autodetect)
424
- #
425
- def self.coerce(*args)
426
- autodetect(*args)
427
- end
428
-
429
- #
430
- # Returns true _args_ can be used for building an environment instance,
431
- # false otherwise.
432
- #
433
- # When returning true, an immediate invocation of new(*args) should
434
- # succeed. While runtime exception are admitted (no such database, for
435
- # example), argument errors should not occur (missing argument, wrong
436
- # typing, etc.).
437
- #
438
- # Please be specific in the implementation of this extension point, as
439
- # registered environments for a chain and each of them should have a
440
- # chance of being selected.
441
- #
442
- def self.recognizes?(args)
443
- false
444
- end
445
-
446
- #
447
- # Returns a dataset whose name is provided.
448
- #
449
- # This method resolves named datasets to tuple enumerables. When the
450
- # dataset exists, this method must return an Iterator, typically a
451
- # Reader instance. Otherwise, it must throw a NoSuchDatasetError.
452
- #
453
- # @param [Symbol] name the name of a dataset
454
- # @return [Iterator] an iterator, typically a Reader instance
455
- # @raise [NoSuchDatasetError] when the dataset does not exists
456
- #
457
- def dataset(name)
458
- end
459
- undef :dataset
460
-
461
- #
462
- # Branches this environment and puts some additional explicit
463
- # definitions.
464
- #
465
- # This method is provided for (with ...) expressions and should not
466
- # be overriden by subclasses.
467
- #
468
- # @param [Hash] a set of (name, Iterator) pairs.
469
- # @return [Environment] an environment instance with new definitions set
470
- #
471
- def branch(defs)
472
- Explicit.new(defs, self)
473
- end
474
-
475
- #
476
- # Specialization of Environment that works with explicitely defined
477
- # datasources and allow branching and unbranching.
478
- #
479
- class Explicit < Environment
480
-
481
- #
482
- # Creates a new environment instance with initial definitions
483
- # and optional child environment.
484
- #
485
- def initialize(defs = {}, child = nil)
486
- @defs = defs
487
- @child = child
488
- end
489
-
490
- #
491
- # Unbranches this environment and returns its child
492
- #
493
- def unbranch
494
- @child
495
- end
496
-
497
- # (see Environment#dataset)
498
- def dataset(name)
499
- if @defs.has_key?(name)
500
- @defs[name]
501
- elsif @child
502
- @child.dataset(name)
503
- else
504
- raise "No such dataset #{name}"
505
- end
506
- end
507
-
508
- end # class Explicit
509
-
510
- #
511
- # Specialization of Environment to work on files of a given folder.
512
- #
513
- # This kind of environment resolves datasets by simply looking at
514
- # recognized files in a specific folder. "Recognized" files are simply
515
- # those for which a Reader subclass has been previously registered.
516
- # This environment then serves reader instances.
517
- #
518
- class Folder < Environment
519
-
520
- #
521
- # (see Environment.recognizes?)
522
- #
523
- # Returns true if args contains onely a String which is an existing
524
- # folder.
525
- #
526
- def self.recognizes?(args)
527
- (args.size == 1) &&
528
- args.first.is_a?(String) &&
529
- File.directory?(args.first.to_s)
530
- end
531
-
532
- #
533
- # Creates an environment instance, wired to the specified folder.
534
- #
535
- # @param [String] folder path to the folder to use as dataset source
536
- #
537
- def initialize(folder)
538
- @folder = folder
539
- end
540
-
541
- # (see Environment#dataset)
542
- def dataset(name)
543
- if file = find_file(name)
544
- Reader.reader(file, self)
545
- else
546
- raise "No such dataset #{name} (#{@folder})"
547
- end
548
- end
549
-
550
- protected
551
-
552
- def find_file(name)
553
- # TODO: refactor this, because it allows getting out of the folder
554
- if File.exists?(name.to_s)
555
- name.to_s
556
- elsif File.exists?(explicit = File.join(@folder, name.to_s)) &&
557
- File.file?(explicit)
558
- explicit
559
- else
560
- Dir[File.join(@folder, "#{name}.*")].find do |f|
561
- File.file?(f)
562
- end
563
- end
564
- end
565
-
566
- Environment.register(:folder, self)
567
- end # class Folder
568
-
569
- #
570
- # Returns the default environment
571
- #
572
- def self.default
573
- examples
574
- end
575
-
576
- #
577
- # Returns the examples environment
578
- #
579
- def self.examples
580
- folder File.expand_path('../../examples/operators', __FILE__)
581
- end
582
-
77
+ require 'alf/environment/class_methods'
78
+ require 'alf/environment/base'
79
+ require 'alf/environment/explicit'
80
+ require 'alf/environment/folder'
81
+
583
82
  end # class Environment
584
83
 
585
84
  #
@@ -600,48 +99,9 @@ module Alf
600
99
  module Iterator
601
100
  include Enumerable
602
101
 
603
- #
604
- # Wire the iterator input and an optional execution environment.
605
- #
606
- # Iterators (typically Reader and Operator instances) work from input data
607
- # that come from files, or other operators, and so on. This method wires
608
- # this input data to the iterator. Wiring is required before any attempt
609
- # to call each, unless autowiring occurs at construction. The exact kind of
610
- # input object is left at discretion of Iterator implementations.
611
- #
612
- # @param [Object] input the iterator input, at discretion of the Iterator
613
- # implementation.
614
- # @param [Environment] environment an optional environment for resolving
615
- # named datasets if needed.
616
- # @return [Object] self
617
- #
618
- def pipe(input, environment = nil)
619
- self
620
- end
621
- undef :pipe
622
-
623
- #
624
- # Coerces something to an iterator
625
- #
626
- def self.coerce(arg, environment = nil)
627
- case arg
628
- when Iterator, Array
629
- arg
630
- else
631
- Reader.coerce(arg, environment)
632
- end
633
- end
634
-
635
- #
636
- # Converts this iterator to an in-memory Relation.
637
- #
638
- # @return [Relation] a relation instance, as the set of tuples
639
- # that would be yield by this iterator.
640
- #
641
- def to_rel
642
- Relation::coerce(self)
643
- end
644
-
102
+ require 'alf/iterator/class_methods'
103
+ require 'alf/iterator/base'
104
+ require 'alf/iterator/proxy'
645
105
  end # module Iterator
646
106
 
647
107
  #
@@ -673,2914 +133,187 @@ module Alf
673
133
  #
674
134
  class Reader
675
135
  include Iterator
676
-
677
- # Registered readers
678
- @@readers = []
679
-
680
- #
681
- # Registers a reader class associated with specific file extensions
136
+
137
+ require 'alf/reader/class_methods'
138
+ require 'alf/reader/base'
139
+ require 'alf/reader/rash'
140
+ require 'alf/reader/alf_file'
141
+ end # class Reader
142
+
143
+ #
144
+ # Renders a relation (given by any Iterator) in a specific format.
145
+ #
146
+ # A renderer takes an Iterator instance as input and renders it on an output
147
+ # stream. Renderers are **not** iterators themselves, even if they mimic the
148
+ # {#pipe} method. Their usage is made via the {#execute} method.
149
+ #
150
+ # Similarly to the {Reader} class, this one provides a registration mechanism
151
+ # for specific output formats. The common scenario is as follows:
152
+ #
153
+ # # Register a new renderer for :foo format (automatically provides the
154
+ # # '--foo Render output as a foo stream' option of 'alf show') and with
155
+ # # the FooRenderer class for handling rendering.
156
+ # Renderer.register(:foo, "as a foo stream", FooRenderer)
157
+ #
158
+ # # Later on, you can request a renderer instance for a specific format
159
+ # # as follows (wiring input is optional)
160
+ # r = Renderer.renderer(:foo, [an Iterator])
161
+ #
162
+ # # Also, a factory method is automatically installed on the Renderer class
163
+ # # itself.
164
+ # r = Renderer.foo([an Iterator])
165
+ #
166
+ class Renderer
167
+ require 'alf/renderer/class_methods'
168
+ require 'alf/renderer/base'
169
+ require 'alf/renderer/rash'
170
+ require 'alf/renderer/text'
171
+
172
+ end # class Renderer
173
+
174
+ #
175
+ # Marker module and namespace for Alf main commands, those that are **not**
176
+ # operators at all.
177
+ #
178
+ module Command
179
+ require 'alf/command/class_methods'
180
+ require 'alf/command/doc_manager'
181
+
182
+ # This is the main documentation extractor
183
+ DOC_EXTRACTOR = DocManager.new
184
+
682
185
  #
683
- # Registered class must provide a constructor with the following signature
684
- # <code>new(path_or_io, environment = nil)</code>. The name must be a symbol
685
- # which can safely be used as a ruby method name. A factory class method of
686
- # that name and same signature is automatically installed on the Reader
687
- # class.
186
+ # Delegator command factory
688
187
  #
689
- # @param [Symbol] name a name for the kind of data decoded
690
- # @param [Array] extensions file extensions mapped to the registered reader
691
- # class (should include the '.', e.g. '.foo')
692
- # @param [Class] class Reader subclass used to decode this kind of files
693
- #
694
- def self.register(name, extensions, clazz)
695
- @@readers << [name, extensions, clazz]
696
- (class << self; self; end).
697
- send(:define_method, name) do |*args|
698
- clazz.new(*args)
699
- end
188
+ def Alf.Delegator()
189
+ Quickl::Delegator(){|builder|
190
+ builder.doc_extractor = DOC_EXTRACTOR
191
+ yield(builder) if block_given?
192
+ }
700
193
  end
701
-
194
+
702
195
  #
703
- # When filepath is a String, returns a reader instance for a specific file
704
- # whose path is given as argument. Otherwise, delegate the call to
705
- # <code>coerce(filepath)</code>
196
+ # Command factory
706
197
  #
707
- # @param [String] filepath path to a file for which extension is recognized
708
- # @param [Array] args optional additional arguments that must be passed at
709
- # reader's class new method.
710
- # @return [Reader] a reader instance
711
- #
712
- def self.reader(filepath, *args)
713
- if filepath.is_a?(String)
714
- ext = File.extname(filepath)
715
- if registered = @@readers.find{|r| r[1].include?(ext)}
716
- registered[2].new(filepath, *args)
717
- else
718
- raise "No registered reader for #{ext} (#{filepath})"
719
- end
720
- elsif args.empty?
721
- coerce(filepath)
722
- else
723
- raise ArgumentError, "Unable to return a reader for #{filepath} and #{args}"
724
- end
725
- end
726
-
727
- #
728
- # Coerces an argument to a reader, using an optional environment to convert
729
- # named datasets.
730
- #
731
- # This method automatically provides readers for Strings and Symbols through
732
- # passed environment (**not** through the reader factory) and for IO objects
733
- # (through Rash reader). It is part if Alf's internals and should be used
734
- # with care.
735
- #
736
- def self.coerce(arg, environment = nil)
737
- case arg
738
- when Reader
739
- arg
740
- when IO
741
- rash(arg, environment)
742
- when String, Symbol
743
- if environment
744
- environment.dataset(arg.to_sym)
745
- else
746
- raise "No environment set"
747
- end
748
- else
749
- raise ArgumentError, "Unable to coerce #{arg.inspect} to a reader"
750
- end
751
- end
752
-
753
- # Default reader options
754
- DEFAULT_OPTIONS = {}
755
-
756
- # @return [Environment] Wired environment
757
- attr_accessor :environment
758
-
759
- # @return [String or IO] Input IO, or file name
760
- attr_accessor :input
761
-
762
- # @return [Hash] Reader's options
763
- attr_accessor :options
764
-
765
- #
766
- # Creates a reader instance.
767
- #
768
- # @param [String or IO] path to a file or IO object for input
769
- # @param [Environment] environment wired environment, serving this reader
770
- # @param [Hash] options Reader's options (see doc of subclasses)
771
- #
772
- def initialize(*args)
773
- @input, @environment, @options = case args.first
774
- when String, IO, StringIO
775
- Tools.varargs(args, [args.first.class, Environment, Hash])
776
- else
777
- Tools.varargs(args, [String, Environment, Hash])
778
- end
779
- @options = self.class.const_get(:DEFAULT_OPTIONS).merge(@options || {})
780
- end
781
-
782
- #
783
- # (see Iterator#pipe)
784
- #
785
- def pipe(input, env = environment)
786
- @input = input
787
- self
788
- end
789
-
790
- #
791
- # (see Iterator#each)
792
- #
793
- # @private the default implementation reads lines of the input stream and
794
- # yields the block with <code>line2tuple(line)</code> on each of them. This
795
- # method may be overriden if this behavior does not fit reader's needs.
796
- #
797
- def each
798
- each_input_line do |line|
799
- tuple = line2tuple(line)
800
- yield tuple unless tuple.nil?
801
- end
802
- end
803
-
804
- protected
805
-
806
- #
807
- # Returns the input file path, or nil if this Reader is bound to an IO
808
- # directly.
809
- #
810
- def input_path
811
- input.is_a?(String) ? input : nil
812
- end
813
-
814
- #
815
- # Coerces the input object to an IO and yields the block with it.
816
- #
817
- # StringIO and IO input are yield directly while file paths are first
818
- # opened in read mode and then yield.
819
- #
820
- def with_input_io
821
- case input
822
- when IO, StringIO
823
- yield input
824
- when String
825
- File.open(input, 'r'){|io| yield io}
826
- else
827
- raise "Unable to convert #{input} to an IO object"
828
- end
829
- end
830
-
831
- #
832
- # Returns the whole input text.
833
- #
834
- # This feature should only be used by subclasses on inputs that are
835
- # small enough to fit in memory. Consider implementing readers without this
836
- # feature on files that could be larger.
837
- #
838
- def input_text
839
- with_input_io{|io| io.readlines.join}
840
- end
841
-
842
- #
843
- # Yields the block with each line of the input text in turn.
844
- #
845
- # This method is an helper for files that capture one tuple on each input
846
- # line. It should be used in those cases, as the resulting reader will not
847
- # load all input in memory but serve tuples on demand.
848
- #
849
- def each_input_line
850
- with_input_io{|io| io.each_line(&Proc.new)}
851
- end
852
-
853
- #
854
- # Converts a line previously read from the input stream to a tuple.
855
- #
856
- # The line is simply ignored is this method return nil. Errors should be
857
- # properly handled by raising exceptions. This method MUST be implemented
858
- # by subclasses unless each is overriden.
859
- #
860
- def line2tuple(line)
861
- end
862
- undef :line2tuple
863
-
864
- #
865
- # Specialization of the Reader contract for .rash files.
866
- #
867
- # A .rash file/stream contains one ruby hash literal on each line. This
868
- # reader simply decodes each of them in turn with Kernel.eval, providing a
869
- # state-less reader (that is, tuples are not all loaded in memory at once).
870
- #
871
- class Rash < Reader
872
-
873
- # (see Reader#line2tuple)
874
- def line2tuple(line)
875
- begin
876
- h = Kernel.eval(line)
877
- raise "hash expected, got #{h}" unless h.is_a?(Hash)
878
- rescue Exception => ex
879
- $stderr << "Skipping #{line.strip}: #{ex.message}\n"
880
- nil
881
- else
882
- return h
883
- end
884
- end
885
-
886
- Reader.register(:rash, [".rash"], self)
887
- end # class Rash
888
-
889
- #
890
- # Specialization of the Reader contrat for .alf files.
891
- #
892
- # A .alf file simply contains a query expression in the Lispy DSL. This
893
- # reader decodes and compiles the expression and delegates the enumeration
894
- # to the obtained operator.
895
- #
896
- # Note that an Environment must be wired at creation or piping time.
897
- # NoSuchDatasetError will certainly occur otherwise.
898
- #
899
- class AlfFile < Reader
900
-
901
- # (see Reader#each)
902
- def each
903
- op = Alf.lispy(environment).compile(input_text, input_path)
904
- op.each(&Proc.new)
905
- end
906
-
907
- Reader.register(:alf, [".alf"], self)
908
- end # module AlfFile
909
-
910
- end # module Reader
911
-
912
- #
913
- # Renders a relation (given by any Iterator) in a specific format.
914
- #
915
- # A renderer takes an Iterator instance as input and renders it on an output
916
- # stream. Renderers are **not** iterators themselves, even if they mimic the
917
- # {#pipe} method. Their usage is made via the {#execute} method.
918
- #
919
- # Similarly to the {Reader} class, this one provides a registration mechanism
920
- # for specific output formats. The common scenario is as follows:
921
- #
922
- # # Register a new renderer for :foo format (automatically provides the
923
- # # '--foo Render output as a foo stream' option of 'alf show') and with
924
- # # the FooRenderer class for handling rendering.
925
- # Renderer.register(:foo, "as a foo stream", FooRenderer)
926
- #
927
- # # Later on, you can request a renderer instance for a specific format
928
- # # as follows (wiring input is optional)
929
- # r = Renderer.renderer(:foo, [an Iterator])
930
- #
931
- # # Also, a factory method is automatically installed on the Renderer class
932
- # # itself.
933
- # r = Renderer.foo([an Iterator])
934
- #
935
- class Renderer
936
-
937
- # Registered renderers
938
- @@renderers = []
939
-
940
- #
941
- # Register a renderering class with a given name and description.
942
- #
943
- # Registered class must at least provide a constructor with an empty
944
- # signature. The name must be a symbol which can safely be used as a ruby
945
- # method name. A factory class method of that name and degelation signature
946
- # is automatically installed on the Renderer class.
947
- #
948
- # @param [Symbol] name a name for the output format
949
- # @param [String] description an output format description (for 'alf show')
950
- # @param [Class] clazz Renderer subclass used to render in this format
951
- #
952
- def self.register(name, description, clazz)
953
- @@renderers << [name, description, clazz]
954
- (class << self; self; end).
955
- send(:define_method, name) do |*args|
956
- clazz.new(*args)
957
- end
958
- end
959
-
960
- #
961
- # Returns a Renderer instance for the given output format name.
962
- #
963
- # @param [Symbol] name name of an output format previously registered
964
- # @param [...] args other arguments to pass to the renderer constructor
965
- # @return [Renderer] a Renderer instance, already wired if args are
966
- # provided
967
- #
968
- def self.renderer(name, *args)
969
- if r = @@renderers.find{|triple| triple[0] == name}
970
- r[2].new(*args)
971
- else
972
- raise "No renderer registered for #{name}"
973
- end
974
- end
975
-
976
- #
977
- # Yields each (name,description,clazz) previously registered in turn
978
- #
979
- def self.each_renderer
980
- @@renderers.each(&Proc.new)
981
- end
982
-
983
- # Default renderer options
984
- DEFAULT_OPTIONS = {}
985
-
986
- # Renderer input (typically an Iterator)
987
- attr_accessor :input
988
-
989
- # @return [Environment] Optional wired environment
990
- attr_accessor :environment
991
-
992
- # @return [Hash] Renderer's options
993
- attr_accessor :options
994
-
995
- #
996
- # Creates a reader instance.
997
- #
998
- # @param [Iterator] iterator an Iterator of tuples to render
999
- # @param [Environment] environment wired environment, serving this reader
1000
- # @param [Hash] options Reader's options (see doc of subclasses)
1001
- #
1002
- def initialize(*args)
1003
- @input, @environment, @options = case args.first
1004
- when Array
1005
- Tools.varargs(args, [Array, Environment, Hash])
1006
- else
1007
- Tools.varargs(args, [Iterator, Environment, Hash])
1008
- end
1009
- @options = self.class.const_get(:DEFAULT_OPTIONS).merge(@options || {})
1010
- end
1011
-
1012
- #
1013
- # Sets the renderer input.
1014
- #
1015
- # This method mimics {Iterator#pipe} and have the same contract.
1016
- #
1017
- def pipe(input, env = environment)
1018
- self.environment = env
1019
- self.input = input
1020
- self
1021
- end
1022
-
1023
- #
1024
- # Executes the rendering, outputting the resulting tuples on the provided
1025
- # output buffer.
1026
- #
1027
- # The default implementation simply coerces the input as an Iterator and
1028
- # delegates the call to {#render}.
1029
- #
1030
- def execute(output = $stdout)
1031
- render(Iterator.coerce(input, environment), output)
1032
- end
1033
-
1034
- protected
1035
-
1036
- #
1037
- # Renders tuples served by the iterator to the output buffer provided and
1038
- # returns the latter.
1039
- #
1040
- # This method must be implemented by subclasses unless {#execute} is
1041
- # overriden.
1042
- #
1043
- def render(iterator, output)
1044
- end
1045
- undef :render
1046
-
1047
- #
1048
- # Implements the Renderer contract through inspect
1049
- #
1050
- class Rash < Renderer
1051
-
1052
- # (see Renderer#render)
1053
- def render(input, output)
1054
- input.each do |tuple|
1055
- output << Myrrha.to_ruby_literal(tuple) << "\n"
1056
- end
1057
- output
1058
- end
1059
-
1060
- Renderer.register(:rash, "as ruby hashes", self)
1061
- end # class Rash
1062
-
1063
- end # module Renderer
1064
-
1065
- #
1066
- # Provides a factory over Alf operators and handles the interface with
1067
- # Quickl for commandline support.
1068
- #
1069
- # This module is part of Alf's internal architecture and should not be used
1070
- # at all by third-party projects.
1071
- #
1072
- module Factory
1073
-
1074
- # @see Quickl::Command
1075
- def Command(file, line)
1076
- Quickl::Command(file, line){|builder|
1077
- builder.command_parent = Alf::Command::Main
1078
- yield(builder) if block_given?
1079
- }
1080
- end
1081
-
1082
- # @see Operator
1083
- def Operator(file, line)
1084
- Command(file, line) do |b|
1085
- b.instance_module Alf::Operator
1086
- end
1087
- end
1088
-
1089
- extend Factory
1090
- end # module Factory
1091
-
1092
- #
1093
- # Marker module and namespace for Alf main commands, those that are **not**
1094
- # operators at all.
1095
- #
1096
- module Command
1097
-
1098
- #
1099
- # alf - Classy data-manipulation dressed in a DSL (+ commandline)
1100
- #
1101
- # SYNOPSIS
1102
- # alf [--version] [--help]
1103
- # alf -e '(lispy command)'
1104
- # alf [FILE.alf]
1105
- # alf [alf opts] OPERATOR [operator opts] ARGS ...
1106
- # alf help OPERATOR
1107
- #
1108
- # OPTIONS
1109
- # #{summarized_options}
1110
- #
1111
- # RELATIONAL COMMANDS
1112
- # #{summarized_subcommands subcommands.select{|cmd|
1113
- # cmd.include?(Alf::Operator::Relational) &&
1114
- # !cmd.include?(Alf::Operator::Experimental)
1115
- # }}
1116
- #
1117
- # EXPERIMENTAL OPERATORS
1118
- # #{summarized_subcommands subcommands.select{|cmd|
1119
- # cmd.include?(Alf::Operator::Relational) &&
1120
- # cmd.include?(Alf::Operator::Experimental)
1121
- # }}
1122
- #
1123
- # NON-RELATIONAL COMMANDS
1124
- # #{summarized_subcommands subcommands.select{|cmd|
1125
- # cmd.include?(Alf::Operator::NonRelational)
1126
- # }}
1127
- #
1128
- # OTHER NON-RELATIONAL COMMANDS
1129
- # #{summarized_subcommands subcommands.select{|cmd|
1130
- # cmd.include?(Alf::Command)
1131
- # }}
1132
- #
1133
- # See '#{program_name} help COMMAND' for details about a specific command.
1134
- #
1135
- class Main < Quickl::Delegator(__FILE__, __LINE__)
1136
- include Command
1137
-
1138
- # Environment instance to use to get base iterators
1139
- attr_accessor :environment
1140
-
1141
- # Output renderer
1142
- attr_accessor :renderer
1143
-
1144
- # Creates a command instance
1145
- def initialize(env = Environment.default)
1146
- @environment = env
1147
- end
1148
-
1149
- # Install options
1150
- options do |opt|
1151
- @execute = false
1152
- opt.on("-e", "--execute", "Execute one line of script (Lispy API)") do
1153
- @execute = true
1154
- end
1155
-
1156
- @renderer = nil
1157
- Renderer.each_renderer do |name,descr,clazz|
1158
- opt.on("--#{name}", "Render output #{descr}"){
1159
- @renderer = clazz.new
1160
- }
1161
- end
1162
-
1163
- opt.on('--env=ENV',
1164
- "Set the environment to use") do |value|
1165
- @environment = Environment.autodetect(value)
1166
- end
1167
-
1168
- opt.on('-rlibrary', "require the library, before executing alf") do |value|
1169
- require(value)
1170
- end
1171
-
1172
- opt.on_tail('-h', "--help", "Show help") do
1173
- raise Quickl::Help
1174
- end
1175
-
1176
- opt.on_tail('-v', "--version", "Show version") do
1177
- raise Quickl::Exit, "alf #{Alf::VERSION}"\
1178
- " (c) 2011, Bernard Lambeau"
1179
- end
1180
- end # Alf's options
1181
-
1182
- #
1183
- def _normalize(args)
1184
- opts = []
1185
- while !args.empty? && (args.first =~ /^\-/)
1186
- opts << args.shift
1187
- end
1188
- if args.empty? or (args.size == 1 && File.exists?(args.first))
1189
- opts << "exec"
1190
- end
1191
- opts += args
1192
- end
1193
-
1194
- #
1195
- # Overrided because Quickl only keep --options but modifying it there
1196
- # should probably be considered a broken API.
1197
- #
1198
- def _run(argv = [])
1199
- argv = _normalize(argv)
1200
-
1201
- # 1) Extract my options and parse them
1202
- my_argv = []
1203
- while argv.first =~ /^-/
1204
- my_argv << argv.shift
1205
- end
1206
- parse_options(my_argv)
1207
-
1208
- # 2) build the operator according to -e option
1209
- operator = if @execute
1210
- Alf.lispy(environment).compile(argv.first)
1211
- else
1212
- super
1213
- end
1214
-
1215
- # 3) if there is a requester, then we do the job (assuming bin/alf)
1216
- # with the renderer to use. Otherwise, we simply return built operator
1217
- if operator && requester
1218
- renderer = self.renderer ||= Renderer::Rash.new
1219
- renderer.pipe(operator, environment).execute($stdout)
1220
- else
1221
- operator
1222
- end
1223
- end
1224
-
1225
- end
1226
-
1227
- #
1228
- # Output input tuples through a specific renderer (text, yaml, ...)
1229
- #
1230
- # SYNOPSIS
1231
- # #{program_name} #{command_name} DATASET
1232
- #
1233
- # OPTIONS
1234
- # #{summarized_options}
1235
- #
1236
- # DESCRIPTION
1237
- #
1238
- # When a dataset name is specified as commandline arg, request the
1239
- # environment to provide this dataset and prints it. Otherwise, take what
1240
- # comes on standard input.
1241
- #
1242
- # Note that this command is not an operator and should not be piped anymore.
1243
- #
1244
- class Show < Factory::Command(__FILE__, __LINE__)
1245
- include Command
1246
-
1247
- options do |opt|
1248
- @renderer = nil
1249
- Renderer.each_renderer do |name,descr,clazz|
1250
- opt.on("--#{name}", "Render output #{descr}"){
1251
- @renderer = clazz.new
1252
- }
1253
- end
1254
- end
1255
-
1256
- def execute(args)
1257
- requester.renderer = (@renderer || requester.renderer || Text::Renderer.new)
1258
- args = [ $stdin ] if args.empty?
1259
- args.first
1260
- end
1261
-
1262
- end # class Show
1263
-
1264
- #
1265
- # Executes an .alf file on current environment
1266
- #
1267
- # SYNOPSIS
1268
- # #{program_name} #{command_name} [FILE]
1269
- #
1270
- # OPTIONS
1271
- # #{summarized_options}
1272
- #
1273
- # DESCRIPTION
1274
- #
1275
- # This command executes the .alf file passed as first argument (or what comes
1276
- # on standard input) as a alf query to be executed on the current environment.
1277
- #
1278
- class Exec < Factory::Command(__FILE__, __LINE__)
1279
- include Command
1280
-
1281
- def execute(args)
1282
- Reader.alf(args.first || $stdin, requester.environment)
1283
- end
1284
-
1285
- end # class Exec
1286
-
1287
- #
1288
- # Show help about a specific command
1289
- #
1290
- # SYNOPSIS
1291
- # #{program_name} #{command_name} COMMAND
1292
- #
1293
- class Help < Factory::Command(__FILE__, __LINE__)
1294
- include Command
1295
-
1296
- # Let NoSuchCommandError be passed to higher stage
1297
- no_react_to Quickl::NoSuchCommand
1298
-
1299
- # Command execution
1300
- def execute(args)
1301
- if args.size != 1
1302
- puts super_command.help
1303
- else
1304
- cmd = has_command!(args.first, super_command)
1305
- puts cmd.help
1306
- end
1307
- nil
1308
- end
1309
-
1310
- end # class Help
1311
-
1312
- end
1313
-
1314
- #
1315
- # Marker for all operators, relational and non-relational ones.
1316
- #
1317
- module Operator
1318
- include Iterator, Tools
1319
-
1320
- #
1321
- # Yields non-relational then relational operators, in turn.
1322
- #
1323
- def self.each
1324
- Operator::NonRelational.each{|x| yield(x)}
1325
- Operator::Relational.each{|x| yield(x)}
1326
- end
1327
-
1328
- #
1329
- # Encapsulates method that allows making operator introspection, that is,
1330
- # knowing operator cardinality and similar stuff.
1331
- #
1332
- module Introspection
1333
-
1334
- #
1335
- # Returns true if this operator is an unary operator, false otherwise
1336
- #
1337
- def unary?
1338
- ancestors.include?(Operator::Unary)
1339
- end
1340
-
1341
- #
1342
- # Returns true if this operator is a binary operator, false otherwise
1343
- #
1344
- def binary?
1345
- ancestors.include?(Operator::Binary)
1346
- end
1347
-
1348
- end # module Introspection
1349
-
1350
- # Ensures that the Introspection module is set on real operators
1351
- def self.included(mod)
1352
- mod.extend(Introspection) if mod.is_a?(Class)
1353
- end
1354
-
1355
- #
1356
- # Encapsulates method definitions that convert operators to Quickl
1357
- # commands
1358
- #
1359
- module CommandMethods
1360
-
1361
- protected
1362
-
1363
- #
1364
- # Configures the operator from arguments taken from command line.
1365
- #
1366
- # This method is intended to be overriden by subclasses and must return the
1367
- # operator itself.
1368
- #
1369
- def set_args(args)
1370
- self
1371
- end
1372
-
1373
- #
1374
- # Overrides Quickl::Command::Single#_run to handles the '--' separator
1375
- # correctly.
1376
- #
1377
- # This is because parse_options tend to eat the '--' separator... This
1378
- # could be handled in Quickl itself, but it should be considered a broken
1379
- # API and will only be available in quickl >= 0.3.0 (probably)
1380
- #
1381
- def _run(argv = [])
1382
- operands, args = split_command_args(argv).collect do |arr|
1383
- parse_options(arr)
1384
- end
1385
- self.set_args(args)
1386
- if operands = command_line_operands(operands)
1387
- env = environment || (requester ? requester.environment : nil)
1388
- self.pipe(operands, env)
1389
- end
1390
- self
1391
- end
1392
-
1393
- def split_command_args(args)
1394
- case (i = args.index("--"))
1395
- when NilClass
1396
- [args, []]
1397
- when 0
1398
- [[ $stdin ], args[1..-1]]
1399
- else
1400
- [args[0...i], args[i+1..-1]]
1401
- end
1402
- end
1403
-
1404
- def command_line_operands(operands)
1405
- operands
1406
- end
1407
-
1408
- end # module CommandMethods
1409
- include CommandMethods
1410
-
1411
- # Operators input datasets
1412
- attr_accessor :datasets
1413
-
1414
- # Optional environment
1415
- attr_reader :environment
1416
-
1417
- # Sets the environment on this operator and propagate on
1418
- # datasets
1419
- def environment=(env)
1420
- # this is to avoid infinite loop (TODO: why is there infinite loops??)
1421
- return if @environment == env
1422
-
1423
- # set and propagate on children
1424
- @environment = env
1425
- datasets.each do |dataset|
1426
- if dataset.respond_to?(:environment)
1427
- dataset.environment = env
1428
- end
1429
- end if datasets
1430
-
1431
- env
1432
- end
1433
-
1434
- #
1435
- # Sets the operator input
1436
- #
1437
- def pipe(input, env = environment)
1438
- raise NotImplementedError, "Operator#pipe should be overriden"
1439
- end
1440
-
1441
- #
1442
- # Yields each tuple in turn
1443
- #
1444
- # This method is implemented in a way that ensures that all operators are
1445
- # thread safe. It is not intended to be overriden, use _each instead.
1446
- #
1447
- def each
1448
- op = self.dup
1449
- op._prepare
1450
- op._each(&Proc.new)
1451
- end
1452
-
1453
- protected
1454
-
1455
- #
1456
- # Prepares the iterator before subsequent call to _each.
1457
- #
1458
- # This method is intended to be overriden by suclasses to install what's
1459
- # need for successful iteration. The default implementation does nothing.
1460
- #
1461
- def _prepare
1462
- end
1463
-
1464
- # Internal implementation of the iterator.
1465
- #
1466
- # This method must be implemented by subclasses. It is safe to use instance
1467
- # variables (typically initialized in _prepare) here.
1468
- #
1469
- def _each
1470
- end
1471
-
1472
- #
1473
- # Specialization of Operator for operators that work on a unary input
1474
- #
1475
- module Unary
1476
- include Operator
1477
-
1478
- #
1479
- # Sets the operator input
1480
- #
1481
- def pipe(input, env = environment)
1482
- self.environment = env
1483
- self.datasets = [ input ]
1484
- self
1485
- end
1486
-
1487
- protected
1488
-
1489
- def command_line_operands(operands)
1490
- operands.first || $stdin
1491
- end
1492
-
1493
- #
1494
- # Simply returns the first dataset
1495
- #
1496
- def input
1497
- Iterator.coerce(datasets.first, environment)
1498
- end
1499
-
1500
- #
1501
- # Yields the block with each input tuple.
1502
- #
1503
- # This method should be preferred to <code>input.each</code> when possible.
1504
- #
1505
- def each_input_tuple
1506
- input.each(&Proc.new)
1507
- end
1508
-
1509
- end # module Unary
1510
-
1511
- #
1512
- # Specialization of Operator for operators that work on a binary input
1513
- #
1514
- module Binary
1515
- include Operator
1516
-
1517
- #
1518
- # Sets the operator input
1519
- #
1520
- def pipe(input, env = environment)
1521
- self.environment = env
1522
- self.datasets = input
1523
- self
1524
- end
1525
-
1526
- protected
1527
-
1528
- def command_line_operands(operands)
1529
- (operands.size < 2) ? ([$stdin] + operands) : operands
1530
- end
1531
-
1532
- # Returns the left operand
1533
- def left
1534
- Iterator.coerce(datasets.first, environment)
1535
- end
1536
-
1537
- # Returns the right operand
1538
- def right
1539
- Iterator.coerce(datasets.last, environment)
1540
- end
1541
-
1542
- end # module Binary
1543
-
1544
- #
1545
- # Specialization of Operator for operators that simply convert single tuples
1546
- # to single tuples.
1547
- #
1548
- module Transform
1549
- include Unary
1550
-
1551
- protected
1552
-
1553
- # (see Operator#_each)
1554
- def _each
1555
- each_input_tuple do |tuple|
1556
- yield _tuple2tuple(tuple)
1557
- end
1558
- end
1559
-
1560
- #
1561
- # Transforms an input tuple to an output tuple
1562
- #
1563
- def _tuple2tuple(tuple)
1564
- end
1565
-
1566
- end # module Transform
1567
-
1568
- #
1569
- # Specialization of Operator for implementing operators that rely on a
1570
- # cesure algorithm.
1571
- #
1572
- module Cesure
1573
- include Unary
1574
-
1575
- protected
1576
-
1577
- # (see Operator#_each)
1578
- def _each
1579
- receiver, proj_key, prev_key = Proc.new, cesure_key, nil
1580
- each_input_tuple do |tuple|
1581
- cur_key = proj_key.project(tuple)
1582
- if cur_key != prev_key
1583
- flush_cesure(prev_key, receiver) unless prev_key.nil?
1584
- start_cesure(cur_key, receiver)
1585
- prev_key = cur_key
1586
- end
1587
- accumulate_cesure(tuple, receiver)
1588
- end
1589
- flush_cesure(prev_key, receiver) unless prev_key.nil?
1590
- end
1591
-
1592
- def cesure_key
1593
- end
1594
-
1595
- def start_cesure(key, receiver)
1596
- end
1597
-
1598
- def accumulate_cesure(tuple, receiver)
1599
- end
1600
-
1601
- def flush_cesure(key, receiver)
1602
- end
1603
-
1604
- end # module Cesure
1605
-
1606
- #
1607
- # Specialization of Operator for operators that are shortcuts for longer
1608
- # expressions.
1609
- #
1610
- module Shortcut
1611
- include Operator
1612
-
1613
- #
1614
- # Sets the operator input
1615
- #
1616
- def pipe(input, env = environment)
1617
- self.environment = env
1618
- self.datasets = input
1619
- self
1620
- end
1621
-
1622
- protected
1623
-
1624
- # (see Operator#_each)
1625
- def _each
1626
- longexpr.each(&Proc.new)
1627
- end
1628
-
1629
- #
1630
- # Compiles the longer expression and returns it.
1631
- #
1632
- # @return (Iterator) the compiled longer expression, typically another
1633
- # Operator instance
1634
- #
1635
- def longexpr
1636
- end
1637
- undef :longexpr
1638
-
1639
- #
1640
- # This is an helper ala Lispy#chain for implementing (#longexpr).
1641
- #
1642
- # @param [Array] elements a list of Iterator-able
1643
- # @return [Operator] the first element of the list, but piped with the
1644
- # next one, and so on.
1645
- #
1646
- def chain(*elements)
1647
- elements = elements.reverse
1648
- elements[1..-1].inject(elements.first) do |c, elm|
1649
- elm.pipe(c, environment)
1650
- elm
1651
- end
1652
- end
1653
-
1654
- end # module Shortcut
1655
-
1656
- # Marker for experimental operators
1657
- module Experimental; end
1658
-
1659
- end # module Operator
1660
-
1661
- #
1662
- # Marker module and namespace for non relational operators
1663
- #
1664
- module Operator::NonRelational
1665
-
1666
- #
1667
- # Yields the block with each operator module in turn
1668
- #
1669
- def self.each
1670
- constants.each do |c|
1671
- val = const_get(c)
1672
- yield(val) if val.ancestors.include?(Operator::NonRelational)
1673
- end
1674
- end
1675
-
1676
- #
1677
- # Extend its operand with an unique autonumber attribute
1678
- #
1679
- # SYNOPSIS
1680
- #
1681
- # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1682
- #
1683
- # DESCRIPTION
1684
- #
1685
- # This non-relational operator guarantees uniqueness of output tuples by
1686
- # adding an attribute called 'ATTRNAME' whose value is an Integer. No
1687
- # guarantee is given about ordering of output tuples, nor to the fact
1688
- # that this autonumber is sequential. Only that all values are different.
1689
- # If the presence of duplicates was the only "non-relational" aspect of
1690
- # input tuples, the result may be considered a valid relation representation.
1691
- #
1692
- # IN RUBY
1693
- #
1694
- # (autonum OPERAND, ATTRNAME = :autonum)
1695
- #
1696
- # (autonum :suppliers)
1697
- # (autonum :suppliers, :unique_id)
1698
- #
1699
- # IN SHELL
1700
- #
1701
- # #{program_name} #{command_name} [OPERAND] -- [ATTRNAME]
1702
- #
1703
- # alf autonum suppliers
1704
- # alf autonum suppliers -- unique_id
1705
- #
1706
- class Autonum < Factory::Operator(__FILE__, __LINE__)
1707
- include Operator::NonRelational, Operator::Transform
1708
-
1709
- # Names of the new attribute to add
1710
- attr_accessor :attrname
1711
-
1712
- def initialize(attrname = :autonum)
1713
- @attrname = attrname
1714
- end
1715
-
1716
- protected
1717
-
1718
- # (see Operator::CommandMethods#set_args)
1719
- def set_args(args)
1720
- @attrname = args.last.to_sym unless args.empty?
1721
- end
1722
-
1723
- # (see Operator#_prepare)
1724
- def _prepare
1725
- @autonum = -1
1726
- end
1727
-
1728
- # (see Operator::Transform#_tuple2tuple)
1729
- def _tuple2tuple(tuple)
1730
- tuple.merge(@attrname => (@autonum += 1))
1731
- end
1732
-
1733
- end # class Autonum
1734
-
1735
- #
1736
- # Force default values on missing/nil attributes
1737
- #
1738
- # SYNOPSIS
1739
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
1740
- #
1741
- # OPTIONS
1742
- # #{summarized_options}
1743
- #
1744
- # API & EXAMPLE
1745
- #
1746
- # # Non strict mode
1747
- # (defaults :suppliers, :country => 'Belgium')
1748
- #
1749
- # # Strict mode (--strict)
1750
- # (defaults :suppliers, {:country => 'Belgium'}, true)
1751
- #
1752
- # DESCRIPTION
1753
- #
1754
- # This operator rewrites tuples so as to ensure that all values for specified
1755
- # attributes ATTRx are defined and not nil. Missing or nil attributes are
1756
- # replaced by the associated default value VALx.
1757
- #
1758
- # When used in shell, the hash of default values is built from commandline
1759
- # arguments ala Hash[...]. However, to keep type safety VALx are interpreted
1760
- # as ruby literals and built with Kernel.eval. This means that strings must
1761
- # be doubly quoted. For the example of the API section:
1762
- #
1763
- # alf defaults suppliers -- country "'Belgium'"
1764
- #
1765
- # When used in --strict mode, the operator simply project resulting tuples on
1766
- # attributes for which a default value has been specified. Using the strict
1767
- # mode guarantess that the heading of all tuples is the same, and that no nil
1768
- # value ever remains. However, this operator never remove duplicates.
1769
- #
1770
- class Defaults < Factory::Operator(__FILE__, __LINE__)
1771
- include Operator::NonRelational, Operator::Transform
1772
-
1773
- # Default values as a ATTR -> VAL hash
1774
- attr_accessor :defaults
1775
-
1776
- # Strict mode?
1777
- attr_accessor :strict
1778
-
1779
- # Builds a Defaults operator instance
1780
- def initialize(defaults = {}, strict = false)
1781
- @defaults = defaults
1782
- @strict = strict
1783
- end
1784
-
1785
- options do |opt|
1786
- opt.on('-s', '--strict', 'Strictly restrict to default attributes'){
1787
- self.strict = true
1788
- }
1789
- end
1790
-
1791
- protected
1792
-
1793
- # (see Operator::CommandMethods#set_args)
1794
- def set_args(args)
1795
- @defaults = tuple_collect(args.each_slice(2)) do |k,v|
1796
- [k.to_sym, Kernel.eval(v)]
1797
- end
1798
- self
1799
- end
1800
-
1801
- # (see Operator::Transform#_tuple2tuple)
1802
- def _tuple2tuple(tuple)
1803
- if strict
1804
- tuple_collect(@defaults){|k,v|
1805
- [k, coalesce(tuple[k], v)]
1806
- }
1807
- else
1808
- @defaults.merge tuple_collect(tuple){|k,v|
1809
- [k, coalesce(v, @defaults[k])]
1810
- }
1811
- end
1812
- end
1813
-
1814
- end # class Defaults
1815
-
1816
- #
1817
- # Remove tuple duplicates
1818
- #
1819
- # SYNOPSIS
1820
- # #{program_name} #{command_name} [OPERAND]
1821
- #
1822
- # API & EXAMPLE
1823
- #
1824
- # # clip, unlike project, typically leave duplicates
1825
- # (compact (clip :suppliers, [ :city ]))
1826
- #
1827
- # DESCRIPTION
1828
- #
1829
- # This operator remove duplicates from input tuples. As defaults, it is a non
1830
- # relational operator that helps normalizing input for implementing relational
1831
- # operators. This one is centric in converting bags of tuples to sets of
1832
- # tuples, as required by true relations.
1833
- #
1834
- # alf compact ...
1835
- #
1836
- class Compact < Factory::Operator(__FILE__, __LINE__)
1837
- include Operator::NonRelational, Operator::Shortcut, Operator::Unary
1838
-
1839
- # Removes duplicates according to a complete order
1840
- class SortBased
1841
- include Operator::Cesure
1842
-
1843
- def cesure_key
1844
- @cesure_key ||= ProjectionKey.new([],true)
1845
- end
1846
-
1847
- def accumulate_cesure(tuple, receiver)
1848
- @tuple = tuple
1849
- end
1850
-
1851
- def flush_cesure(key, receiver)
1852
- receiver.call(@tuple)
1853
- end
1854
-
1855
- end # class SortBased
1856
-
1857
- # Removes duplicates by loading all in memory and filtering
1858
- # them there
1859
- class BufferBased
1860
- include Operator::Unary
1861
-
1862
- def _prepare
1863
- @tuples = input.to_a.uniq
1864
- end
1865
-
1866
- def _each
1867
- @tuples.each(&Proc.new)
1868
- end
1869
-
1870
- end # class BufferBased
1871
-
1872
- protected
1873
-
1874
- def longexpr
1875
- chain BufferBased.new,
1876
- datasets
1877
- end
1878
-
1879
- end # class Compact
1880
-
1881
- #
1882
- # Sort input tuples according to an order relation
1883
- #
1884
- # SYNOPSIS
1885
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ORDER1 ATTR2 ORDER2...
1886
- #
1887
- # API & EXAMPLE
1888
- #
1889
- # # sort on supplier name in ascending order
1890
- # (sort :suppliers, [:name])
1891
- #
1892
- # # sort on city then on name
1893
- # (sort :suppliers, [:city, :name])
1894
- #
1895
- # # sort on city DESC then on name ASC
1896
- # (sort :suppliers, [[:city, :desc], [:name, :asc]])
1897
- #
1898
- # => See OrderingKey about specifying orderings
1899
- #
1900
- # DESCRIPTION
1901
- #
1902
- # This operator sorts input tuples on ATTR1 then ATTR2, etc. and outputs
1903
- # them sorted after that. This is, of course, a non relational operator as
1904
- # relations are unordered sets. It is provided to implement operators that
1905
- # need tuples to be sorted to work correctly. When used in shell, the key
1906
- # ordering must be specified in its longest form:
1907
- #
1908
- # alf sort suppliers -- name asc
1909
- # alf sort suppliers -- city desc name asc
1910
- #
1911
- # LIMITATIONS
1912
- #
1913
- # The fact that the ordering must be completely specified with commandline
1914
- # arguments is a limitation, shortcuts could be provided in the future.
1915
- #
1916
- class Sort < Factory::Operator(__FILE__, __LINE__)
1917
- include Operator::NonRelational, Operator::Unary
1918
-
1919
- def initialize(ordering_key = [])
1920
- @ordering_key = OrderingKey.coerce(ordering_key)
1921
- yield self if block_given?
1922
- end
1923
-
1924
- def ordering=(ordering)
1925
- @ordering_key = OrderingKey.coerce(ordering)
1926
- end
1927
-
1928
- protected
1929
-
1930
- def set_args(args)
1931
- self.ordering = args.collect{|c| c.to_sym}.each_slice(2).to_a
1932
- self
1933
- end
1934
-
1935
- def _prepare
1936
- @buffer = Buffer::Sorted.new(@ordering_key)
1937
- @buffer.add_all(input)
1938
- end
1939
-
1940
- def _each
1941
- @buffer.each(&Proc.new)
1942
- end
1943
-
1944
- end # class Sort
1945
-
1946
- #
1947
- # Clip input tuples to a subset of attributes
1948
- #
1949
- # SYNOPSIS
1950
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
1951
- #
1952
- # OPTIONS
1953
- # #{summarized_options}
1954
- #
1955
- # API & EXAMPLE
1956
- #
1957
- # # Keep only name and city attributes
1958
- # (clip :suppliers, [:name, :city])
1959
- #
1960
- # # Keep all but name and city attributes
1961
- # (clip :suppliers, [:name, :city], true)
1962
- #
1963
- # DESCRIPTION
1964
- #
1965
- # This operator clips tuples on attributes whose names are specified as
1966
- # arguments. This is similar to the relational PROJECT operator, expect
1967
- # that this one does not removed duplicates that can occur from clipping.
1968
- # In other words, clipping may lead to bags of tuples instead of sets.
1969
- #
1970
- # When used in shell, the clipping/projection key is simply taken from
1971
- # commandline arguments:
1972
- #
1973
- # alf clip suppliers -- name city
1974
- # alf clip suppliers --allbut -- name city
1975
- #
1976
- class Clip < Factory::Operator(__FILE__, __LINE__)
1977
- include Operator::NonRelational, Operator::Transform
1978
-
1979
- # Builds a Clip operator instance
1980
- def initialize(attributes = [], allbut = false)
1981
- @projection_key = ProjectionKey.new(attributes, allbut)
1982
- yield self if block_given?
1983
- end
1984
-
1985
- def attributes=(attrs)
1986
- @projection_key.attributes = attrs
1987
- end
1988
-
1989
- def allbut=(allbut)
1990
- @projection_key.allbut = allbut
1991
- end
1992
-
1993
- # Installs the options
1994
- options do |opt|
1995
- opt.on('-a', '--allbut', 'Apply a ALLBUT clipping') do
1996
- self.allbut = true
1997
- end
1998
- end
1999
-
2000
- protected
2001
-
2002
- # (see Operator::CommandMethods#set_args)
2003
- def set_args(args)
2004
- self.attributes = args.collect{|a| a.to_sym}
2005
- self
2006
- end
2007
-
2008
- # (see Operator::Transform#_tuple2tuple)
2009
- def _tuple2tuple(tuple)
2010
- @projection_key.project(tuple)
2011
- end
2012
-
2013
- end # class Clip
2014
-
2015
- end # Operator::NonRelational
2016
-
2017
- #
2018
- # Marker module and namespace for relational operators
2019
- #
2020
- module Operator::Relational
2021
-
2022
- #
2023
- # Yields the block with each operator module in turn
2024
- #
2025
- def self.each
2026
- constants.each do |c|
2027
- val = const_get(c)
2028
- yield(val) if val.ancestors.include?(Operator::Relational)
2029
- end
2030
- end
2031
-
2032
- # Relational projection (clip + compact)
2033
- #
2034
- # SYNOPSIS
2035
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ...
2036
- #
2037
- # OPTIONS
2038
- # #{summarized_options}
2039
- #
2040
- # API & EXAMPLE
2041
- #
2042
- # # Project on name and city attributes
2043
- # (project :suppliers, [:name, :city])
2044
- #
2045
- # # Project on all but name and city attributes
2046
- # (allbut :suppliers, [:name, :city])
2047
- #
2048
- # DESCRIPTION
2049
- #
2050
- # This operator projects tuples on attributes whose names are specified as
2051
- # arguments. This is similar to clip, except that this ones is a truly
2052
- # relational one, that is, it also removes duplicates tuples.
2053
- #
2054
- # When used in shell, the clipping/projection key is simply taken from
2055
- # commandline arguments:
2056
- #
2057
- # alf project suppliers -- name city
2058
- # alf project --allbut suppliers -- name city
2059
- #
2060
- class Project < Factory::Operator(__FILE__, __LINE__)
2061
- include Operator::Relational, Operator::Shortcut, Operator::Unary
2062
-
2063
- # Builds a Project operator instance
2064
- def initialize(attributes = [], allbut = false)
2065
- @projection_key = ProjectionKey.new(attributes, allbut)
2066
- yield self if block_given?
2067
- end
2068
-
2069
- def attributes=(attrs)
2070
- @projection_key.attributes = attrs
2071
- end
2072
-
2073
- def allbut=(allbut)
2074
- @projection_key.allbut = allbut
2075
- end
2076
-
2077
- # Installs the options
2078
- options do |opt|
2079
- opt.on('-a', '--allbut', 'Apply a ALLBUT projection') do
2080
- self.allbut = true
2081
- end
2082
- end
2083
-
2084
- protected
2085
-
2086
- # (see Operator::CommandMethods#set_args)
2087
- def set_args(args)
2088
- self.attributes = args.collect{|a| a.to_sym}
2089
- self
2090
- end
2091
-
2092
- # (see Operator::Shortcut#longexpr)
2093
- def longexpr
2094
- chain Operator::NonRelational::Compact.new,
2095
- Operator::NonRelational::Clip.new(@projection_key.attributes,
2096
- @projection_key.allbut),
2097
- datasets
2098
- end
2099
-
2100
- end # class Project
2101
-
2102
- #
2103
- # Relational extension (additional, computed attributes)
2104
- #
2105
- # SYNOPSIS
2106
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 EXPR1 ATTR2 EXPR2...
2107
- #
2108
- # API & EXAMPLE
2109
- #
2110
- # (extend :supplies, :sp => lambda{ sid + "/" + pid },
2111
- # :big => lambda{ qty > 100 ? true : false })
2112
- #
2113
- # DESCRIPTION
2114
- #
2115
- # This command extend input tuples with new attributes (named ATTR1, ...)
2116
- # whose value is the result of evaluating tuple expressions (i.e. EXPR1, ...).
2117
- # See main documentation about the semantics of tuple expressions. When used
2118
- # in shell, the hash of extensions is built from commandline arguments ala
2119
- # Hash[...]. Tuple expressions must be specified as code literals there:
2120
- #
2121
- # alf extend supplies -- sp 'sid + "/" + pid' big "qty > 100 ? true : false"
2122
- #
2123
- # Attributes ATTRx should not already exist, no behavior is guaranteed if
2124
- # this precondition is not respected.
2125
- #
2126
- class Extend < Factory::Operator(__FILE__, __LINE__)
2127
- include Operator::Relational, Operator::Transform
2128
-
2129
- # Extensions as a Hash attr => lambda{...}
2130
- attr_accessor :extensions
2131
-
2132
- # Builds an Extend operator instance
2133
- def initialize(extensions = {})
2134
- @extensions = extensions
2135
- end
2136
-
2137
- protected
2138
-
2139
- # (see Operator::CommandMethods#set_args)
2140
- def set_args(args)
2141
- @extensions = tuple_collect(args.each_slice(2)){|k,v|
2142
- [k.to_sym, TupleHandle.compile(v)]
2143
- }
2144
- self
2145
- end
2146
-
2147
- # (see Operator#_prepare)
2148
- def _prepare
2149
- @handle = TupleHandle.new
2150
- end
2151
-
2152
- # (see Operator::Transform#_tuple2tuple)
2153
- def _tuple2tuple(tuple)
2154
- tuple.merge tuple_collect(@extensions){|k,v|
2155
- [k, @handle.set(tuple).evaluate(v)]
2156
- }
2157
- end
2158
-
2159
- end # class Extend
2160
-
2161
- #
2162
- # Relational renaming (rename some attributes)
2163
- #
2164
- # SYNOPSIS
2165
- # #{program_name} #{command_name} [OPERAND] -- OLD1 NEW1 ...
2166
- #
2167
- # OPTIONS
2168
- # #{summarized_options}
2169
- #
2170
- # API & EXAMPLE
2171
- #
2172
- # (rename :suppliers, :name => :supplier_name, :city => :supplier_city)
2173
- #
2174
- # DESCRIPTION
2175
- #
2176
- # This command renames OLD attributes as NEW as specified by arguments.
2177
- # Attributes OLD should exist in source tuples while attributes NEW should
2178
- # not. When used in shell, renaming attributes are built ala Hash[...] from
2179
- # commandline arguments:
2180
- #
2181
- # alf rename suppliers -- name supplier_name city supplier_city
2182
- #
2183
- class Rename < Factory::Operator(__FILE__, __LINE__)
2184
- include Operator::Relational, Operator::Transform
2185
-
2186
- # Hash of source -> target attribute renamings
2187
- attr_accessor :renaming
2188
-
2189
- # Builds a Rename operator instance
2190
- def initialize(renaming = {})
2191
- @renaming = renaming
2192
- end
2193
-
2194
- protected
2195
-
2196
- # (see Operator::CommandMethods#set_args)
2197
- def set_args(args)
2198
- @renaming = Hash[*args.collect{|c| c.to_sym}]
2199
- self
2200
- end
2201
-
2202
- # (see Operator::Transform#_tuple2tuple)
2203
- def _tuple2tuple(tuple)
2204
- tuple_collect(tuple){|k,v| [@renaming[k] || k, v]}
2205
- end
2206
-
2207
- end # class Rename
2208
-
2209
- #
2210
- # Relational restriction (aka where, predicate filtering)
2211
- #
2212
- # SYNOPSIS
2213
- # #{program_name} #{command_name} [OPERAND] -- EXPR
2214
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 VAL1 ...
2215
- #
2216
- # API & EXAMPLE
2217
- #
2218
- # # Restrict to suppliers with status greater than 20
2219
- # (restrict :suppliers, lambda{ status > 20 })
2220
- #
2221
- # # Restrict to suppliers that live in London
2222
- # (restrict :suppliers, lambda{ city == 'London' })
2223
- #
2224
- # DESCRIPTION
2225
- #
2226
- # This command restricts tuples to those for which EXPR evaluates to true.
2227
- # EXPR must be a valid tuple expression that should return a truth-value.
2228
- # When used in shell, the predicate is taken as a string and compiled with
2229
- # TupleHandle.compile. We also provide a shortcut for equality expressions.
2230
- # Note that, in that case, values are expected to be ruby code literals,
2231
- # evaluated with Kernel.eval. Therefore, strings must be doubly quoted.
2232
- #
2233
- # alf restrict suppliers -- "status > 20"
2234
- # alf restrict suppliers -- city "'London'"
2235
- #
2236
- class Restrict < Factory::Operator(__FILE__, __LINE__)
2237
- include Operator::Relational, Operator::Unary
2238
-
2239
- # Restriction predicate
2240
- attr_accessor :predicate
2241
-
2242
- # Builds a Restrict operator instance
2243
- def initialize(predicate = "true")
2244
- @predicate = TupleHandle.compile(predicate)
2245
- yield self if block_given?
2246
- end
2247
-
2248
- protected
2249
-
2250
- # (see Operator::CommandMethods#set_args)
2251
- def set_args(args)
2252
- @predicate = if args.size > 1
2253
- TupleHandle.compile tuple_collect(args.each_slice(2)){|a,expr|
2254
- [a, Kernel.eval(expr)]
2255
- }
2256
- else
2257
- TupleHandle.compile(args.first)
2258
- end
2259
- self
2260
- end
2261
-
2262
- # (see Operator#_each)
2263
- def _each
2264
- handle = TupleHandle.new
2265
- each_input_tuple{|t| yield(t) if handle.set(t).evaluate(@predicate) }
2266
- end
2267
-
2268
- end # class Restrict
2269
-
2270
- #
2271
- # Relational join (and cross-join)
2272
- #
2273
- # SYNOPSIS
2274
- # #{program_name} #{command_name} [LEFT] RIGHT
2275
- #
2276
- # API & EXAMPLE
2277
- #
2278
- # (join :suppliers, :parts)
2279
- #
2280
- # DESCRIPTION
2281
- #
2282
- # This operator computes the (natural) join of two input iterators. Natural
2283
- # join means that, unlike what is commonly used in SQL, the default behavior
2284
- # is to join on common attributes. You can use the rename operator if this
2285
- # behavior does not fit your needs.
2286
- #
2287
- # alf join suppliers supplies
2288
- #
2289
- class Join < Factory::Operator(__FILE__, __LINE__)
2290
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2291
-
2292
- #
2293
- # Performs a Join of two relations through a Hash buffer on the right
2294
- # one.
2295
- #
2296
- class HashBased
2297
- include Operator::Binary
2298
-
2299
- #
2300
- # Implements a special Buffer for join-based relational operators.
2301
- #
2302
- # Example:
2303
- #
2304
- # buffer = Buffer::Join.new(...) # pass the right part of the join
2305
- # left.each do |left_tuple|
2306
- # key, rest = buffer.split(tuple)
2307
- # buffer.each(key) do |right_tuple|
2308
- # #
2309
- # # do whatever you want with left and right tuples
2310
- # #
2311
- # end
2312
- # end
2313
- #
2314
- class JoinBuffer
2315
-
2316
- #
2317
- # Creates a buffer instance with the right part of the join.
2318
- #
2319
- # @param [Iterator] enum a tuple iterator, right part of the join.
2320
- #
2321
- def initialize(enum)
2322
- @buffer = nil
2323
- @key = nil
2324
- @enum = enum
2325
- end
2326
-
2327
- #
2328
- # Splits a left tuple according to the common key.
2329
- #
2330
- # @param [Hash] tuple a left tuple of the join
2331
- # @return [Array] an array of two elements, the key and the rest
2332
- # @see ProjectionKey#split
2333
- #
2334
- def split(tuple)
2335
- _init(tuple) unless @key
2336
- @key.split(tuple)
2337
- end
2338
-
2339
- #
2340
- # Yields each right tuple that matches a given key value.
2341
- #
2342
- # @param [Hash] key a tuple that matches elements of the common key
2343
- # (typically the first element returned by #split)
2344
- #
2345
- def each(key)
2346
- @buffer[key].each(&Proc.new) if @buffer.has_key?(key)
2347
- end
2348
-
2349
- private
2350
-
2351
- # Initialize the buffer with a right tuple
2352
- def _init(right)
2353
- @buffer = Hash.new{|h,k| h[k] = []}
2354
- @enum.each do |left|
2355
- @key = Tools::ProjectionKey.coerce(left.keys & right.keys) unless @key
2356
- @buffer[@key.project(left)] << left
2357
- end
2358
- @key = Tools::ProjectionKey.coerce([]) unless @key
2359
- end
2360
-
2361
- end # class JoinBuffer
2362
-
2363
- protected
2364
-
2365
- # (see Operator#_each)
2366
- def _each
2367
- buffer = JoinBuffer.new(right)
2368
- left.each do |left_tuple|
2369
- key, rest = buffer.split(left_tuple)
2370
- buffer.each(key) do |right|
2371
- yield(left_tuple.merge(right))
2372
- end
2373
- end
2374
- end
2375
-
2376
- end
2377
-
2378
- protected
2379
-
2380
- # (see Shortcut#longexpr)
2381
- def longexpr
2382
- chain HashBased.new,
2383
- datasets
2384
- end
2385
-
2386
- end # class Join
2387
-
2388
- #
2389
- # Relational intersection (aka a logical and)
2390
- #
2391
- # SYNOPSIS
2392
- # #{program_name} #{command_name} [LEFT] RIGHT
2393
- #
2394
- # API & EXAMPLE
2395
- #
2396
- # # Give suppliers that live in Paris and have status >= 20
2397
- # (intersect \\
2398
- # (restrict :suppliers, lambda{ status >= 20 }),
2399
- # (restrict :suppliers, lambda{ city == 'Paris' }))
2400
- #
2401
- # DESCRIPTION
2402
- #
2403
- # This operator computes the intersection between its two operands. The
2404
- # intersection is simply the set of common tuples between them. Both operands
2405
- # must have the same heading.
2406
- #
2407
- # alf intersect ... ...
2408
- #
2409
- class Intersect < Factory::Operator(__FILE__, __LINE__)
2410
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2411
-
2412
- class HashBased
2413
- include Operator::Binary
2414
-
2415
- protected
2416
-
2417
- def _prepare
2418
- @index = Hash.new
2419
- right.each{|t| @index[t] = true}
2420
- end
2421
-
2422
- def _each
2423
- left.each do |left_tuple|
2424
- yield(left_tuple) if @index.has_key?(left_tuple)
2425
- end
2426
- end
2427
-
2428
- end
2429
-
2430
- protected
2431
-
2432
- # (see Shortcut#longexpr)
2433
- def longexpr
2434
- chain HashBased.new,
2435
- datasets
2436
- end
2437
-
2438
- end # class Intersect
2439
-
2440
- #
2441
- # Relational minus (aka difference)
2442
- #
2443
- # SYNOPSIS
2444
- # #{program_name} #{command_name} [LEFT] RIGHT
2445
- #
2446
- # API & EXAMPLE
2447
- #
2448
- # # Give all suppliers but those living in Paris
2449
- # (minus :suppliers,
2450
- # (restrict :suppliers, lambda{ city == 'Paris' }))
2451
- #
2452
- # DESCRIPTION
2453
- #
2454
- # This operator computes the difference between its two operands. The
2455
- # difference is simply the set of tuples in left operands non shared by
2456
- # the right one.
2457
- #
2458
- # alf minus ... ...
2459
- #
2460
- class Minus < Factory::Operator(__FILE__, __LINE__)
2461
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2462
-
2463
- class HashBased
2464
- include Operator::Binary
2465
-
2466
- protected
2467
-
2468
- def _prepare
2469
- @index = Hash.new
2470
- right.each{|t| @index[t] = true}
2471
- end
2472
-
2473
- def _each
2474
- left.each do |left_tuple|
2475
- yield(left_tuple) unless @index.has_key?(left_tuple)
2476
- end
2477
- end
2478
-
2479
- end
2480
-
2481
- protected
2482
-
2483
- # (see Shortcut#longexpr)
2484
- def longexpr
2485
- chain HashBased.new,
2486
- datasets
2487
- end
2488
-
2489
- end # class Minus
2490
-
2491
- #
2492
- # Relational union
2493
- #
2494
- # SYNOPSIS
2495
- # #{program_name} #{command_name} [LEFT] RIGHT
2496
- #
2497
- # API & EXAMPLE
2498
- #
2499
- # (union (project :suppliers, [:city]),
2500
- # (project :parts, [:city]))
2501
- #
2502
- # DESCRIPTION
2503
- #
2504
- # This operator computes the union join of two input iterators. Input
2505
- # iterators should have the same heading. The result never contain duplicates.
2506
- #
2507
- # alf union ... ...
2508
- #
2509
- class Union < Factory::Operator(__FILE__, __LINE__)
2510
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2511
-
2512
- class DisjointBased
2513
- include Operator::Binary
2514
-
2515
- protected
2516
-
2517
- def _each
2518
- left.each(&Proc.new)
2519
- right.each(&Proc.new)
2520
- end
2521
-
2522
- end
2523
-
2524
- protected
2525
-
2526
- # (see Shortcut#longexpr)
2527
- def longexpr
2528
- chain Operator::NonRelational::Compact.new,
2529
- DisjointBased.new,
2530
- datasets
2531
- end
2532
-
2533
- end # class Union
2534
-
2535
- #
2536
- # Relational matching
2537
- #
2538
- # SYNOPSIS
2539
- # #{program_name} #{command_name} [LEFT] RIGHT
2540
- #
2541
- # API & EXAMPLE
2542
- #
2543
- # (matching :suppliers, :supplies)
2544
- #
2545
- # DESCRIPTION
2546
- #
2547
- # This operator restricts left tuples to those for which there exists at
2548
- # least one right tuple that joins. This is a shortcut operator for the
2549
- # longer expression:
2550
- #
2551
- # (project (join xxx, yyy), [xxx's attributes])
2552
- #
2553
- # In shell:
2554
- #
2555
- # alf matching suppliers supplies
2556
- #
2557
- class Matching < Factory::Operator(__FILE__, __LINE__)
2558
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2559
-
2560
- #
2561
- # Performs a Matching of two relations through a Hash buffer on the right
2562
- # one.
2563
- #
2564
- class HashBased
2565
- include Operator::Binary
2566
-
2567
- # (see Operator#_each)
2568
- def _each
2569
- seen, key = nil, nil
2570
- left.each do |left_tuple|
2571
- seen ||= begin
2572
- h = Hash.new
2573
- right.each do |right_tuple|
2574
- key ||= Tools::ProjectionKey.coerce(left_tuple.keys & right_tuple.keys)
2575
- h[key.project(right_tuple)] = true
2576
- end
2577
- key ||= Tools::ProjectionKey.coerce([])
2578
- h
2579
- end
2580
- yield(left_tuple) if seen.has_key?(key.project(left_tuple))
2581
- end
2582
- end
2583
-
2584
- end # class HashBased
2585
-
2586
- protected
2587
-
2588
- # (see Shortcut#longexpr)
2589
- def longexpr
2590
- chain HashBased.new,
2591
- datasets
2592
- end
2593
-
2594
- end # class Matching
2595
-
2596
- #
2597
- # Relational not matching
2598
- #
2599
- # SYNOPSIS
2600
- # #{program_name} #{command_name} [LEFT] RIGHT
2601
- #
2602
- # API & EXAMPLE
2603
- #
2604
- # (not_matching :suppliers, :supplies)
2605
- #
2606
- # DESCRIPTION
2607
- #
2608
- # This operator restricts left tuples to those for which there does not
2609
- # exist any right tuple that joins. This is a shortcut operator for the
2610
- # longer expression:
2611
- #
2612
- # (minus xxx, (matching xxx, yyy))
2613
- #
2614
- # In shell:
2615
- #
2616
- # alf not-matching suppliers supplies
2617
- #
2618
- class NotMatching < Factory::Operator(__FILE__, __LINE__)
2619
- include Operator::Relational, Operator::Shortcut, Operator::Binary
2620
-
2621
- #
2622
- # Performs a NotMatching of two relations through a Hash buffer on the
2623
- # right one.
2624
- #
2625
- class HashBased
2626
- include Operator::Binary
2627
-
2628
- # (see Operator#_each)
2629
- def _each
2630
- seen, key = nil, nil
2631
- left.each do |left_tuple|
2632
- seen ||= begin
2633
- h = Hash.new
2634
- right.each do |right_tuple|
2635
- key ||= Tools::ProjectionKey.coerce(left_tuple.keys & right_tuple.keys)
2636
- h[key.project(right_tuple)] = true
2637
- end
2638
- key ||= Tools::ProjectionKey.coerce([])
2639
- h
2640
- end
2641
- yield(left_tuple) unless seen.has_key?(key.project(left_tuple))
2642
- end
2643
- end
2644
-
2645
- end # class HashBased
2646
-
2647
- protected
2648
-
2649
- # (see Shortcut#longexpr)
2650
- def longexpr
2651
- chain HashBased.new,
2652
- datasets
2653
- end
2654
-
2655
- end # class NotMatching
2656
-
2657
- #
2658
- # Relational wraping (tuple-valued attributes)
2659
- #
2660
- # SYNOPSIS
2661
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2662
- #
2663
- # API & EXAMPLE
2664
- #
2665
- # (wrap :suppliers, [:city, :status], :loc_and_status)
2666
- #
2667
- # DESCRIPTION
2668
- #
2669
- # This operator wraps attributes ATTR1 to ATTRN as a new, tuple-based
2670
- # attribute whose name is NEWNAME. When used in shell, names of wrapped
2671
- # attributes are taken from commandline arguments, expected the last one
2672
- # which defines the new name to use:
2673
- #
2674
- # alf wrap suppliers -- city status loc_and_status
2675
- #
2676
- class Wrap < Factory::Operator(__FILE__, __LINE__)
2677
- include Operator::Relational, Operator::Transform
2678
-
2679
- # Array of wraping attributes
2680
- attr_accessor :attributes
2681
-
2682
- # New name for the wrapped attribute
2683
- attr_accessor :as
2684
-
2685
- # Builds a Wrap operator instance
2686
- def initialize(attributes = [], as = :wrapped)
2687
- @attributes = attributes
2688
- @as = as
2689
- end
2690
-
2691
- protected
2692
-
2693
- # (see Operator::CommandMethods#set_args)
2694
- def set_args(args)
2695
- @as = args.pop.to_sym
2696
- @attributes = args.collect{|a| a.to_sym}
2697
- self
2698
- end
2699
-
2700
- # (see Operator::Transform#_tuple2tuple)
2701
- def _tuple2tuple(tuple)
2702
- others = tuple_collect(tuple.keys - @attributes){|k| [k,tuple[k]] }
2703
- others[as] = tuple_collect(attributes){|k| [k, tuple[k]] }
2704
- others
2705
- end
2706
-
2707
- end # class Wrap
2708
-
2709
- #
2710
- # Relational un-wraping (inverse of wrap)
2711
- #
2712
- # SYNOPSIS
2713
- # #{program_name} #{command_name} [OPERAND] -- ATTR
2714
- #
2715
- # API & EXAMPLE
2716
- #
2717
- # # Assuming wrapped = (wrap :suppliers, [:city, :status], :loc_and_status)
2718
- # (unwrap wrapped, :loc_and_status)
2719
- #
2720
- # DESCRIPTION
2721
- #
2722
- # This operator unwraps the tuple-valued attribute named ATTR so as to
2723
- # flatten its pairs with 'upstream' tuple. The latter should be such so that
2724
- # no name collision occurs. When used in shell, the name of the attribute to
2725
- # unwrap is taken as the first commandline argument:
2726
- #
2727
- # alf unwrap wrap -- loc_and_status
2728
- #
2729
- class Unwrap < Factory::Operator(__FILE__, __LINE__)
2730
- include Operator::Relational, Operator::Transform
2731
-
2732
- # Name of the attribute to unwrap
2733
- attr_accessor :attribute
2734
-
2735
- # Builds a Rename operator instance
2736
- def initialize(attribute = :wrapped)
2737
- @attribute = attribute
2738
- end
2739
-
2740
- protected
2741
-
2742
- # (see Operator::CommandMethods#set_args)
2743
- def set_args(args)
2744
- @attribute = args.first.to_sym
2745
- self
2746
- end
2747
-
2748
- # (see Operator::Transform#_tuple2tuple)
2749
- def _tuple2tuple(tuple)
2750
- tuple = tuple.dup
2751
- wrapped = tuple.delete(@attribute) || {}
2752
- tuple.merge(wrapped)
2753
- end
2754
-
2755
- end # class Unwrap
2756
-
2757
- #
2758
- # Relational grouping (relation-valued attributes)
2759
- #
2760
- # SYNOPSIS
2761
- # #{program_name} #{command_name} [OPERAND] -- ATTR1 ATTR2 ... NEWNAME
2762
- #
2763
- # API & EXAMPLE
2764
- #
2765
- # (group :supplies, [:pid, :qty], :supplying)
2766
- # (group :supplies, [:sid], :supplying, true)
2767
- #
2768
- # DESCRIPTION
2769
- #
2770
- # This operator groups attributes ATTR1 to ATTRN as a new, relation-valued
2771
- # attribute whose name is NEWNAME. When used in shell, names of grouped
2772
- # attributes are taken from commandline arguments, expected the last one
2773
- # which defines the new name to use:
2774
- #
2775
- # alf group supplies -- pid qty supplying
2776
- # alf group supplies --allbut -- sid supplying
2777
- #
2778
- class Group < Factory::Operator(__FILE__, __LINE__)
2779
- include Operator::Relational, Operator::Unary
2780
-
2781
- # Attributes on which grouping applies
2782
- attr_accessor :attributes
2783
-
2784
- # Attribute name for grouping tuple
2785
- attr_accessor :as
2786
-
2787
- # Group all but attributes?
2788
- attr_accessor :allbut
2789
-
2790
- # Creates a Group instance
2791
- def initialize(attributes = [], as = :group, allbut = false)
2792
- @attributes = attributes
2793
- @as = as
2794
- @allbut = allbut
2795
- end
2796
-
2797
- options do |opt|
2798
- opt.on('--allbut', "Group all but specified attributes"){ @allbut = true }
2799
- end
2800
-
2801
- protected
2802
-
2803
- # (see Operator::CommandMethods#set_args)
2804
- def set_args(args)
2805
- @as = args.pop.to_sym
2806
- @attributes = args.collect{|a| a.to_sym}
2807
- self
2808
- end
2809
-
2810
- # See Operator#_prepare
2811
- def _prepare
2812
- pkey = ProjectionKey.new(attributes, !allbut)
2813
- @index = Hash.new{|h,k| h[k] = Set.new}
2814
- each_input_tuple do |tuple|
2815
- key, rest = pkey.split(tuple)
2816
- @index[key] << rest
2817
- end
2818
- end
2819
-
2820
- # See Operator#_each
2821
- def _each
2822
- @index.each_pair do |k,v|
2823
- yield(k.merge(@as => Relation.coerce(v)))
2824
- end
2825
- end
2826
-
2827
- end # class Group
2828
-
2829
- #
2830
- # Relational un-grouping (inverse of group)
2831
- #
2832
- # SYNOPSIS
2833
- # #{program_name} #{command_name} [OPERAND] -- ATTR
2834
- #
2835
- # API & EXAMPLE
2836
- #
2837
- # # Assuming grouped = (group enum, [:pid, :qty], :supplying)
2838
- # (ungroup grouped, :supplying)
2839
- #
2840
- # DESCRIPTION
2841
- #
2842
- # This operator ungroups the relation-valued attribute named ATTR and outputs
2843
- # tuples as the flattening of each of its tuples merged with the upstream
2844
- # one. Sub relation should be such so that no name collision occurs. When
2845
- # used in shell, the name of the attribute to ungroup is taken as the first
2846
- # commandline argument:
2847
- #
2848
- # alf ungroup group -- supplying
2849
- #
2850
- class Ungroup < Factory::Operator(__FILE__, __LINE__)
2851
- include Operator::Relational, Operator::Unary
2852
-
2853
- # Relation-value attribute to ungroup
2854
- attr_accessor :attribute
2855
-
2856
- # Creates an Ungroup instance
2857
- def initialize(attribute = :grouped)
2858
- @attribute = attribute
2859
- end
2860
-
2861
- protected
2862
-
2863
- # (see Operator::CommandMethods#set_args)
2864
- def set_args(args)
2865
- @attribute = args.pop.to_sym
2866
- self
2867
- end
2868
-
2869
- # See Operator#_each
2870
- def _each
2871
- each_input_tuple do |tuple|
2872
- tuple = tuple.dup
2873
- subrel = tuple.delete(@attribute)
2874
- subrel.each do |subtuple|
2875
- yield(tuple.merge(subtuple))
2876
- end
2877
- end
2878
- end
2879
-
2880
- end # class Ungroup
2881
-
2882
- #
2883
- # Relational summarization (group-by + aggregate ops)
2884
- #
2885
- # SYNOPSIS
2886
- # #{program_name} #{command_name} [OPERAND] [--allbut] --by=KEY1,KEY2... -- AGG1 EXPR1...
2887
- #
2888
- # OPTIONS
2889
- # #{summarized_options}
2890
- #
2891
- # API & EXAMPLE
2892
- #
2893
- # (summarize :supplies, [:sid],
2894
- # :total_qty => Aggregator.sum(:qty))
2895
- #
2896
- # # Or, to specify an allbut projection
2897
- # (summarize :supplies, [:qty, :pid],
2898
- # :total_qty => Aggregator.sum(:qty), true)
2899
- #
2900
- # DESCRIPTION
2901
- #
2902
- # This operator summarizes input tuples on the projection on KEY1,KEY2,...
2903
- # attributes and applies aggregate operators on sets of matching tuples.
2904
- # Introduced names AGG should be disjoint from KEY attributes.
2905
- #
2906
- # When used in shell, the aggregations are taken from commandline arguments
2907
- # AGG and EXPR, where AGG is the name of a new attribute and EXPR is an
2908
- # aggregation expression evaluated on Aggregator:
2909
- #
2910
- # alf summarize supplies --by=sid -- total_qty "sum(:qty)"
2911
- # alf summarize supplies --allbut --by=pid,qty -- total_qty "sum(:qty)"
2912
- #
2913
- class Summarize < Factory::Operator(__FILE__, __LINE__)
2914
- include Operator::Relational, Operator::Shortcut, Operator::Unary
2915
-
2916
- # By attributes
2917
- attr_accessor :by
2918
-
2919
- # Allbut on by?
2920
- attr_accessor :allbut
2921
-
2922
- # Aggregations as an AGG => Aggregator(EXPR) hash
2923
- attr_accessor :aggregators
2924
-
2925
- def initialize(by = [], aggregators = {}, allbut = false)
2926
- @by = by
2927
- @allbut = allbut
2928
- @aggregators = aggregators
2929
- end
2930
-
2931
- # Installs the options
2932
- options do |opt|
2933
- opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
2934
- @by = args.collect{|a| a.to_sym}
2935
- end
2936
- opt.on('--allbut', 'Make an allbut projection/summarization') do
2937
- @allbut = true
2938
- end
2939
- end
2940
-
2941
- # Summarizes according to a complete order
2942
- class SortBased
2943
- include Alf::Operator::Cesure
2944
-
2945
- attr_reader :cesure_key
2946
- attr_reader :aggregators
2947
-
2948
- def initialize(by_key, aggregators)
2949
- @cesure_key, @aggregators = by_key, aggregators
2950
- end
2951
-
2952
- protected
2953
-
2954
- def start_cesure(key, receiver)
2955
- @aggs = tuple_collect(@aggregators) do |a,agg|
2956
- [a, agg.least]
2957
- end
2958
- end
2959
-
2960
- def accumulate_cesure(tuple, receiver)
2961
- @aggs = tuple_collect(@aggregators) do |a,agg|
2962
- [a, agg.happens(@aggs[a], tuple)]
2963
- end
2964
- end
2965
-
2966
- def flush_cesure(key, receiver)
2967
- @aggs = tuple_collect(@aggregators) do |a,agg|
2968
- [a, agg.finalize(@aggs[a])]
2969
- end
2970
- receiver.call key.merge(@aggs)
2971
- end
2972
-
2973
- end # class SortBased
2974
-
2975
- # Summarizes in-memory with a hash
2976
- class HashBased
2977
- include Operator::Relational, Operator::Unary
2978
-
2979
- attr_reader :by_key
2980
- attr_reader :aggregators
2981
-
2982
- def initialize(by_key, aggregators)
2983
- @by_key, @aggregators = by_key, aggregators
2984
- end
2985
-
2986
- protected
2987
-
2988
- def _each
2989
- index = Hash.new do |h,k|
2990
- h[k] = tuple_collect(@aggregators) do |a,agg|
2991
- [a, agg.least]
2992
- end
2993
- end
2994
- each_input_tuple do |tuple|
2995
- key, rest = by_key.split(tuple)
2996
- index[key] = tuple_collect(@aggregators) do |a,agg|
2997
- [a, agg.happens(index[key][a], tuple)]
2998
- end
2999
- end
3000
- index.each_pair do |key,aggs|
3001
- aggs = tuple_collect(@aggregators) do |a,agg|
3002
- [a, agg.finalize(aggs[a])]
3003
- end
3004
- yield key.merge(aggs)
3005
- end
3006
- end
3007
-
3008
- end
3009
-
3010
- protected
3011
-
3012
- # (see Operator::CommandMethods#set_args)
3013
- def set_args(args)
3014
- @aggregators = tuple_collect(args.each_slice(2)) do |a,expr|
3015
- [a.to_sym, Aggregator.compile(expr)]
3016
- end
3017
- self
3018
- end
3019
-
3020
- def longexpr
3021
- if @allbut
3022
- by_key = Tools::ProjectionKey.new(@by, @allbut)
3023
- chain HashBased.new(by_key, @aggregators),
3024
- datasets
3025
- else
3026
- by_key = Tools::ProjectionKey.new(@by, @allbut)
3027
- chain SortBased.new(by_key, @aggregators),
3028
- Operator::NonRelational::Sort.new(by_key.to_ordering_key),
3029
- datasets
3030
- end
3031
- end
3032
-
3033
- end # class Summarize
3034
-
3035
- #
3036
- # Relational ranking (explicit tuple positions)
3037
- #
3038
- # SYNOPSIS
3039
- # #{program_name} #{command_name} [OPERAND] --order=OR1... -- [RANKNAME]
3040
- #
3041
- # OPTIONS
3042
- # #{summarized_options}
3043
- #
3044
- # API & EXAMPLE
3045
- #
3046
- # # Position attribute => # of tuples with smaller weight
3047
- # (rank :parts, [:weight], :position)
3048
- #
3049
- # # Position attribute => # of tuples with greater weight
3050
- # (rank :parts, [[:weight, :desc]], :position)
3051
- #
3052
- # DESCRIPTION
3053
- #
3054
- # This operator computes the ranking of input tuples, according to an order
3055
- # relation. Precisely, it extends the input tuples with a RANKNAME attribute
3056
- # whose value is the number of tuples which are considered strictly less
3057
- # according to the specified order. For the two examples above:
3058
- #
3059
- # alf rank parts --order=weight -- position
3060
- # alf rank parts --order=weight,desc -- position
3061
- #
3062
- # Note that, unless the ordering key includes a candidate key for the input
3063
- # relation, the newly RANKNAME attribute is not necessarily a candidate key
3064
- # for the output one. In the example above, adding the :pid attribute
3065
- # ensured that position will contain all different values:
3066
- #
3067
- # alf rank parts --order=weight,pid -- position
3068
- #
3069
- # Or even:
3070
- #
3071
- # alf rank parts --order=weight,desc,pid,asc -- position
3072
- #
3073
- class Rank < Factory::Operator(__FILE__, __LINE__)
3074
- include Operator::Relational, Operator::Shortcut, Operator::Unary
3075
-
3076
- # Ranking order
3077
- attr_accessor :order
3078
-
3079
- # Ranking attribute name
3080
- attr_accessor :ranking_name
3081
-
3082
- def initialize(order = [], ranking_name = :rank)
3083
- @order, @ranking_name = order, ranking_name
3084
- end
3085
-
3086
- options do |opt|
3087
- opt.on('--order=x,y,z', 'Specify ranking order', Array) do |args|
3088
- @order = args.collect{|a| a.to_sym}
3089
- end
3090
- end
3091
-
3092
- class SortBased
3093
- include Operator::Cesure
3094
-
3095
- def initialize(order, ranking_name)
3096
- @order, @ranking_name = order, ranking_name
3097
- end
3098
-
3099
- def ordering_key
3100
- OrderingKey.coerce @order
3101
- end
3102
-
3103
- def cesure_key
3104
- ProjectionKey.coerce(ordering_key)
3105
- end
3106
-
3107
- def start_cesure(key, receiver)
3108
- @rank ||= 0
3109
- @last_block = 0
3110
- end
3111
-
3112
- def accumulate_cesure(tuple, receiver)
3113
- receiver.call tuple.merge(@ranking_name => @rank)
3114
- @last_block += 1
3115
- end
3116
-
3117
- def flush_cesure(key, receiver)
3118
- @rank += @last_block
3119
- end
3120
-
3121
- end # class SortBased
3122
-
3123
- protected
3124
-
3125
- # (see Operator::CommandMethods#set_args)
3126
- def set_args(args)
3127
- unless args.empty?
3128
- self.ranking_name = args.first.to_sym
3129
- end
3130
- self
3131
- end
3132
-
3133
- def ordering_key
3134
- OrderingKey.coerce @order
3135
- end
3136
-
3137
- def longexpr
3138
- sort_key = ordering_key
3139
- chain SortBased.new(sort_key, @ranking_name),
3140
- Operator::NonRelational::Sort.new(sort_key),
3141
- datasets
3142
- end
3143
-
3144
- end # class Rank
3145
-
3146
- #
3147
- # Relational quota-queries (position, sum progression, etc.)
3148
- #
3149
- # SYNOPSIS
3150
- # #{program_name} #{command_name} [OPERAND] --by=KEY1,... --order=OR1... AGG1 EXPR1...
3151
- #
3152
- # OPTIONS
3153
- # #{summarized_options}
3154
- #
3155
- # API & EXAMPLE
3156
- #
3157
- # (quota :supplies, [:sid], [:qty],
3158
- # :position => Aggregator.count,
3159
- # :sum_qty => Aggregator.sum(:qty))
3160
- #
3161
- # DESCRIPTION
3162
- #
3163
- # This operator computes quota values on input tuples.
3164
- #
3165
- # alf quota supplies --by=sid --order=qty -- position count sum_qty "sum(:qty)"
3166
- #
3167
- class Quota < Factory::Operator(__FILE__, __LINE__)
3168
- include Operator::Relational, Operator::Experimental,
3169
- Operator::Shortcut, Operator::Unary
3170
-
3171
- # Quota by
3172
- attr_accessor :by
3173
-
3174
- # Quota order
3175
- attr_accessor :order
3176
-
3177
- # Quota aggregations
3178
- attr_accessor :aggregators
3179
-
3180
- def initialize(by = [], order = [], aggregators = {})
3181
- @by, @order, @aggregators = by, order, aggregators
3182
- end
3183
-
3184
- options do |opt|
3185
- opt.on('--by=x,y,z', 'Specify by attributes', Array) do |args|
3186
- @by = args.collect{|a| a.to_sym}
3187
- end
3188
- opt.on('--order=x,y,z', 'Specify order attributes', Array) do |args|
3189
- @order = args.collect{|a| a.to_sym}
3190
- end
3191
- end
3192
-
3193
- class SortBased
3194
- include Operator::Cesure
3195
-
3196
- def initialize(by, order, aggregators)
3197
- @by, @order, @aggregators = by, order, aggregators
3198
- end
3199
-
3200
- def cesure_key
3201
- ProjectionKey.coerce @by
3202
- end
3203
-
3204
- def ordering_key
3205
- OrderingKey.coerce @order
3206
- end
3207
-
3208
- def start_cesure(key, receiver)
3209
- @aggs = tuple_collect(@aggregators) do |a,agg|
3210
- [a, agg.least]
3211
- end
3212
- end
3213
-
3214
- def accumulate_cesure(tuple, receiver)
3215
- @aggs = tuple_collect(@aggregators) do |a,agg|
3216
- [a, agg.happens(@aggs[a], tuple)]
3217
- end
3218
- thisone = tuple_collect(@aggregators) do |a,agg|
3219
- [a, agg.finalize(@aggs[a])]
3220
- end
3221
- receiver.call tuple.merge(thisone)
3222
- end
3223
-
3224
- end # class SortBased
3225
-
3226
- protected
3227
-
3228
- # (see Operator::CommandMethods#set_args)
3229
- def set_args(args)
3230
- @aggregators = tuple_collect(args.each_slice(2)) do |a,expr|
3231
- [a.to_sym, Aggregator.compile(expr)]
3232
- end
3233
- self
3234
- end
3235
-
3236
- def cesure_key
3237
- ProjectionKey.coerce @by
3238
- end
3239
-
3240
- def ordering_key
3241
- OrderingKey.coerce @order
3242
- end
3243
-
3244
- def longexpr
3245
- sort_key = cesure_key.to_ordering_key + ordering_key
3246
- chain SortBased.new(@by, @order, @aggregators),
3247
- Operator::NonRelational::Sort.new(sort_key),
3248
- datasets
3249
- end
3250
-
3251
- end # class Quota
3252
-
3253
- end
3254
-
3255
- #
3256
- # Aggregation operator.
3257
- #
3258
- class Aggregator
3259
-
3260
- # Aggregate options
3261
- attr_reader :options
3262
-
3263
- #
3264
- # Automatically installs factory methods for inherited classes.
3265
- #
3266
- # Example:
3267
- # class Sum < Aggregator # will give a method Aggregator.sum
3268
- # ...
3269
- # end
3270
- # Aggregator.sum(:size) # factor a Sum aggregator on tuple[:size]
3271
- # Aggregator.sum{ size } # idem but works on any tuple expression
3272
- #
3273
- def self.inherited(clazz)
3274
- basename = Tools.ruby_case(Tools.class_name(clazz))
3275
- instance_eval <<-EOF
3276
- def #{basename}(*args, &block)
3277
- #{clazz}.new(*args, &block)
3278
- end
3279
- EOF
3280
- end
3281
-
3282
- def self.compile(expr, &block)
3283
- instance_eval(expr, &block)
3284
- end
3285
-
3286
- #
3287
- # Creates an Aggregator instance.
3288
- #
3289
- # This constructor can be used either by passing an attribute
3290
- # argument or a block that will be evaluated on a TupleHandle
3291
- # instance set on each aggregated tuple.
3292
- #
3293
- # Aggregator.new(:size) # will aggregate on tuple[:size]
3294
- # Aggregator.new{ size * price } # ... on tuple[:size] * tuple[:price]
3295
- #
3296
- def initialize(attribute = nil, options = {}, &block)
3297
- attribute, options = nil, attribute if attribute.is_a?(Hash)
3298
- @handle = Tools::TupleHandle.new
3299
- @options = default_options.merge(options)
3300
- @functor = Tools::TupleHandle.compile(attribute || block)
3301
- end
3302
-
3303
- #
3304
- # Returns the default options to use
3305
- #
3306
- def default_options
3307
- {}
3308
- end
3309
-
3310
- #
3311
- # Returns the least value, which is the one to use on an empty
3312
- # set.
3313
- #
3314
- # This method is intended to be overridden by subclasses; default
3315
- # implementation returns nil.
3316
- #
3317
- def least
3318
- nil
3319
- end
3320
-
3321
- #
3322
- # This method is called on each aggregated tuple and must return
3323
- # an updated _memo_ value. It can be seen as the block typically
3324
- # given to Enumerable.inject.
3325
- #
3326
- # The default implementation collects the pre-value on the tuple
3327
- # and delegates to _happens.
3328
- #
3329
- def happens(memo, tuple)
3330
- _happens(memo, @handle.set(tuple).evaluate(@functor))
3331
- end
3332
-
3333
- #
3334
- # This method finalizes a computation.
3335
- #
3336
- # Argument _memo_ is either _least_ or the result of aggregating
3337
- # through _happens_. The default implementation simply returns
3338
- # _memo_. The method is intended to be overridden for complex
3339
- # aggregations that need stateful information. See Avg for an
3340
- # example
3341
- #
3342
- def finalize(memo)
3343
- memo
3344
- end
3345
-
3346
- #
3347
- # Aggregates over an enumeration of tuples.
3348
- #
3349
- def aggregate(enum)
3350
- finalize(
3351
- enum.inject(least){|memo,tuple|
3352
- happens(memo, tuple)
3353
- })
3354
- end
3355
-
3356
- protected
3357
-
3358
- #
3359
- # @see happens.
3360
- #
3361
- # This method is intended to be overridden and returns _value_
3362
- # by default, making this aggregator a "Last" one...
3363
- #
3364
- def _happens(memo, value)
3365
- value
198
+ def Alf.Command()
199
+ Quickl::Command(){|builder|
200
+ builder.command_parent = Alf::Command::Main
201
+ builder.doc_extractor = DOC_EXTRACTOR
202
+ yield(builder) if block_given?
203
+ }
3366
204
  end
205
+
206
+ require 'alf/command/main'
207
+ require 'alf/command/exec'
208
+ require 'alf/command/help'
209
+ require 'alf/command/show'
210
+ end # module Command
3367
211
 
3368
- #
3369
- # Defines a COUNT aggregation operator
3370
- #
3371
- class Count < Aggregator
3372
- def least(); 0; end
3373
- def happens(memo, tuple) memo + 1; end
3374
- end # class Count
3375
-
3376
- #
3377
- # Defines a SUM aggregation operator
3378
- #
3379
- class Sum < Aggregator
3380
- def least(); 0; end
3381
- def _happens(memo, val) memo + val; end
3382
- end # class Sum
3383
-
3384
- #
3385
- # Defines an AVG aggregation operator
3386
- #
3387
- class Avg < Aggregator
3388
- def least(); [0.0, 0.0]; end
3389
- def _happens(memo, val) [memo.first + val, memo.last + 1]; end
3390
- def finalize(memo) memo.first / memo.last end
3391
- end # class Avg
3392
-
3393
- #
3394
- # Defines a MIN aggregation operator
3395
- #
3396
- class Min < Aggregator
3397
- def least(); nil; end
3398
- def _happens(memo, val)
3399
- memo.nil? ? val : (memo < val ? memo : val)
3400
- end
3401
- end # class Min
3402
-
3403
- #
3404
- # Defines a MAX aggregation operator
3405
- #
3406
- class Max < Aggregator
3407
- def least(); nil; end
3408
- def _happens(memo, val)
3409
- memo.nil? ? val : (memo > val ? memo : val)
3410
- end
3411
- end # class Max
3412
-
212
+ #
213
+ # Marker for all operators, relational and non-relational ones.
214
+ #
215
+ module Operator
216
+ include Iterator, Tools
217
+
3413
218
  #
3414
- # Defines a GROUP aggregation operator
219
+ # Operator factory
3415
220
  #
3416
- class Group < Aggregator
3417
- def initialize(*attrs)
3418
- super(nil, {}){
3419
- Tools.tuple_collect(attrs){|k| [k, self.send(k)] }
3420
- }
3421
- end
3422
- def least(); Set.new; end
3423
- def _happens(memo, val)
3424
- memo << val
3425
- end
3426
- def finalize(memo)
3427
- Relation.coerce memo
221
+ def Alf.Operator()
222
+ Alf.Command() do |b|
223
+ b.instance_module Alf::Operator
3428
224
  end
3429
225
  end
226
+
227
+ require 'alf/operator/class_methods'
228
+ require 'alf/operator/signature'
229
+ require 'alf/operator/base'
230
+ require 'alf/operator/nullary'
231
+ require 'alf/operator/unary'
232
+ require 'alf/operator/binary'
233
+ require 'alf/operator/cesure'
234
+ require 'alf/operator/transform'
235
+ require 'alf/operator/shortcut'
236
+ require 'alf/operator/experimental'
237
+
3430
238
 
3431
239
  #
3432
- # Defines a COLLECT aggregation operator
3433
- #
3434
- class Collect < Aggregator
3435
- def least(); []; end
3436
- def _happens(memo, val)
3437
- memo << val
3438
- end
3439
- end
3440
-
3441
- #
3442
- # Defines a CONCAT aggregation operator
3443
- #
3444
- class Concat < Aggregator
3445
- def least(); ""; end
3446
- def default_options
3447
- {:before => "", :after => "", :between => ""}
3448
- end
3449
- def _happens(memo, val)
3450
- memo << options[:between].to_s unless memo.empty?
3451
- memo << val.to_s
3452
- end
3453
- def finalize(memo)
3454
- options[:before].to_s + memo + options[:after].to_s
3455
- end
3456
- end
3457
-
3458
- end # class Aggregator
3459
-
3460
- #
3461
- # Base class for implementing buffers.
3462
- #
3463
- class Buffer
3464
-
3465
- #
3466
- # Keeps tuples ordered on a specific key
3467
- #
3468
- # Example:
3469
- #
3470
- # sorted = Buffer::Sorted.new OrderingKey.new(...)
3471
- # sorted.add_all(...)
3472
- # sorted.each do |tuple|
3473
- # # tuples are ordered here
3474
- # end
240
+ # Marker module and namespace for non relational operators
3475
241
  #
3476
- class Sorted < Buffer
242
+ module NonRelational
243
+ require 'alf/operator/non_relational/autonum'
244
+ require 'alf/operator/non_relational/defaults'
245
+ require 'alf/operator/non_relational/compact'
246
+ require 'alf/operator/non_relational/sort'
247
+ require 'alf/operator/non_relational/clip'
248
+ require 'alf/operator/non_relational/coerce'
249
+ require 'alf/operator/non_relational/generator'
3477
250
 
3478
251
  #
3479
- # Creates a buffer instance with an ordering key
252
+ # Yields the block with each operator module in turn
3480
253
  #
3481
- def initialize(ordering_key)
3482
- @ordering_key = ordering_key
3483
- @buffer = []
3484
- end
3485
-
3486
- #
3487
- # Adds all elements of an iterator to the buffer
3488
- #
3489
- def add_all(enum)
3490
- sorter = @ordering_key.sorter
3491
- @buffer = merge_sort(@buffer, enum.to_a.sort(&sorter), sorter)
254
+ def self.each
255
+ constants.each do |c|
256
+ val = const_get(c)
257
+ yield(val) if val.ancestors.include?(Operator::NonRelational)
258
+ end
3492
259
  end
260
+
261
+ end # NonRelational
262
+
263
+ #
264
+ # Marker module and namespace for relational operators
265
+ #
266
+ module Relational
267
+ require 'alf/operator/relational/project'
268
+ require 'alf/operator/relational/extend'
269
+ require 'alf/operator/relational/rename'
270
+ require 'alf/operator/relational/restrict'
271
+ require 'alf/operator/relational/join'
272
+ require 'alf/operator/relational/intersect'
273
+ require 'alf/operator/relational/minus'
274
+ require 'alf/operator/relational/union'
275
+ require 'alf/operator/relational/matching'
276
+ require 'alf/operator/relational/not_matching'
277
+ require 'alf/operator/relational/wrap'
278
+ require 'alf/operator/relational/unwrap'
279
+ require 'alf/operator/relational/group'
280
+ require 'alf/operator/relational/ungroup'
281
+ require 'alf/operator/relational/summarize'
282
+ require 'alf/operator/relational/rank'
283
+ require 'alf/operator/relational/quota'
3493
284
 
285
+ #
286
+ # Yields the block with each operator module in turn
3494
287
  #
3495
- # (see Buffer#each)
3496
- #
3497
- def each
3498
- @buffer.each(&Proc.new)
288
+ def self.each
289
+ constants.each do |c|
290
+ val = const_get(c)
291
+ yield(val) if val.ancestors.include?(Operator::Relational)
292
+ end
3499
293
  end
3500
-
3501
- private
3502
294
 
3503
- # Implements a merge sort between two iterators s1 and s2
3504
- def merge_sort(s1, s2, sorter)
3505
- (s1 + s2).sort(&sorter)
3506
- end
3507
-
3508
- end # class Buffer::Sorted
295
+ end # module Relational
3509
296
 
3510
- end # class Buffer
297
+ end # module Operator
3511
298
 
3512
299
  #
3513
- # Defines a Heading, that is, a set of attribute (name,domain) pairs.
300
+ # Aggregation operator.
3514
301
  #
3515
- class Heading
3516
-
3517
- #
3518
- # Creates a Heading instance
3519
- #
3520
- # @param [Hash] a hash of attribute (name, type) pairs where name is
3521
- # a Symbol and type is a Class
3522
- #
3523
- def self.[](attributes)
3524
- Heading.new(attributes)
3525
- end
302
+ class Aggregator
303
+ require 'alf/aggregator/class_methods'
304
+ require 'alf/aggregator/base'
305
+ require 'alf/aggregator/aggregators'
3526
306
 
3527
- # @return [Hash] a (frozen) hash of (name, type) pairs
3528
- attr_reader :attributes
3529
-
3530
- #
3531
- # Creates a Heading instance
3532
- #
3533
- # @param [Hash] a hash of attribute (name, type) pairs where name is
3534
- # a Symbol and type is a Class
3535
- #
3536
- def initialize(attributes)
3537
- @attributes = attributes.dup.freeze
3538
- end
3539
-
3540
- #
3541
- # Returns heading's cardinality
3542
- #
3543
- def cardinality
3544
- attributes.size
3545
- end
3546
- alias :size :cardinality
3547
- alias :count :cardinality
3548
-
3549
- #
3550
- # Returns heading's hash code
3551
- #
3552
- def hash
3553
- @hash ||= attributes.hash
3554
- end
3555
-
3556
- #
3557
- # Checks equality with other heading
3558
- #
3559
- def ==(other)
3560
- other.is_a?(Heading) && (other.attributes == attributes)
3561
- end
3562
- alias :eql? :==
3563
-
3564
- #
3565
- # Converts this heading to a Hash of (name,type) pairs
3566
- #
3567
- def to_hash
3568
- attributes.dup
3569
- end
3570
-
3571
- #
3572
- # Returns a Heading literal
3573
- #
3574
- def to_ruby_literal
3575
- attributes.empty? ?
3576
- "Alf::Heading::EMPTY" :
3577
- "Alf::Heading[#{Myrrha.to_ruby_literal(attributes)[1...-1]}]"
3578
- end
3579
- alias :inspect :to_ruby_literal
3580
-
3581
- EMPTY = Alf::Heading.new({})
3582
- end # class Heading
307
+ end # class Aggregator
3583
308
 
309
+ #
310
+ # Base class for implementing buffers.
311
+ #
312
+ class Buffer
313
+ require 'alf/buffer/sorted'
314
+
315
+ end # class Buffer
316
+
3584
317
  #
3585
318
  # Defines an in-memory relation data structure.
3586
319
  #
@@ -3600,150 +333,10 @@ module Alf
3600
333
  #
3601
334
  class Relation
3602
335
  include Iterator
3603
-
3604
- protected
3605
-
3606
- # @return [Set] the set of tuples
3607
- attr_reader :tuples
3608
-
3609
- public
3610
-
3611
- #
3612
- # Creates a Relation instance.
3613
- #
3614
- # @param [Set] tuples a set of tuples
3615
- #
3616
- def initialize(tuples)
3617
- raise ArgumentError unless tuples.is_a?(Set)
3618
- @tuples = tuples
3619
- end
3620
-
3621
- #
3622
- # Coerces `val` to a relation.
3623
- #
3624
- # Recognized arguments are: Relation (identity coercion), Set of ruby hashes,
3625
- # Array of ruby hashes, Alf::Iterator.
3626
- #
3627
- # @return [Relation] a relation instance for the given set of tuples
3628
- # @raise [ArgumentError] when `val` is not recognized
3629
- #
3630
- def self.coerce(val)
3631
- case val
3632
- when Relation
3633
- val
3634
- when Set
3635
- Relation.new(val)
3636
- when Array
3637
- Relation.new val.to_set
3638
- when Iterator
3639
- Relation.new val.to_set
3640
- else
3641
- raise ArgumentError, "Unable to coerce #{val} to a Relation"
3642
- end
3643
- end
3644
-
3645
- # (see Relation.coerce)
3646
- def self.[](*tuples)
3647
- coerce(tuples)
3648
- end
3649
-
3650
- #
3651
- # (see Iterator#each)
3652
- #
3653
- def each(&block)
3654
- tuples.each(&block)
3655
- end
3656
-
3657
- #
3658
- # Returns relation's cardinality (number of tuples).
3659
- #
3660
- # @return [Integer] relation's cardinality
3661
- #
3662
- def cardinality
3663
- tuples.size
3664
- end
3665
- alias :size :cardinality
3666
- alias :count :cardinality
3667
-
3668
- # Returns true if this relation is empty
3669
- def empty?
3670
- cardinality == 0
3671
- end
3672
-
3673
- #
3674
- # Install the DSL through iteration over defined operators
3675
- #
3676
- Operator::each do |op_class|
3677
- meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
3678
- if op_class.unary?
3679
- define_method(meth_name) do |*args|
3680
- op = op_class.new(*args).pipe(self)
3681
- Relation.coerce(op)
3682
- end
3683
- elsif op_class.binary?
3684
- define_method(meth_name) do |right, *args|
3685
- op = op_class.new(*args).pipe([self, Iterator.coerce(right)])
3686
- Relation.coerce(op)
3687
- end
3688
- else
3689
- raise "Unexpected operator #{op_class}"
3690
- end
3691
- end # Operators::each
3692
-
3693
- alias :+ :union
3694
- alias :- :minus
3695
336
 
3696
- # Shortcut for project(attributes, true)
3697
- def allbut(attributes)
3698
- project(attributes, true)
3699
- end
3700
-
3701
- #
3702
- # (see Object#hash)
3703
- #
3704
- def hash
3705
- @tuples.hash
3706
- end
3707
-
3708
- #
3709
- # (see Object#==)
3710
- #
3711
- def ==(other)
3712
- return nil unless other.is_a?(Relation)
3713
- other.tuples == self.tuples
3714
- end
3715
- alias :eql? :==
3716
-
3717
- #
3718
- # Returns a textual representation of this relation
3719
- #
3720
- def to_s
3721
- Alf::Renderer.text(self).execute("")
3722
- end
3723
-
3724
- #
3725
- # Returns an array with all tuples in this relation.
3726
- #
3727
- # @param [Tools::OrderingKey] an optional ordering key (any argument
3728
- # recognized by OrderingKey.coerce is supported here).
3729
- # @return [Array] an array of hashes, in requested order (if specified)
3730
- #
3731
- def to_a(okey = nil)
3732
- okey = Tools::OrderingKey.coerce(okey) if okey
3733
- ary = tuples.to_a
3734
- ary.sort!(&okey.sorter) if okey
3735
- ary
3736
- end
3737
-
3738
- #
3739
- # Returns a literal representation of this relation
3740
- #
3741
- def to_ruby_literal
3742
- "Alf::Relation[" +
3743
- tuples.collect{|t| Myrrha.to_ruby_literal(t)}.join(', ') + "]"
3744
- end
3745
- alias :inspect :to_ruby_literal
3746
-
337
+ require "alf/relation/class_methods"
338
+ require "alf/relation/instance_methods"
339
+
3747
340
  DEE = Relation.coerce([{}])
3748
341
  DUM = Relation.coerce([])
3749
342
  end # class Relation
@@ -3764,123 +357,10 @@ module Alf
3764
357
  # is not intended to be directly included by third-party classes.
3765
358
  #
3766
359
  module Lispy
360
+ require 'alf/lispy/instance_methods'
3767
361
 
3768
- alias :ruby_extend :extend
3769
-
3770
- # The environment
3771
- attr_accessor :environment
3772
-
3773
- #
3774
- # Compiles a query expression given by a String or a block and returns
3775
- # the result (typically a tuple iterator)
3776
- #
3777
- # Example
3778
- #
3779
- # # with a string
3780
- # op = compile "(restrict :suppliers, lambda{ city == 'London' })"
3781
- #
3782
- # # or with a block
3783
- # op = compile {
3784
- # (restrict :suppliers, lambda{ city == 'London' })
3785
- # }
3786
- #
3787
- # @param [String] expr a Lispy expression to compile
3788
- # @return [Iterator] the iterator resulting from compilation
3789
- #
3790
- def compile(expr = nil, path = nil, &block)
3791
- if expr.nil?
3792
- instance_eval(&block)
3793
- else
3794
- b = _clean_binding
3795
- (path ? Kernel.eval(expr, b, path) : Kernel.eval(expr, b))
3796
- end
3797
- end
3798
-
3799
- #
3800
- # Evaluates a query expression given by a String or a block and returns
3801
- # the result as an in-memory relation (Alf::Relation)
3802
- #
3803
- # Example:
3804
- #
3805
- # # with a string
3806
- # rel = evaluate "(restrict :suppliers, lambda{ city == 'London' })"
3807
- #
3808
- # # or with a block
3809
- # rel = evaluate {
3810
- # (restrict :suppliers, lambda{ city == 'London' })
3811
- # }
3812
- #
3813
- def evaluate(expr = nil, path = nil, &block)
3814
- compile(expr, path, &block).to_rel
3815
- end
3816
-
3817
- #
3818
- # Delegated to the current environment
3819
- #
3820
- # This method returns the dataset associated to a given name. The result
3821
- # may depend on the current environment, but is generally an Iterator,
3822
- # often a Reader instance.
3823
- #
3824
- # @param [Symbol] name name of the dataset to retrieve
3825
- # @return [Iterator] the dataset as an iterator
3826
- # @see Environment#dataset
3827
- #
3828
- def dataset(name)
3829
- raise "Environment not set" unless @environment
3830
- @environment.dataset(name)
3831
- end
3832
-
3833
- # Functional equivalent to Alf::Relation[...]
3834
- def relation(*tuples)
3835
- Relation.coerce(tuples)
3836
- end
3837
-
3838
- #
3839
- # Install the DSL through iteration over defined operators
3840
- #
3841
- Operator::each do |op_class|
3842
- meth_name = Tools.ruby_case(Tools.class_name(op_class)).to_sym
3843
- if op_class.unary?
3844
- define_method(meth_name) do |child, *args|
3845
- child = Iterator.coerce(child, environment)
3846
- op_class.new(*args).pipe(child, environment)
3847
- end
3848
- elsif op_class.binary?
3849
- define_method(meth_name) do |left, right, *args|
3850
- operands = [left, right].collect{|x| Iterator.coerce(x, environment)}
3851
- op_class.new(*args).pipe(operands, environment)
3852
- end
3853
- else
3854
- raise "Unexpected operator #{op_class}"
3855
- end
3856
- end # Operators::each
3857
-
3858
- def allbut(child, attributes)
3859
- (project child, attributes, true)
3860
- end
3861
-
3862
- #
3863
- # Runs a command as in shell.
3864
- #
3865
- # Example:
3866
- #
3867
- # lispy = Alf.lispy(Alf::Environment.examples)
3868
- # op = lispy.run(['restrict', 'suppliers', '--', "city == 'Paris'"])
3869
- #
3870
- def run(argv, requester = nil)
3871
- Alf::Command::Main.new(environment).run(argv, requester)
3872
- end
3873
-
3874
- Agg = Alf::Aggregator
3875
362
  DUM = Relation::DUM
3876
363
  DEE = Relation::DEE
3877
-
3878
- private
3879
-
3880
- def _clean_binding
3881
- binding
3882
- end
3883
-
3884
364
  end # module Lispy
3885
365
 
3886
366
  #
@@ -3906,5 +386,4 @@ module Alf
3906
386
  end
3907
387
 
3908
388
  end # module Alf
3909
- require "alf/text"
3910
- require "alf/yaml"
389
+ require "alf/extra"