piglet 0.2.5 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -4
- data/Gemfile +10 -0
- data/Gemfile.lock +53 -0
- data/README.rdoc +74 -38
- data/Rakefile +10 -1
- data/lib/piglet.rb +5 -1
- data/lib/piglet/field/call_expression.rb +7 -2
- data/lib/piglet/field/direct_expression.rb +28 -0
- data/lib/piglet/field/field.rb +73 -3
- data/lib/piglet/field/infix_expression.rb +14 -9
- data/lib/piglet/field/map_value.rb +17 -0
- data/lib/piglet/field/prefix_expression.rb +6 -3
- data/lib/piglet/field/reference.rb +5 -7
- data/lib/piglet/field/rename.rb +7 -5
- data/lib/piglet/field/suffix_expression.rb +4 -2
- data/lib/piglet/field/udf_expression.rb +19 -2
- data/lib/piglet/inout/load.rb +2 -2
- data/lib/piglet/interpreter.rb +8 -18
- data/lib/piglet/relation/block_context.rb +41 -0
- data/lib/piglet/relation/cogroup.rb +2 -1
- data/lib/piglet/relation/cross.rb +2 -2
- data/lib/piglet/relation/distinct.rb +2 -2
- data/lib/piglet/relation/filter.rb +2 -2
- data/lib/piglet/relation/foreach.rb +2 -2
- data/lib/piglet/relation/group.rb +2 -2
- data/lib/piglet/relation/join.rb +2 -1
- data/lib/piglet/relation/limit.rb +2 -2
- data/lib/piglet/relation/nested_foreach.rb +60 -0
- data/lib/piglet/relation/order.rb +4 -2
- data/lib/piglet/relation/relation.rb +43 -32
- data/lib/piglet/relation/sample.rb +2 -2
- data/lib/piglet/relation/split.rb +5 -5
- data/lib/piglet/relation/stream.rb +2 -1
- data/lib/piglet/relation/union.rb +2 -2
- data/piglet.gemspec +126 -0
- data/spec/piglet/field/field_spec.rb +7 -2
- data/spec/piglet/interpreter_spec.rb +6 -6
- data/spec/piglet/relation/relation_spec.rb +7 -4
- data/spec/piglet/relation/split_spec.rb +3 -1
- data/spec/piglet/relation/union_spec.rb +5 -7
- data/spec/piglet_spec.rb +76 -31
- data/spec/spec_helper.rb +9 -0
- data/tasks/gem.rake +16 -19
- data/tasks/rdoc.rake +1 -3
- metadata +34 -11
- data/TODO +0 -2
data/.gitignore
CHANGED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
---
|
2
|
+
dependencies:
|
3
|
+
rake:
|
4
|
+
group:
|
5
|
+
- :development
|
6
|
+
version: ">= 0"
|
7
|
+
rspec:
|
8
|
+
group:
|
9
|
+
- :development
|
10
|
+
version: ">= 0"
|
11
|
+
sdoc:
|
12
|
+
group:
|
13
|
+
- :development
|
14
|
+
version: ">= 0"
|
15
|
+
rcov:
|
16
|
+
group:
|
17
|
+
- :development
|
18
|
+
version: ">= 0"
|
19
|
+
jeweler:
|
20
|
+
group:
|
21
|
+
- :development
|
22
|
+
version: ">= 0"
|
23
|
+
rdoc:
|
24
|
+
group:
|
25
|
+
- :development
|
26
|
+
version: ">= 2.4.0"
|
27
|
+
specs:
|
28
|
+
- rake:
|
29
|
+
version: 0.8.7
|
30
|
+
- json_pure:
|
31
|
+
version: 1.4.3
|
32
|
+
- gemcutter:
|
33
|
+
version: 0.5.0
|
34
|
+
- git:
|
35
|
+
version: 1.2.5
|
36
|
+
- rubyforge:
|
37
|
+
version: 2.0.4
|
38
|
+
- jeweler:
|
39
|
+
version: 1.4.0
|
40
|
+
- json:
|
41
|
+
version: 1.4.3
|
42
|
+
- rcov:
|
43
|
+
version: 0.9.8
|
44
|
+
- rdoc:
|
45
|
+
version: 2.5.8
|
46
|
+
- rspec:
|
47
|
+
version: 1.3.0
|
48
|
+
- sdoc:
|
49
|
+
version: 0.2.19
|
50
|
+
hash: 4f040144929b22ea17e9c74ab3cc8dd9db5a6fcf
|
51
|
+
sources:
|
52
|
+
- Rubygems:
|
53
|
+
uri: http://gemcutter.org
|
data/README.rdoc
CHANGED
@@ -71,7 +71,7 @@ to standard out.
|
|
71
71
|
|
72
72
|
a = load 'input', :schema => [:a, :b, :c]
|
73
73
|
b = a.group :c
|
74
|
-
c = b.foreach {
|
74
|
+
c = b.foreach { [self[0], self[1].a.max, self[1].b.max] }
|
75
75
|
store c, 'output'
|
76
76
|
|
77
77
|
will result in the following Pig Latin:
|
@@ -175,27 +175,29 @@ In light of the above +group+ works exactly as you would expect: <code>a.group(:
|
|
175
175
|
|
176
176
|
== +filter+
|
177
177
|
|
178
|
-
+filter+ works a little bit different from the operators discussed above. It takes a block in which you specify the arguments to the operator. The block
|
178
|
+
+filter+ works a little bit differently from the operators discussed above. It takes a block in which you specify the arguments to the operator. The block is interpreted in the context of the relation it's performed on.
|
179
179
|
|
180
|
-
The thing that sets +filter+ apart from the operators above is it needs to support field expressions. For example the <code>x == 3</code> in <code>FILTER a BY x == 3</code>. Piglet supports simple field operators like <code>==</code> or <code>%</code> quite transparently, but more complex expressions can be less elegant, see ”Limitations” below. For example <code>a.filter {
|
180
|
+
The thing that sets +filter+ apart from the operators above is that it needs to support field expressions, for example the <code>x == 3</code> in <code>FILTER a BY x == 3</code>. Piglet supports simple field operators like <code>==</code> or <code>%</code> quite transparently, but more complex expressions can be less elegant; see “Limitations” below. For example <code>a.filter { x == 3 }</code> works fine, but <code>a.filter { x != 3 }</code> doesn't (it has to do with how Ruby parses expressions, unfortunately). To express “not equals” you can use either <code>x.ne(3)</code> or <code>(x == 3).not</code>. See “Limitations” below for more info on field expressions.
|
181
181
|
|
182
|
-
The way field expressions are done in Piglet is that you
|
182
|
+
The way field expressions are done in Piglet is that you simply use fields as if they were existing local variables, and then call methods on those to build up an expression. Some Ruby operators can be used, but other operations are only available as methods, again, see “Limitations” below for a complete reference.
|
183
183
|
|
184
|
-
a.filter {
|
185
|
-
a.filter {
|
184
|
+
a.filter { x == 3 } # => FILTER a BY x == 3
|
185
|
+
a.filter { (x > 4).or(y < 2) } # => FILTER a BY x > 4 OR y < 2
|
186
|
+
|
187
|
+
Be careful about the names of the fields. Ruby's scoping rules apply, which means that if there's already a variable defined outside of the block with the name +x+ Ruby will assume you meant that variable. If you get strange results, try prefixing with +self+, e.g. +self.x+.
|
186
188
|
|
187
189
|
== +foreach+
|
188
190
|
|
189
|
-
<code>FOREACH … GENERATE</code> is probably the most complex operator in Pig Latin. Piglet tries its best to support most of it, but there are things that are still missing -- see “Limitations”. Most things should work without problems though. The operator in Piglet is called simply +foreach+, and just as +filter+ it takes a block, which
|
191
|
+
<code>FOREACH … GENERATE</code> is probably the most complex operator in Pig Latin. Piglet tries its best to support most of it, but there are things that are still missing -- see “Limitations”. Most things should work without problems though. The operator in Piglet is called simply +foreach+, and just as +filter+ it takes a block, which is interpreted in the context of the relation +foreach+ was called on.
|
190
192
|
|
191
193
|
In contrast to +filter+, +foreach+ should return an array of field references and expressions. This array describes the schema of the new relation. The expressions used in +foreach+ are usually not the same as those used in +filter+, although all are of course available in both situations. In +foreach+ common operators to use are the aggregate functions (called “eval functions” in the Pig Latin manual) like +MAX+, +MIN+, +COUNT+, +SUM+, etc. In Piglet these are method calls on field objects. Let's look at an example (I like to use lots of whitespace and newlines for +foreach+ operations, because otherwise it gets very messy):
|
192
194
|
|
193
|
-
a.foreach do
|
195
|
+
a.foreach do
|
194
196
|
[
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
197
|
+
x.max,
|
198
|
+
y.min,
|
199
|
+
z.count,
|
200
|
+
w + q
|
199
201
|
]
|
200
202
|
end
|
201
203
|
|
@@ -207,27 +209,45 @@ this would be translated into:
|
|
207
209
|
COUNT(z),
|
208
210
|
w + q;
|
209
211
|
|
210
|
-
pretty straight forward. What if you want to give the fields of the new relation proper names? In Pig Latin you would write <code>MAX(x) AS (x_max)</code>, and in Piglet you can write <code>
|
212
|
+
pretty straightforward. What if you want to give the fields of the new relation proper names? In Pig Latin you would write <code>MAX(x) AS (x_max)</code>, and in Piglet you can write <code>x.max.as(:x_max)</code>. This is such a common thing to do that I'm thinking of adding some kind of feature that automatically adds <code>AS</code> clauses where appropriate, but it's not there yet.
|
213
|
+
|
214
|
+
If you want to access fields with $0, $1, etc. you can use +self[0]+, +self[1]+:
|
215
|
+
|
216
|
+
a.foreach { [self[0].as(:x)] } # => FOREACH a GENERATE $0 AS x
|
211
217
|
|
212
218
|
+foreach+ is a very complex beast, and this is just an overview, so I'll just give you a few more examples that are not obvious:
|
213
219
|
|
214
220
|
Literal values can be specified using +literal+:
|
215
221
|
|
216
|
-
a.foreach {
|
222
|
+
a.foreach { [literal('hello').as(:hello)] } # => FOREACH a GENERATE 'hello' AS hello
|
217
223
|
|
218
224
|
Binary conditionals, a.k.a. the ternary operator, are supported through +test+ (unfortunately the Ruby ternary operator can't be overridden):
|
219
225
|
|
220
|
-
a.foreach {
|
226
|
+
a.foreach { [test(x == 3, y, z)] } # => FOREACH a GENERATE (x == 3 ? y : z)
|
221
227
|
|
222
228
|
The first argument to +test+ is the test expression, the second is the if-true expression and the third is the if-false expression.
|
223
229
|
|
224
|
-
|
230
|
+
== +nested_foreach+
|
231
|
+
|
232
|
+
In Pig Latin you can use a different syntax if you have a relation with an inner bag, e.g:
|
233
|
+
|
234
|
+
x = FOREACH b {
|
235
|
+
s = FILTER a BY c == 'xyz';
|
236
|
+
GENERATE COUNT(s.z);
|
237
|
+
}
|
238
|
+
|
239
|
+
In Piglet you would write this as
|
240
|
+
|
241
|
+
x = b.nested_foreach {
|
242
|
+
s = a.filter { c == 'xyz' }
|
243
|
+
[s.z.count]
|
244
|
+
}
|
225
245
|
|
226
246
|
== +split+
|
227
247
|
|
228
|
-
The syntax of +split+ shouldn't be surprising if you've read this far, but there's perhaps some details that aren't obvious. To split a relation into a number of parts you call +split+ on the relation and pass a block in which you specify the expressions describing each shard. Just as with +filter+ and +foreach+ the block
|
248
|
+
The syntax of +split+ shouldn't be surprising if you've read this far, but there are perhaps some details that aren't obvious. To split a relation into a number of parts you call +split+ on the relation and pass a block in which you specify the expressions describing each shard. Just as with +filter+ and +foreach+ the block operates in the context of the relation +split+ is called on. +split+ returns an array containing the relation shards and you can use parallel assignment to make it look really nice:
|
229
249
|
|
230
|
-
b, c = a.split {
|
250
|
+
b, c = a.split { [x > 2, y == 3] } # => SPLIT a INTO b IF x > 2, c IF y == 3
|
231
251
|
|
232
252
|
== +cogroup+ & +join+
|
233
253
|
|
@@ -269,7 +289,7 @@ When you define a UDF it becomes available as a method in the interpreter scope.
|
|
269
289
|
|
270
290
|
define :awesome, :function => 'my.awesome.Function' # => DEFINE awesome my.awesome.Function
|
271
291
|
…
|
272
|
-
b = a.foreach {
|
292
|
+
b = a.foreach { [awesome(self[0]).as(:something_special)] } # => b = FOREACH a GENERATE awesome($0) AS something_special
|
273
293
|
|
274
294
|
If you need to register a JAR you can use +register+:
|
275
295
|
|
@@ -307,10 +327,10 @@ If you want to quote the value with backticks, pass <code>:backticks => true</co
|
|
307
327
|
Let's look at a more complex example:
|
308
328
|
|
309
329
|
students = load('students.txt', :schema => [%w(student chararray), %w(age int), %w(grade int)])
|
310
|
-
top_acheivers = students.filter {
|
311
|
-
name_and_age = top_acheivers.foreach {
|
330
|
+
top_acheivers = students.filter { grade == 5 }
|
331
|
+
name_and_age = top_acheivers.foreach { [student.as(:name), age] }
|
312
332
|
name_by_age = name_and_age.group(:age)
|
313
|
-
count_by_age = name_by_age.foreach {
|
333
|
+
count_by_age = name_by_age.foreach { [self[0].as(:age), self[1].name.count.as(:count)] }
|
314
334
|
store(count_by_age, 'student_counts_by_age.txt', :using => :pig_storage)
|
315
335
|
|
316
336
|
We load the file <code>students.txt</code> as a relation with three fields: <code>student</code>, a string, <code>age</code> an integer and <code>grade</code> another integer. Next we filter out the top achievers with +filter+. +filter+ takes a block and that block gets a reference to the relation (the one +filter+ was called on); the result of the block will be the filter expression, in this case it's <code>grade == 5</code>.
|
@@ -338,8 +358,8 @@ My goal with Piglet was to add control of flow and reuse mechanisms to Pig, so I
|
|
338
358
|
|
339
359
|
input = load('input', :schema => %w(country browser site visit_duration))
|
340
360
|
%w(country browser site).each do |dimension|
|
341
|
-
grouped = input.group(dimension).foreach do
|
342
|
-
[
|
361
|
+
grouped = input.group(dimension).foreach do
|
362
|
+
[self[0], self[1].visit_duration.sum]
|
343
363
|
end
|
344
364
|
store(grouped, "output-#{dimension}")
|
345
365
|
end
|
@@ -360,8 +380,8 @@ We load a file that contains an ID field, three dimensions (country, browser and
|
|
360
380
|
But in Piglet it's as simple as looping over the names of the dimensions. You could even define a method that encapsulates the grouping, summing and storing (although in this case it would be a bit overkill):
|
361
381
|
|
362
382
|
def sum_dimension(relation, dimension)
|
363
|
-
grouped = relation.group(dimension).foreach do
|
364
|
-
[
|
383
|
+
grouped = relation.group(dimension).foreach do
|
384
|
+
[self[0], self[1].visit_duration.sum]
|
365
385
|
end
|
366
386
|
store(grouped, "output-#{dimension}")
|
367
387
|
end
|
@@ -384,6 +404,19 @@ and then use them just as any other operator:
|
|
384
404
|
|
385
405
|
small, medium, large = input.samples(0.01, 0.1, 0.5)
|
386
406
|
|
407
|
+
or what about an operator that returns the top _n_ items by some field:
|
408
|
+
|
409
|
+
module Piglet::Relation::Relation
|
410
|
+
# Returns the top _n_ tuples from a relation, ordered by _field_
|
411
|
+
def top(n, field)
|
412
|
+
order([field, :desc]).limit(n)
|
413
|
+
end
|
414
|
+
end
|
415
|
+
|
416
|
+
which can be used as
|
417
|
+
|
418
|
+
input.top(10, :score)
|
419
|
+
|
387
420
|
nifty, huh?
|
388
421
|
|
389
422
|
=== Types & schemas
|
@@ -393,7 +426,7 @@ Piglet knows the schema of relations, so you can do something else that Pig lack
|
|
393
426
|
relation = load('in', :schema => [:a, :b, :c])
|
394
427
|
relation.schema.field_names.each do |field|
|
395
428
|
grouped = relation.group(field)
|
396
|
-
counted = grouped.foreach {
|
429
|
+
counted = grouped.foreach { [self[1].count] }
|
397
430
|
store(counted, "out-#{field}")
|
398
431
|
end
|
399
432
|
|
@@ -415,7 +448,7 @@ The following Pig operators are supported:
|
|
415
448
|
* +DUMP+
|
416
449
|
* +EXPLAIN+
|
417
450
|
* +FILTER+
|
418
|
-
* <code>FOREACH … GENERATE</code>
|
451
|
+
* <code>FOREACH … GENERATE</code> (including <code>FOREACH { … GENERATE }</code>)
|
419
452
|
* +GROUP+
|
420
453
|
* +ILLUSTRATE+
|
421
454
|
* +JOIN+
|
@@ -429,13 +462,9 @@ The following Pig operators are supported:
|
|
429
462
|
* +STREAM+
|
430
463
|
* +UNION+
|
431
464
|
|
432
|
-
The following is currently not supported (but will be soon):
|
433
|
-
|
434
|
-
* <code>FOREACH { … } GENERATE</code>
|
435
|
-
|
436
465
|
The file commands (+cd+, +cat+, etc.) will probably not be supported for the foreseeable future.
|
437
466
|
|
438
|
-
All the aggregate functions except
|
467
|
+
All the aggregate functions except one are supported:
|
439
468
|
|
440
469
|
* +AVG+
|
441
470
|
* +CONCAT+
|
@@ -446,11 +475,9 @@ All the aggregate functions except two are supported:
|
|
446
475
|
* +SIZE+
|
447
476
|
* +SUM+
|
448
477
|
* +TOKENIZE+
|
449
|
-
|
450
|
-
These are not supported yet:
|
451
|
-
|
452
|
-
* +DIFF+
|
453
478
|
* +FLATTEN+
|
479
|
+
|
480
|
+
+DIFF+ is not supported yet.
|
454
481
|
|
455
482
|
Piglet supports most arithmetic and logic operators (see below) on fields -- but check the output and make sure that it's doing what you expect, because sometimes it's tricky to see where Piglet hijacks the operators and where it's Ruby that is running the show. I'm doing the best I can, but there are many things that can't be done, at least not in Ruby 1.8.
|
456
483
|
|
@@ -491,6 +518,10 @@ In the future I may add a way of manually suggesting relation aliases, so that t
|
|
491
518
|
|
492
519
|
You may also wonder why the relation aliases aren't in consecutive order. The reason is that they get their names in the order they are evaluated, and the interpreter walks the relation ancestry upwards from a +store+ (and it only evaluates a relation once).
|
493
520
|
|
521
|
+
=== Why the verbosity in the code generated from a nested +FOREACH+?
|
522
|
+
|
523
|
+
I'm working on it.
|
524
|
+
|
494
525
|
=== Why aren’t all operations included in the output?
|
495
526
|
|
496
527
|
If you try this Piglet code:
|
@@ -508,6 +539,11 @@ As a side effect of using +store+ and the other output operators as the trigger
|
|
508
539
|
|
509
540
|
Please contact me and give me the Piglet code and what you think the output should be. I'll try to either fix your Piglet code, or fix Piglet to do what you expect it to do.
|
510
541
|
|
542
|
+
== Contributors
|
543
|
+
|
544
|
+
* Theo Hultberg
|
545
|
+
* Ning Liang
|
546
|
+
|
511
547
|
== Copyright
|
512
548
|
|
513
|
-
© 2009-2010 Theo Hultberg / Iconara. See LICENSE for details.
|
549
|
+
© 2009-2010 Theo Hultberg / Iconara and contributors. See LICENSE for details.
|
data/Rakefile
CHANGED
data/lib/piglet.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# :main: README.rdoc
|
4
4
|
module Piglet # :nodoc:
|
5
|
-
VERSION = '0.
|
5
|
+
VERSION = '0.3.0'
|
6
6
|
|
7
7
|
class PigletError < StandardError; end
|
8
8
|
class NotSupportedError < PigletError; end
|
@@ -21,11 +21,13 @@ module Piglet # :nodoc:
|
|
21
21
|
end
|
22
22
|
|
23
23
|
module Relation
|
24
|
+
autoload :BlockContext, 'piglet/relation/block_context'
|
24
25
|
autoload :Cogroup, 'piglet/relation/cogroup'
|
25
26
|
autoload :Cross, 'piglet/relation/cross'
|
26
27
|
autoload :Distinct, 'piglet/relation/distinct'
|
27
28
|
autoload :Filter, 'piglet/relation/filter'
|
28
29
|
autoload :Foreach, 'piglet/relation/foreach'
|
30
|
+
autoload :NestedForeach, 'piglet/relation/nested_foreach'
|
29
31
|
autoload :Group, 'piglet/relation/group'
|
30
32
|
autoload :Join, 'piglet/relation/join'
|
31
33
|
autoload :Limit, 'piglet/relation/limit'
|
@@ -41,8 +43,10 @@ module Piglet # :nodoc:
|
|
41
43
|
autoload :BinaryConditional, 'piglet/field/binary_conditional'
|
42
44
|
autoload :CallExpression, 'piglet/field/call_expression'
|
43
45
|
autoload :InfixExpression, 'piglet/field/infix_expression'
|
46
|
+
autoload :DirectExpression, 'piglet/field/direct_expression'
|
44
47
|
autoload :Literal, 'piglet/field/literal'
|
45
48
|
autoload :Field, 'piglet/field/field'
|
49
|
+
autoload :MapValue, 'piglet/field/map_value'
|
46
50
|
autoload :PrefixExpression, 'piglet/field/prefix_expression'
|
47
51
|
autoload :Reference, 'piglet/field/reference'
|
48
52
|
autoload :Rename, 'piglet/field/rename'
|
@@ -9,14 +9,19 @@ module Piglet
|
|
9
9
|
options ||= {}
|
10
10
|
@function_name, @inner_expression = function_name, inner_expression
|
11
11
|
@type = options[:type] || inner_expression.type
|
12
|
+
@predecessors = [inner_expression]
|
12
13
|
end
|
13
14
|
|
14
15
|
def simple?
|
15
16
|
false
|
16
17
|
end
|
17
18
|
|
18
|
-
def to_s
|
19
|
-
|
19
|
+
def to_s(inner=false)
|
20
|
+
if inner
|
21
|
+
"#{@function_name}(#{@inner_expression.field_alias})"
|
22
|
+
else
|
23
|
+
"#{@function_name}(#{@inner_expression})"
|
24
|
+
end
|
20
25
|
end
|
21
26
|
end
|
22
27
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Piglet
|
4
|
+
module Field
|
5
|
+
class DirectExpression
|
6
|
+
include Field
|
7
|
+
|
8
|
+
attr_reader :string
|
9
|
+
|
10
|
+
def initialize(string, predecessor)
|
11
|
+
@string = string
|
12
|
+
@predecessors = [predecessor]
|
13
|
+
end
|
14
|
+
|
15
|
+
def to_s(inner=false)
|
16
|
+
@string
|
17
|
+
end
|
18
|
+
|
19
|
+
def method_missing(name, *args)
|
20
|
+
if name.to_s =~ /^\w+$/ && args.empty?
|
21
|
+
field(name)
|
22
|
+
else
|
23
|
+
super
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/piglet/field/field.rb
CHANGED
@@ -6,7 +6,7 @@ module Piglet
|
|
6
6
|
SYMBOLIC_OPERATORS = [:==, :>, :<, :>=, :<=, :%, :+, :-, :*, :/]
|
7
7
|
FUNCTIONS = [:avg, :count, :max, :min, :size, :sum, :tokenize]
|
8
8
|
|
9
|
-
attr_reader :name, :type
|
9
|
+
attr_reader :name, :type, :predecessors
|
10
10
|
|
11
11
|
FUNCTIONS.each do |fun|
|
12
12
|
define_method(fun) do
|
@@ -69,9 +69,63 @@ module Piglet
|
|
69
69
|
InfixExpression.new(op.to_s, self, other, :type => symbolic_operator_return_type(op, self, other))
|
70
70
|
end
|
71
71
|
end
|
72
|
+
|
73
|
+
def generate_field_alias
|
74
|
+
if @parent.respond_to?(:next_field_alias)
|
75
|
+
@parent.next_field_alias
|
76
|
+
elsif predecessors.first.respond_to?(:generate_field_alias)
|
77
|
+
predecessors.first.generate_field_alias
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def field_alias
|
82
|
+
@field_alias ||= generate_field_alias
|
83
|
+
end
|
84
|
+
|
85
|
+
def predecessors
|
86
|
+
@predecessors ||= []
|
87
|
+
end
|
88
|
+
|
89
|
+
def distinct
|
90
|
+
DirectExpression.new("DISTINCT #{field_alias}", self)
|
91
|
+
end
|
92
|
+
|
93
|
+
def limit(size)
|
94
|
+
DirectExpression.new("LIMIT #{field_alias} #{size}", self)
|
95
|
+
end
|
96
|
+
|
97
|
+
def sample(rate)
|
98
|
+
DirectExpression.new("SAMPLE #{field_alias} #{rate}", self)
|
99
|
+
end
|
100
|
+
|
101
|
+
def order(*args)
|
102
|
+
fields, options = split_at_options(args)
|
103
|
+
fields = *fields
|
104
|
+
expression = Relation::Order.new(self, @interpreter, fields, options).to_s
|
105
|
+
DirectExpression.new(expression, self)
|
106
|
+
end
|
107
|
+
|
108
|
+
def filter(&block)
|
109
|
+
dummy_relation = DummyRelation.new(self.send(:alias))
|
110
|
+
context = Relation::BlockContext.new(dummy_relation, @interpreter)
|
111
|
+
expression = context.instance_eval(&block)
|
112
|
+
DirectExpression.new("FILTER #{field_alias} BY #{expression}", self)
|
113
|
+
end
|
114
|
+
|
115
|
+
def flatten
|
116
|
+
DirectExpression.new("FLATTEN(#{field_alias})", self)
|
117
|
+
end
|
118
|
+
|
119
|
+
def field(name)
|
120
|
+
Reference.new(name, self, :explicit_ancestry => true)
|
121
|
+
end
|
122
|
+
|
123
|
+
def get(key)
|
124
|
+
MapValue.new(key, self)
|
125
|
+
end
|
72
126
|
|
73
127
|
protected
|
74
|
-
|
128
|
+
|
75
129
|
def parenthesise(expr)
|
76
130
|
if expr.respond_to?(:simple?) && ! expr.simple?
|
77
131
|
"(#{expr})"
|
@@ -131,6 +185,22 @@ module Piglet
|
|
131
185
|
nil
|
132
186
|
end
|
133
187
|
end
|
134
|
-
|
188
|
+
|
189
|
+
def split_at_options(parameters)
|
190
|
+
if parameters.last.is_a? Hash
|
191
|
+
[parameters[0..-2], parameters.last]
|
192
|
+
else
|
193
|
+
[parameters, nil]
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
class DummyRelation
|
198
|
+
include Relation::Relation
|
199
|
+
attr_reader :alias
|
200
|
+
def initialize(ali4s)
|
201
|
+
@alias = ali4s
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
135
205
|
end
|
136
206
|
end
|