piglet 0.2.5 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -4
- data/Gemfile +10 -0
- data/Gemfile.lock +53 -0
- data/README.rdoc +74 -38
- data/Rakefile +10 -1
- data/lib/piglet.rb +5 -1
- data/lib/piglet/field/call_expression.rb +7 -2
- data/lib/piglet/field/direct_expression.rb +28 -0
- data/lib/piglet/field/field.rb +73 -3
- data/lib/piglet/field/infix_expression.rb +14 -9
- data/lib/piglet/field/map_value.rb +17 -0
- data/lib/piglet/field/prefix_expression.rb +6 -3
- data/lib/piglet/field/reference.rb +5 -7
- data/lib/piglet/field/rename.rb +7 -5
- data/lib/piglet/field/suffix_expression.rb +4 -2
- data/lib/piglet/field/udf_expression.rb +19 -2
- data/lib/piglet/inout/load.rb +2 -2
- data/lib/piglet/interpreter.rb +8 -18
- data/lib/piglet/relation/block_context.rb +41 -0
- data/lib/piglet/relation/cogroup.rb +2 -1
- data/lib/piglet/relation/cross.rb +2 -2
- data/lib/piglet/relation/distinct.rb +2 -2
- data/lib/piglet/relation/filter.rb +2 -2
- data/lib/piglet/relation/foreach.rb +2 -2
- data/lib/piglet/relation/group.rb +2 -2
- data/lib/piglet/relation/join.rb +2 -1
- data/lib/piglet/relation/limit.rb +2 -2
- data/lib/piglet/relation/nested_foreach.rb +60 -0
- data/lib/piglet/relation/order.rb +4 -2
- data/lib/piglet/relation/relation.rb +43 -32
- data/lib/piglet/relation/sample.rb +2 -2
- data/lib/piglet/relation/split.rb +5 -5
- data/lib/piglet/relation/stream.rb +2 -1
- data/lib/piglet/relation/union.rb +2 -2
- data/piglet.gemspec +126 -0
- data/spec/piglet/field/field_spec.rb +7 -2
- data/spec/piglet/interpreter_spec.rb +6 -6
- data/spec/piglet/relation/relation_spec.rb +7 -4
- data/spec/piglet/relation/split_spec.rb +3 -1
- data/spec/piglet/relation/union_spec.rb +5 -7
- data/spec/piglet_spec.rb +76 -31
- data/spec/spec_helper.rb +9 -0
- data/tasks/gem.rake +16 -19
- data/tasks/rdoc.rake +1 -3
- metadata +34 -11
- data/TODO +0 -2
@@ -15,24 +15,29 @@ module Piglet
|
|
15
15
|
else
|
16
16
|
@type = determine_type(@left_expression, @right_expression)
|
17
17
|
end
|
18
|
+
@predecessors = [left_expression, right_expression]
|
18
19
|
end
|
19
20
|
|
20
21
|
def simple?
|
21
22
|
false
|
22
23
|
end
|
23
24
|
|
24
|
-
def to_s
|
25
|
-
|
26
|
-
|
25
|
+
def to_s(inner=false)
|
26
|
+
if inner
|
27
|
+
left = @left_expression.field_alias
|
28
|
+
right = @right_expression.field_alias
|
29
|
+
else
|
30
|
+
left = @left_expression
|
31
|
+
right = @right_expression
|
27
32
|
|
28
|
-
|
29
|
-
|
30
|
-
|
33
|
+
if left.respond_to?(:operator) && left.operator != @operator
|
34
|
+
left = parenthesise(left)
|
35
|
+
end
|
31
36
|
|
32
|
-
|
33
|
-
|
37
|
+
if right.respond_to?(:operator) && right.operator != @operator
|
38
|
+
right = parenthesise(right)
|
39
|
+
end
|
34
40
|
end
|
35
|
-
|
36
41
|
"#{left} #{@operator} #{right}"
|
37
42
|
end
|
38
43
|
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Piglet
  module Field
    # Field expression rendered as "<parent>#<key>".
    # NOTE(review): presumably models Pig's map-value dereference; confirm
    # whether callers are expected to quote the key themselves.
    class MapValue
      include Field

      # key    - value rendered after the '#'.
      # parent - the expression the key is looked up on; it becomes the sole
      #          predecessor of this expression.
      def initialize(key, parent)
        @key = key
        @predecessors = [parent]
      end

      # Renders the expression. With +inner+ set, the parent is referred to
      # by its field alias instead of its own string form.
      def to_s(inner=false)
        parent = @predecessors.first
        prefix = inner ? parent.field_alias : parent
        "#{prefix}##{@key}"
      end
    end
  end
end
|
@@ -11,17 +11,20 @@ module Piglet
|
|
11
11
|
options ||= {}
|
12
12
|
@operator, @expression, @space_between = operator, expression, space_between
|
13
13
|
@type = options[:type] || expression.type
|
14
|
+
@predecessors = [expression]
|
14
15
|
end
|
15
16
|
|
16
17
|
def simple?
|
17
18
|
false
|
18
19
|
end
|
19
20
|
|
20
|
-
def to_s
|
21
|
+
def to_s(inner=false)
|
22
|
+
expr = if inner then @expression.field_alias else @expression end
|
23
|
+
|
21
24
|
if @space_between
|
22
|
-
"#{@operator} #{parenthesise(
|
25
|
+
"#{@operator} #{parenthesise(expr)}"
|
23
26
|
else
|
24
|
-
"#{@operator}#{parenthesise(
|
27
|
+
"#{@operator}#{parenthesise(expr)}"
|
25
28
|
end
|
26
29
|
end
|
27
30
|
end
|
@@ -10,16 +10,13 @@ module Piglet
|
|
10
10
|
@name, @parent = name, relation
|
11
11
|
@explicit_ancestry = options[:explicit_ancestry] || false
|
12
12
|
@type = options[:type]
|
13
|
+
@predecessors = [relation] unless relation.nil?
|
13
14
|
end
|
14
15
|
|
15
16
|
def simple?
|
16
17
|
true
|
17
18
|
end
|
18
|
-
|
19
|
-
def field(name)
|
20
|
-
Reference.new(name, self, :explicit_ancestry => true)
|
21
|
-
end
|
22
|
-
|
19
|
+
|
23
20
|
def method_missing(name, *args)
|
24
21
|
if name.to_s =~ /^\w+$/ && args.empty?
|
25
22
|
field(name)
|
@@ -32,12 +29,13 @@ module Piglet
|
|
32
29
|
field("\$#{n}")
|
33
30
|
end
|
34
31
|
|
35
|
-
def to_s
|
32
|
+
def to_s(inner=false)
|
36
33
|
if @explicit_ancestry
|
37
34
|
if @parent.respond_to?(:alias)
|
38
35
|
"#{@parent.alias}.#{@name.to_s}"
|
39
36
|
else
|
40
|
-
|
37
|
+
expr = if inner then @parent.field_alias else @parent end
|
38
|
+
"#{expr}.#{@name.to_s}"
|
41
39
|
end
|
42
40
|
else
|
43
41
|
@name.to_s
|
data/lib/piglet/field/rename.rb
CHANGED
@@ -3,14 +3,16 @@
|
|
3
3
|
module Piglet
|
4
4
|
module Field
|
5
5
|
class Rename # :nodoc:
|
6
|
-
attr_reader :name, :type
|
6
|
+
attr_reader :name, :type, :predecessors
|
7
7
|
|
8
|
-
def initialize(
|
9
|
-
@name, @field_expression, @type =
|
8
|
+
def initialize(name, field_expression)
|
9
|
+
@name, @field_expression, @type = name, field_expression, field_expression.type
|
10
|
+
@predecessors = [field_expression]
|
10
11
|
end
|
11
12
|
|
12
|
-
def to_s
|
13
|
-
|
13
|
+
def to_s(inner=false)
|
14
|
+
expr = if inner then @field_expression.field_alias else @field_expression end
|
15
|
+
"#{expr} AS #{@name}"
|
14
16
|
end
|
15
17
|
end
|
16
18
|
end
|
@@ -9,14 +9,16 @@ module Piglet
|
|
9
9
|
options ||= {}
|
10
10
|
@operator, @expression = operator, expression
|
11
11
|
@type = options[:type] || expression.type
|
12
|
+
@predecessors = [expression]
|
12
13
|
end
|
13
14
|
|
14
15
|
def simple?
|
15
16
|
false
|
16
17
|
end
|
17
18
|
|
18
|
-
def to_s
|
19
|
-
|
19
|
+
def to_s(inner=false)
|
20
|
+
expr = if inner then @expression.field_alias else @expression end
|
21
|
+
"#{parenthesise(expr)} #{@operator}"
|
20
22
|
end
|
21
23
|
end
|
22
24
|
end
|
@@ -5,10 +5,15 @@ module Piglet
|
|
5
5
|
|
6
6
|
def initialize(ali4s, *args)
|
7
7
|
@alias, @args = ali4s, args
|
8
|
+
@predecessors = args.select { |arg| arg.respond_to? :field_alias }
|
8
9
|
end
|
9
10
|
|
10
|
-
def to_s
|
11
|
-
|
11
|
+
def to_s(inner=false)
|
12
|
+
if inner
|
13
|
+
"#{@alias}(#{args_to_inner_s(@args)})"
|
14
|
+
else
|
15
|
+
"#{@alias}(#{args_to_s(@args)})"
|
16
|
+
end
|
12
17
|
end
|
13
18
|
|
14
19
|
private
|
@@ -23,6 +28,18 @@ module Piglet
|
|
23
28
|
arg
|
24
29
|
end
|
25
30
|
end
|
31
|
+
|
32
|
+
def args_to_inner_s(arg)
|
33
|
+
if arg.is_a? String
|
34
|
+
"'#{escape(arg)}'"
|
35
|
+
elsif arg.is_a? Enumerable
|
36
|
+
arg.map { |a| args_to_inner_s(a) }.join(", ")
|
37
|
+
elsif arg.respond_to? :field_alias
|
38
|
+
arg.field_alias
|
39
|
+
else
|
40
|
+
arg.to_s
|
41
|
+
end
|
42
|
+
end
|
26
43
|
end
|
27
44
|
end
|
28
45
|
end
|
data/lib/piglet/inout/load.rb
CHANGED
@@ -6,9 +6,9 @@ module Piglet
|
|
6
6
|
include Piglet::Relation::Relation
|
7
7
|
include StorageTypes
|
8
8
|
|
9
|
-
def initialize(path, options={})
|
9
|
+
def initialize(path, interpreter, options={})
|
10
10
|
options ||= {}
|
11
|
-
@path, @using, @schema = path, options[:using], options[:schema]
|
11
|
+
@path, @interpreter, @using, @schema = path, interpreter, options[:using], options[:schema]
|
12
12
|
end
|
13
13
|
|
14
14
|
def schema
|
data/lib/piglet/interpreter.rb
CHANGED
@@ -38,7 +38,13 @@ module Piglet
|
|
38
38
|
|
39
39
|
statements.flatten.map { |s| s.to_s }.join(";\n") + ";\n"
|
40
40
|
end
|
41
|
-
|
41
|
+
|
42
|
+
def next_relation_alias
|
43
|
+
@counter ||= 0
|
44
|
+
@counter += 1
|
45
|
+
"relation_#{@counter}"
|
46
|
+
end
|
47
|
+
|
42
48
|
protected
|
43
49
|
|
44
50
|
# LOAD
|
@@ -55,7 +61,7 @@ module Piglet
|
|
55
61
|
# NOTE: the syntax load('path', :schema => {:a => :chararray, :b => :int})
|
56
62
|
# would be nice, but the order of the keys can't be guaranteed in Ruby 1.8.
|
57
63
|
def load(path, options={})
|
58
|
-
Inout::Load.new(path, options)
|
64
|
+
Inout::Load.new(path, self, options)
|
59
65
|
end
|
60
66
|
|
61
67
|
# STORE
|
@@ -151,22 +157,6 @@ module Piglet
|
|
151
157
|
@top_level_statements << Param::Default.new(name, value, options)
|
152
158
|
end
|
153
159
|
|
154
|
-
# Support for binary conditions, a.k.a. the ternary operator.
|
155
|
-
#
|
156
|
-
# x.test(x.a > x.b, x.a, x.b) # => (a > b ? a : b)
|
157
|
-
#
|
158
|
-
# Should only be used in the block given to #filter and #foreach
|
159
|
-
def test(test, if_true, if_false)
|
160
|
-
Field::BinaryConditional.new(test, if_true, if_false)
|
161
|
-
end
|
162
|
-
|
163
|
-
# Support for literals in FOREACH … GENERATE blocks.
|
164
|
-
#
|
165
|
-
# x.foreach { |r| [literal("hello").as(:hello)] } # => FOREACH x GENERATE 'hello' AS hello
|
166
|
-
def literal(obj)
|
167
|
-
Field::Literal.new(obj)
|
168
|
-
end
|
169
|
-
|
170
160
|
private
|
171
161
|
|
172
162
|
def assignments(relation, ignore_set)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Piglet
  module Relation
    # Helper object that wraps a relation and the interpreter.
    # NOTE(review): presumably the object that the blocks given to #filter
    # and #foreach are evaluated against -- confirm against the callers.
    class BlockContext
      # relation    - receives field lookups (#[] and zero-argument calls).
      # interpreter - receives calls with arguments that it can handle.
      def initialize(relation, interpreter)
        @relation, @interpreter = relation, interpreter
      end

      # Support for literals in FOREACH ... GENERATE blocks.
      #
      #   x.foreach { |r| [literal("hello").as(:hello)] } # => FOREACH x GENERATE 'hello' AS hello
      def literal(obj)
        Field::Literal.new(obj)
      end

      # Support for binary conditions, a.k.a. the ternary operator.
      #
      #   x.test(x.a > x.b, x.a, x.b) # => (a > b ? a : b)
      #
      # Should only be used in the block given to #filter and #foreach
      def test(test, if_true, if_false)
        Field::BinaryConditional.new(test, if_true, if_false)
      end

      # Positional field access: context[n] asks the relation for field "$n".
      def [](n)
        @relation.field("\$#{n}")
      end

      # Calls without arguments are handed to the relation's own
      # method_missing; calls with arguments go to the interpreter when it
      # defines the method. Anything else raises NoMethodError via super.
      def method_missing(name, *args)
        if args.empty?
          @relation.method_missing(name, *args)
        elsif interpreter_method?(name)
          @interpreter.send(name, *args)
        else
          super
        end
      end

      private

      # True when the interpreter can take +name+ through #send.
      # respond_to? alone is not enough: since Ruby 2.0 it reports false for
      # protected methods, and the interpreter's DSL methods are protected.
      def interpreter_method?(name)
        @interpreter.respond_to?(name) ||
          @interpreter.class.protected_method_defined?(name)
      end
    end
  end
end
|
@@ -5,7 +5,8 @@ module Piglet
|
|
5
5
|
class Cogroup # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, description)
|
8
|
+
def initialize(relation, interpreter, description)
|
9
|
+
@interpreter = interpreter
|
9
10
|
@join_fields = description.reject { |k, v| ! (k.is_a?(Relation)) }
|
10
11
|
@sources = @join_fields.keys
|
11
12
|
@parallel = description[:parallel]
|
@@ -5,9 +5,9 @@ module Piglet
|
|
5
5
|
class Cross # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relations, options={})
|
8
|
+
def initialize(relations, interpreter, options={})
|
9
9
|
options ||= {}
|
10
|
-
@sources, @parallel = relations, options[:parallel]
|
10
|
+
@sources, @interpreter, @parallel = relations, interpreter, options[:parallel]
|
11
11
|
end
|
12
12
|
|
13
13
|
def schema
|
@@ -5,9 +5,9 @@ module Piglet
|
|
5
5
|
class Distinct # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, options={})
|
8
|
+
def initialize(relation, interpreter, options={})
|
9
9
|
options ||= {}
|
10
|
-
@sources, @parallel = [relation], options[:parallel]
|
10
|
+
@sources, @interpreter, @parallel = [relation], interpreter, options[:parallel]
|
11
11
|
end
|
12
12
|
|
13
13
|
def to_s
|
@@ -5,8 +5,8 @@ module Piglet
|
|
5
5
|
class Filter # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, expression)
|
9
|
-
@sources, @expression = [relation], expression
|
8
|
+
def initialize(relation, interpreter, expression)
|
9
|
+
@sources, @interpreter, @expression = [relation], interpreter, expression
|
10
10
|
end
|
11
11
|
|
12
12
|
def to_s
|
@@ -5,8 +5,8 @@ module Piglet
|
|
5
5
|
class Foreach # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, field_expressions)
|
9
|
-
@sources, @field_expressions = [relation], [field_expressions].flatten
|
8
|
+
def initialize(relation, interpreter, field_expressions)
|
9
|
+
@sources, @interpreter, @field_expressions = [relation], interpreter, [field_expressions].flatten
|
10
10
|
end
|
11
11
|
|
12
12
|
def schema
|
@@ -5,9 +5,9 @@ module Piglet
|
|
5
5
|
class Group # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, grouping, options={})
|
8
|
+
def initialize(relation, interpreter, grouping, options={})
|
9
9
|
options ||= {}
|
10
|
-
@sources, @grouping, @parallel = [relation], grouping, options[:parallel]
|
10
|
+
@sources, @interpreter, @grouping, @parallel = [relation], interpreter, grouping, options[:parallel]
|
11
11
|
end
|
12
12
|
|
13
13
|
def schema
|
data/lib/piglet/relation/join.rb
CHANGED
@@ -5,7 +5,8 @@ module Piglet
|
|
5
5
|
class Join # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, description)
|
8
|
+
def initialize(relation, interpreter, description)
|
9
|
+
@interpreter = interpreter
|
9
10
|
@join_fields = Hash[*description.select { |k, v| k.is_a?(Relation) }.flatten]
|
10
11
|
@sources = @join_fields.keys
|
11
12
|
@using = description[:using]
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8

require 'set'

module Piglet
  module Relation
    # Renders a nested FOREACH statement: a block of intermediate
    # assignments followed by a GENERATE clause.
    class NestedForeach
      include Relation

      # relation    - the source relation being iterated.
      # interpreter - stored for later use; not read in this class.
      # expressions - the field expressions to GENERATE.
      def initialize(relation, interpreter, expressions)
        @sources, @interpreter, @expressions = [relation], interpreter, expressions
      end

      # Builds the schema from the name and type of each generated
      # expression.
      def schema
        # The constructor stores the expressions in @expressions; reading
        # @field_expressions here would always hit nil.
        description = @expressions.map { |expr| [expr.name, expr.type] }
        Piglet::Schema::Tuple.parse(description)
      end

      # Returns the statement text:
      #
      #   FOREACH <source alias> {
      #     <alias> = <expression>;   (one line per intermediate expression)
      #     GENERATE <fields>;
      #   }
      def to_s
        block_assignments = block_expressions.map do |expression|
          "\t#{expression.field_alias} = #{expression.to_s(true)};\n"
        end

        generate_fields = @expressions.map do |expression|
          if expression.respond_to?(:field_alias)
            expression.field_alias
          else
            expression.to_s(true)
          end
        end

        str = "FOREACH #{@sources.first.alias} {\n"
        str << block_assignments.join
        str << "\tGENERATE " + generate_fields.join(', ') + ";\n"
        str << "}"
      end

      private

      # All intermediate field expressions the generated expressions depend
      # on, predecessors first, each listed once.
      def block_expressions
        handled = Set.new
        # The source relation itself never needs an assignment in the block.
        handled.add(@sources.first)
        @expressions.map { |expression| intermediates(expression, handled) }.flatten
      end

      # Depth-first walk over +expression+ and its predecessors. Returns the
      # fields that need a block assignment; +handled+ is updated in place so
      # that shared sub-expressions are emitted only once.
      def intermediates(expression, handled)
        return [] if handled.member?(expression)

        result = []
        if expression.is_a?(Field::Field) || expression.is_a?(Field::Rename)
          # predecessors may be unset (e.g. a reference created without a
          # parent relation), so treat nil as "no predecessors".
          (expression.predecessors || []).each do |predecessor|
            result.concat(intermediates(predecessor, handled))
          end
          handled.add(expression)
        end

        # Renames are not assigned inside the block; they are rendered
        # directly in the GENERATE clause.
        if expression.is_a?(Field::Field) && !expression.is_a?(Field::Rename)
          result << expression
        end
        result
      end
    end
  end
end
|