piglet 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -4
- data/Gemfile +10 -0
- data/Gemfile.lock +53 -0
- data/README.rdoc +74 -38
- data/Rakefile +10 -1
- data/lib/piglet.rb +5 -1
- data/lib/piglet/field/call_expression.rb +7 -2
- data/lib/piglet/field/direct_expression.rb +28 -0
- data/lib/piglet/field/field.rb +73 -3
- data/lib/piglet/field/infix_expression.rb +14 -9
- data/lib/piglet/field/map_value.rb +17 -0
- data/lib/piglet/field/prefix_expression.rb +6 -3
- data/lib/piglet/field/reference.rb +5 -7
- data/lib/piglet/field/rename.rb +7 -5
- data/lib/piglet/field/suffix_expression.rb +4 -2
- data/lib/piglet/field/udf_expression.rb +19 -2
- data/lib/piglet/inout/load.rb +2 -2
- data/lib/piglet/interpreter.rb +8 -18
- data/lib/piglet/relation/block_context.rb +41 -0
- data/lib/piglet/relation/cogroup.rb +2 -1
- data/lib/piglet/relation/cross.rb +2 -2
- data/lib/piglet/relation/distinct.rb +2 -2
- data/lib/piglet/relation/filter.rb +2 -2
- data/lib/piglet/relation/foreach.rb +2 -2
- data/lib/piglet/relation/group.rb +2 -2
- data/lib/piglet/relation/join.rb +2 -1
- data/lib/piglet/relation/limit.rb +2 -2
- data/lib/piglet/relation/nested_foreach.rb +60 -0
- data/lib/piglet/relation/order.rb +4 -2
- data/lib/piglet/relation/relation.rb +43 -32
- data/lib/piglet/relation/sample.rb +2 -2
- data/lib/piglet/relation/split.rb +5 -5
- data/lib/piglet/relation/stream.rb +2 -1
- data/lib/piglet/relation/union.rb +2 -2
- data/piglet.gemspec +126 -0
- data/spec/piglet/field/field_spec.rb +7 -2
- data/spec/piglet/interpreter_spec.rb +6 -6
- data/spec/piglet/relation/relation_spec.rb +7 -4
- data/spec/piglet/relation/split_spec.rb +3 -1
- data/spec/piglet/relation/union_spec.rb +5 -7
- data/spec/piglet_spec.rb +76 -31
- data/spec/spec_helper.rb +9 -0
- data/tasks/gem.rake +16 -19
- data/tasks/rdoc.rake +1 -3
- metadata +34 -11
- data/TODO +0 -2
@@ -15,24 +15,29 @@ module Piglet
|
|
15
15
|
else
|
16
16
|
@type = determine_type(@left_expression, @right_expression)
|
17
17
|
end
|
18
|
+
@predecessors = [left_expression, right_expression]
|
18
19
|
end
|
19
20
|
|
20
21
|
def simple?
|
21
22
|
false
|
22
23
|
end
|
23
24
|
|
24
|
-
def to_s
|
25
|
-
|
26
|
-
|
25
|
+
def to_s(inner=false)
|
26
|
+
if inner
|
27
|
+
left = @left_expression.field_alias
|
28
|
+
right = @right_expression.field_alias
|
29
|
+
else
|
30
|
+
left = @left_expression
|
31
|
+
right = @right_expression
|
27
32
|
|
28
|
-
|
29
|
-
|
30
|
-
|
33
|
+
if left.respond_to?(:operator) && left.operator != @operator
|
34
|
+
left = parenthesise(left)
|
35
|
+
end
|
31
36
|
|
32
|
-
|
33
|
-
|
37
|
+
if right.respond_to?(:operator) && right.operator != @operator
|
38
|
+
right = parenthesise(right)
|
39
|
+
end
|
34
40
|
end
|
35
|
-
|
36
41
|
"#{left} #{@operator} #{right}"
|
37
42
|
end
|
38
43
|
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Piglet
  module Field
    # Field expression rendered as "<parent>#<key>", where the parent is the
    # expression handed to the constructor.
    class MapValue
      include Field

      # key::    the value printed after the '#'
      # parent:: the expression printed before the '#'; it becomes the only
      #          predecessor of this field
      def initialize(key, parent)
        @key = key
        @predecessors = [parent]
      end

      # Renders the expression. When +inner+ is true the parent is referred to
      # by its field alias instead of by its own string form.
      def to_s(inner=false)
        parent = @predecessors.first
        prefix = inner ? parent.field_alias : parent
        "#{prefix}##{@key}"
      end
    end
  end
end
|
@@ -11,17 +11,20 @@ module Piglet
|
|
11
11
|
options ||= {}
|
12
12
|
@operator, @expression, @space_between = operator, expression, space_between
|
13
13
|
@type = options[:type] || expression.type
|
14
|
+
@predecessors = [expression]
|
14
15
|
end
|
15
16
|
|
16
17
|
def simple?
|
17
18
|
false
|
18
19
|
end
|
19
20
|
|
20
|
-
def to_s
|
21
|
+
def to_s(inner=false)
|
22
|
+
expr = if inner then @expression.field_alias else @expression end
|
23
|
+
|
21
24
|
if @space_between
|
22
|
-
"#{@operator} #{parenthesise(
|
25
|
+
"#{@operator} #{parenthesise(expr)}"
|
23
26
|
else
|
24
|
-
"#{@operator}#{parenthesise(
|
27
|
+
"#{@operator}#{parenthesise(expr)}"
|
25
28
|
end
|
26
29
|
end
|
27
30
|
end
|
@@ -10,16 +10,13 @@ module Piglet
|
|
10
10
|
@name, @parent = name, relation
|
11
11
|
@explicit_ancestry = options[:explicit_ancestry] || false
|
12
12
|
@type = options[:type]
|
13
|
+
@predecessors = [relation] unless relation.nil?
|
13
14
|
end
|
14
15
|
|
15
16
|
def simple?
|
16
17
|
true
|
17
18
|
end
|
18
|
-
|
19
|
-
def field(name)
|
20
|
-
Reference.new(name, self, :explicit_ancestry => true)
|
21
|
-
end
|
22
|
-
|
19
|
+
|
23
20
|
def method_missing(name, *args)
|
24
21
|
if name.to_s =~ /^\w+$/ && args.empty?
|
25
22
|
field(name)
|
@@ -32,12 +29,13 @@ module Piglet
|
|
32
29
|
field("\$#{n}")
|
33
30
|
end
|
34
31
|
|
35
|
-
def to_s
|
32
|
+
def to_s(inner=false)
|
36
33
|
if @explicit_ancestry
|
37
34
|
if @parent.respond_to?(:alias)
|
38
35
|
"#{@parent.alias}.#{@name.to_s}"
|
39
36
|
else
|
40
|
-
|
37
|
+
expr = if inner then @parent.field_alias else @parent end
|
38
|
+
"#{expr}.#{@name.to_s}"
|
41
39
|
end
|
42
40
|
else
|
43
41
|
@name.to_s
|
data/lib/piglet/field/rename.rb
CHANGED
@@ -3,14 +3,16 @@
|
|
3
3
|
module Piglet
|
4
4
|
module Field
|
5
5
|
class Rename # :nodoc:
|
6
|
-
attr_reader :name, :type
|
6
|
+
attr_reader :name, :type, :predecessors
|
7
7
|
|
8
|
-
def initialize(
|
9
|
-
@name, @field_expression, @type =
|
8
|
+
def initialize(name, field_expression)
|
9
|
+
@name, @field_expression, @type = name, field_expression, field_expression.type
|
10
|
+
@predecessors = [field_expression]
|
10
11
|
end
|
11
12
|
|
12
|
-
def to_s
|
13
|
-
|
13
|
+
def to_s(inner=false)
|
14
|
+
expr = if inner then @field_expression.field_alias else @field_expression end
|
15
|
+
"#{expr} AS #{@name}"
|
14
16
|
end
|
15
17
|
end
|
16
18
|
end
|
@@ -9,14 +9,16 @@ module Piglet
|
|
9
9
|
options ||= {}
|
10
10
|
@operator, @expression = operator, expression
|
11
11
|
@type = options[:type] || expression.type
|
12
|
+
@predecessors = [expression]
|
12
13
|
end
|
13
14
|
|
14
15
|
def simple?
|
15
16
|
false
|
16
17
|
end
|
17
18
|
|
18
|
-
def to_s
|
19
|
-
|
19
|
+
def to_s(inner=false)
|
20
|
+
expr = if inner then @expression.field_alias else @expression end
|
21
|
+
"#{parenthesise(expr)} #{@operator}"
|
20
22
|
end
|
21
23
|
end
|
22
24
|
end
|
@@ -5,10 +5,15 @@ module Piglet
|
|
5
5
|
|
6
6
|
def initialize(ali4s, *args)
|
7
7
|
@alias, @args = ali4s, args
|
8
|
+
@predecessors = args.select { |arg| arg.respond_to? :field_alias }
|
8
9
|
end
|
9
10
|
|
10
|
-
def to_s
|
11
|
-
|
11
|
+
def to_s(inner=false)
|
12
|
+
if inner
|
13
|
+
"#{@alias}(#{args_to_inner_s(@args)})"
|
14
|
+
else
|
15
|
+
"#{@alias}(#{args_to_s(@args)})"
|
16
|
+
end
|
12
17
|
end
|
13
18
|
|
14
19
|
private
|
@@ -23,6 +28,18 @@ module Piglet
|
|
23
28
|
arg
|
24
29
|
end
|
25
30
|
end
|
31
|
+
|
32
|
+
# Renders an argument (or a collection of arguments) in the "inner" form:
# strings are escaped and single-quoted, enumerables are rendered element
# by element and joined with ", ", objects that respond to #field_alias
# are replaced by that alias, and anything else is rendered with #to_s.
def args_to_inner_s(arg)
  return "'#{escape(arg)}'" if arg.is_a?(String)
  return arg.map { |item| args_to_inner_s(item) }.join(", ") if arg.is_a?(Enumerable)
  return arg.field_alias if arg.respond_to?(:field_alias)

  arg.to_s
end
|
26
43
|
end
|
27
44
|
end
|
28
45
|
end
|
data/lib/piglet/inout/load.rb
CHANGED
@@ -6,9 +6,9 @@ module Piglet
|
|
6
6
|
include Piglet::Relation::Relation
|
7
7
|
include StorageTypes
|
8
8
|
|
9
|
-
def initialize(path, options={})
|
9
|
+
def initialize(path, interpreter, options={})
|
10
10
|
options ||= {}
|
11
|
-
@path, @using, @schema = path, options[:using], options[:schema]
|
11
|
+
@path, @interpreter, @using, @schema = path, interpreter, options[:using], options[:schema]
|
12
12
|
end
|
13
13
|
|
14
14
|
def schema
|
data/lib/piglet/interpreter.rb
CHANGED
@@ -38,7 +38,13 @@ module Piglet
|
|
38
38
|
|
39
39
|
statements.flatten.map { |s| s.to_s }.join(";\n") + ";\n"
|
40
40
|
end
|
41
|
-
|
41
|
+
|
42
|
+
# Returns the next alias in the sequence "relation_1", "relation_2", ...
# The position in the sequence is kept in @counter, which starts counting
# from zero the first time this method is called.
def next_relation_alias
  @counter = (@counter || 0) + 1
  "relation_#{@counter}"
end
|
47
|
+
|
42
48
|
protected
|
43
49
|
|
44
50
|
# LOAD
|
@@ -55,7 +61,7 @@ module Piglet
|
|
55
61
|
# NOTE: the syntax load('path', :schema => {:a => :chararray, :b => :int})
|
56
62
|
# would be nice, but the order of the keys can't be guaranteed in Ruby 1.8.
|
57
63
|
def load(path, options={})
|
58
|
-
Inout::Load.new(path, options)
|
64
|
+
Inout::Load.new(path, self, options)
|
59
65
|
end
|
60
66
|
|
61
67
|
# STORE
|
@@ -151,22 +157,6 @@ module Piglet
|
|
151
157
|
@top_level_statements << Param::Default.new(name, value, options)
|
152
158
|
end
|
153
159
|
|
154
|
-
# Support for binary conditions, a.k.a. the ternary operator.
|
155
|
-
#
|
156
|
-
# x.test(x.a > x.b, x.a, x.b) # => (a > b ? a : b)
|
157
|
-
#
|
158
|
-
# Should only be used in the block given to #filter and #foreach
|
159
|
-
def test(test, if_true, if_false)
|
160
|
-
Field::BinaryConditional.new(test, if_true, if_false)
|
161
|
-
end
|
162
|
-
|
163
|
-
# Support for literals in FOREACH … GENERATE blocks.
|
164
|
-
#
|
165
|
-
# x.foreach { |r| [literal("hello").as(:hello)] } # => FOREACH x GENERATE 'hello' AS hello
|
166
|
-
def literal(obj)
|
167
|
-
Field::Literal.new(obj)
|
168
|
-
end
|
169
|
-
|
170
160
|
private
|
171
161
|
|
172
162
|
def assignments(relation, ignore_set)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Piglet
  module Relation
    # Wraps a relation and an interpreter. It offers a few helpers of its own
    # (#literal, #test, #[]) and forwards every other message to the relation
    # or to the interpreter.
    class BlockContext
      # relation::    receives argument-less messages and positional lookups
      # interpreter:: receives messages that carry arguments
      def initialize(relation, interpreter)
        @relation, @interpreter = relation, interpreter
      end

      # Support for literals in FOREACH … GENERATE blocks.
      #
      #   x.foreach { |r| [literal("hello").as(:hello)] } # => FOREACH x GENERATE 'hello' AS hello
      def literal(obj)
        Field::Literal.new(obj)
      end

      # Support for binary conditions, a.k.a. the ternary operator.
      #
      #   x.test(x.a > x.b, x.a, x.b) # => (a > b ? a : b)
      #
      # Should only be used in the block given to #filter and #foreach
      def test(test, if_true, if_false)
        Field::BinaryConditional.new(test, if_true, if_false)
      end

      # Positional field access: context[n] asks the relation for the field
      # named "$n".
      def [](n)
        @relation.field("\$#{n}")
      end

      # Messages without arguments go to the relation's own #method_missing.
      # Messages with arguments go to the interpreter when it has a public or
      # protected method of that name; anything else raises NoMethodError.
      def method_missing(name, *args)
        if args.empty?
          @relation.method_missing(name)
        elsif @interpreter.respond_to?(name) || @interpreter.class.protected_method_defined?(name)
          # #respond_to? alone reports false for protected methods on
          # Ruby >= 2.0, so protected interpreter methods are checked
          # explicitly; #send then bypasses the visibility check.
          @interpreter.send(name, *args)
        else
          super
        end
      end
    end
  end
end
|
@@ -5,7 +5,8 @@ module Piglet
|
|
5
5
|
class Cogroup # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, description)
|
8
|
+
def initialize(relation, interpreter, description)
|
9
|
+
@interpreter = interpreter
|
9
10
|
@join_fields = description.reject { |k, v| ! (k.is_a?(Relation)) }
|
10
11
|
@sources = @join_fields.keys
|
11
12
|
@parallel = description[:parallel]
|
@@ -5,9 +5,9 @@ module Piglet
|
|
5
5
|
class Cross # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relations, options={})
|
8
|
+
def initialize(relations, interpreter, options={})
|
9
9
|
options ||= {}
|
10
|
-
@sources, @parallel = relations, options[:parallel]
|
10
|
+
@sources, @interpreter, @parallel = relations, interpreter, options[:parallel]
|
11
11
|
end
|
12
12
|
|
13
13
|
def schema
|
@@ -5,9 +5,9 @@ module Piglet
|
|
5
5
|
class Distinct # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, options={})
|
8
|
+
def initialize(relation, interpreter, options={})
|
9
9
|
options ||= {}
|
10
|
-
@sources, @parallel = [relation], options[:parallel]
|
10
|
+
@sources, @interpreter, @parallel = [relation], interpreter, options[:parallel]
|
11
11
|
end
|
12
12
|
|
13
13
|
def to_s
|
@@ -5,8 +5,8 @@ module Piglet
|
|
5
5
|
class Filter # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, expression)
|
9
|
-
@sources, @expression = [relation], expression
|
8
|
+
def initialize(relation, interpreter, expression)
|
9
|
+
@sources, @interpreter, @expression = [relation], interpreter, expression
|
10
10
|
end
|
11
11
|
|
12
12
|
def to_s
|
@@ -5,8 +5,8 @@ module Piglet
|
|
5
5
|
class Foreach # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, field_expressions)
|
9
|
-
@sources, @field_expressions = [relation], [field_expressions].flatten
|
8
|
+
def initialize(relation, interpreter, field_expressions)
|
9
|
+
@sources, @interpreter, @field_expressions = [relation], interpreter, [field_expressions].flatten
|
10
10
|
end
|
11
11
|
|
12
12
|
def schema
|
@@ -5,9 +5,9 @@ module Piglet
|
|
5
5
|
class Group # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, grouping, options={})
|
8
|
+
def initialize(relation, interpreter, grouping, options={})
|
9
9
|
options ||= {}
|
10
|
-
@sources, @grouping, @parallel = [relation], grouping, options[:parallel]
|
10
|
+
@sources, @interpreter, @grouping, @parallel = [relation], interpreter, grouping, options[:parallel]
|
11
11
|
end
|
12
12
|
|
13
13
|
def schema
|
data/lib/piglet/relation/join.rb
CHANGED
@@ -5,7 +5,8 @@ module Piglet
|
|
5
5
|
class Join # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, description)
|
8
|
+
def initialize(relation, interpreter, description)
|
9
|
+
@interpreter = interpreter
|
9
10
|
@join_fields = Hash[*description.select { |k, v| k.is_a?(Relation) }.flatten]
|
10
11
|
@sources = @join_fields.keys
|
11
12
|
@using = description[:using]
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Piglet
  module Relation
    # Relation rendered as a FOREACH with a nested block:
    #
    #   FOREACH <source alias> {
    #     <field alias> = <expression>;
    #     ...
    #     GENERATE <field alias>, ...;
    #   }
    #
    # Every field expression reachable from the generated expressions through
    # #predecessors is assigned to its field alias inside the block, and the
    # GENERATE clause then refers to those aliases.
    class NestedForeach
      include Relation

      # relation::    the source relation; its alias follows FOREACH
      # interpreter:: stored as @interpreter (not used by this class itself)
      # expressions:: the expressions listed in the GENERATE clause
      def initialize(relation, interpreter, expressions)
        @sources, @interpreter, @expressions = [relation], interpreter, expressions
      end

      # Builds a tuple schema from the name and type of every generated
      # expression.
      def schema
        # Reads @expressions, the variable the initializer assigns; the
        # previous code read @field_expressions, which is never set.
        description = @expressions.map { |expr| [expr.name, expr.type] }
        Piglet::Schema::Tuple.parse(description)
      end

      # Renders the whole statement: one assignment line per intermediate
      # expression, followed by the GENERATE line.
      def to_s
        block_assignments = block_expressions.map do |expression|
          "\t#{expression.field_alias} = #{expression.to_s(true)};\n"
        end

        generate_fields = @expressions.map do |expression|
          if expression.respond_to?(:field_alias)
            expression.field_alias
          else
            expression.to_s(true)
          end
        end

        str = "FOREACH #{@sources.first.alias} {\n"
        str << block_assignments.join
        str << "\tGENERATE " + generate_fields.join(', ') + ";\n"
        str << "}"
      end

      private

      # All field expressions that need an assignment line, in dependency
      # order (predecessors before the expressions that use them).
      def block_expressions
        handled = Set.new
        # The source relation is what the chain of predecessors ends in; mark
        # it as handled up front so it is never descended into.
        handled.add(@sources.first)
        @expressions.map { |expression| intermediates(expression, handled) }.flatten
      end

      # Collects +expression+ and its not yet handled predecessors, depth
      # first. Only Field::Field instances are collected; a Field::Rename is
      # descended into but never collected itself.
      def intermediates(expression, handled)
        return [] if handled.member?(expression)

        result = []
        is_field = expression.is_a?(Field::Field)
        if is_field || expression.is_a?(Field::Rename)
          # Array() tolerates a nil predecessor list (a field reference
          # created without a relation never assigns one).
          Array(expression.predecessors).each do |predecessor|
            result.concat(intermediates(predecessor, handled))
          end
          handled.add(expression)
        end

        result << expression if is_field && !expression.is_a?(Field::Rename)
        result
      end
    end
  end
end
|