piglet 0.2.5 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. data/.gitignore +5 -4
  2. data/Gemfile +10 -0
  3. data/Gemfile.lock +53 -0
  4. data/README.rdoc +74 -38
  5. data/Rakefile +10 -1
  6. data/lib/piglet.rb +5 -1
  7. data/lib/piglet/field/call_expression.rb +7 -2
  8. data/lib/piglet/field/direct_expression.rb +28 -0
  9. data/lib/piglet/field/field.rb +73 -3
  10. data/lib/piglet/field/infix_expression.rb +14 -9
  11. data/lib/piglet/field/map_value.rb +17 -0
  12. data/lib/piglet/field/prefix_expression.rb +6 -3
  13. data/lib/piglet/field/reference.rb +5 -7
  14. data/lib/piglet/field/rename.rb +7 -5
  15. data/lib/piglet/field/suffix_expression.rb +4 -2
  16. data/lib/piglet/field/udf_expression.rb +19 -2
  17. data/lib/piglet/inout/load.rb +2 -2
  18. data/lib/piglet/interpreter.rb +8 -18
  19. data/lib/piglet/relation/block_context.rb +41 -0
  20. data/lib/piglet/relation/cogroup.rb +2 -1
  21. data/lib/piglet/relation/cross.rb +2 -2
  22. data/lib/piglet/relation/distinct.rb +2 -2
  23. data/lib/piglet/relation/filter.rb +2 -2
  24. data/lib/piglet/relation/foreach.rb +2 -2
  25. data/lib/piglet/relation/group.rb +2 -2
  26. data/lib/piglet/relation/join.rb +2 -1
  27. data/lib/piglet/relation/limit.rb +2 -2
  28. data/lib/piglet/relation/nested_foreach.rb +60 -0
  29. data/lib/piglet/relation/order.rb +4 -2
  30. data/lib/piglet/relation/relation.rb +43 -32
  31. data/lib/piglet/relation/sample.rb +2 -2
  32. data/lib/piglet/relation/split.rb +5 -5
  33. data/lib/piglet/relation/stream.rb +2 -1
  34. data/lib/piglet/relation/union.rb +2 -2
  35. data/piglet.gemspec +126 -0
  36. data/spec/piglet/field/field_spec.rb +7 -2
  37. data/spec/piglet/interpreter_spec.rb +6 -6
  38. data/spec/piglet/relation/relation_spec.rb +7 -4
  39. data/spec/piglet/relation/split_spec.rb +3 -1
  40. data/spec/piglet/relation/union_spec.rb +5 -7
  41. data/spec/piglet_spec.rb +76 -31
  42. data/spec/spec_helper.rb +9 -0
  43. data/tasks/gem.rake +16 -19
  44. data/tasks/rdoc.rake +1 -3
  45. metadata +34 -11
  46. data/TODO +0 -2
@@ -15,24 +15,29 @@ module Piglet
15
15
  else
16
16
  @type = determine_type(@left_expression, @right_expression)
17
17
  end
18
+ @predecessors = [left_expression, right_expression]
18
19
  end
19
20
 
20
21
  def simple?
21
22
  false
22
23
  end
23
24
 
24
- def to_s
25
- left = @left_expression
26
- right = @right_expression
25
+ def to_s(inner=false)
26
+ if inner
27
+ left = @left_expression.field_alias
28
+ right = @right_expression.field_alias
29
+ else
30
+ left = @left_expression
31
+ right = @right_expression
27
32
 
28
- if left.respond_to?(:operator) && left.operator != @operator
29
- left = parenthesise(left)
30
- end
33
+ if left.respond_to?(:operator) && left.operator != @operator
34
+ left = parenthesise(left)
35
+ end
31
36
 
32
- if right.respond_to?(:operator) && right.operator != @operator
33
- right = parenthesise(right)
37
+ if right.respond_to?(:operator) && right.operator != @operator
38
+ right = parenthesise(right)
39
+ end
34
40
  end
35
-
36
41
  "#{left} #{@operator} #{right}"
37
42
  end
38
43
 
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+ module Piglet
3
+ module Field
4
+ class MapValue
5
+ include Field
6
+
7
+ def initialize(key, parent)
8
+ @key, @predecessors = key, [parent]
9
+ end
10
+
11
+ def to_s(inner=false)
12
+ expr = if inner then @predecessors.first.field_alias else @predecessors.first end
13
+ "#{expr}##{@key}"
14
+ end
15
+ end
16
+ end
17
+ end
@@ -11,17 +11,20 @@ module Piglet
11
11
  options ||= {}
12
12
  @operator, @expression, @space_between = operator, expression, space_between
13
13
  @type = options[:type] || expression.type
14
+ @predecessors = [expression]
14
15
  end
15
16
 
16
17
  def simple?
17
18
  false
18
19
  end
19
20
 
20
- def to_s
21
+ def to_s(inner=false)
22
+ expr = if inner then @expression.field_alias else @expression end
23
+
21
24
  if @space_between
22
- "#{@operator} #{parenthesise(@expression)}"
25
+ "#{@operator} #{parenthesise(expr)}"
23
26
  else
24
- "#{@operator}#{parenthesise(@expression)}"
27
+ "#{@operator}#{parenthesise(expr)}"
25
28
  end
26
29
  end
27
30
  end
@@ -10,16 +10,13 @@ module Piglet
10
10
  @name, @parent = name, relation
11
11
  @explicit_ancestry = options[:explicit_ancestry] || false
12
12
  @type = options[:type]
13
+ @predecessors = [relation] unless relation.nil?
13
14
  end
14
15
 
15
16
  def simple?
16
17
  true
17
18
  end
18
-
19
- def field(name)
20
- Reference.new(name, self, :explicit_ancestry => true)
21
- end
22
-
19
+
23
20
  def method_missing(name, *args)
24
21
  if name.to_s =~ /^\w+$/ && args.empty?
25
22
  field(name)
@@ -32,12 +29,13 @@ module Piglet
32
29
  field("\$#{n}")
33
30
  end
34
31
 
35
- def to_s
32
+ def to_s(inner=false)
36
33
  if @explicit_ancestry
37
34
  if @parent.respond_to?(:alias)
38
35
  "#{@parent.alias}.#{@name.to_s}"
39
36
  else
40
- "#{@parent}.#{@name.to_s}"
37
+ expr = if inner then @parent.field_alias else @parent end
38
+ "#{expr}.#{@name.to_s}"
41
39
  end
42
40
  else
43
41
  @name.to_s
@@ -3,14 +3,16 @@
3
3
  module Piglet
4
4
  module Field
5
5
  class Rename # :nodoc:
6
- attr_reader :name, :type
6
+ attr_reader :name, :type, :predecessors
7
7
 
8
- def initialize(new_name, field_expression)
9
- @name, @field_expression, @type = new_name, field_expression, field_expression.type
8
+ def initialize(name, field_expression)
9
+ @name, @field_expression, @type = name, field_expression, field_expression.type
10
+ @predecessors = [field_expression]
10
11
  end
11
12
 
12
- def to_s
13
- "#{@field_expression} AS #{@name}"
13
+ def to_s(inner=false)
14
+ expr = if inner then @field_expression.field_alias else @field_expression end
15
+ "#{expr} AS #{@name}"
14
16
  end
15
17
  end
16
18
  end
@@ -9,14 +9,16 @@ module Piglet
9
9
  options ||= {}
10
10
  @operator, @expression = operator, expression
11
11
  @type = options[:type] || expression.type
12
+ @predecessors = [expression]
12
13
  end
13
14
 
14
15
  def simple?
15
16
  false
16
17
  end
17
18
 
18
- def to_s
19
- "#{parenthesise(@expression)} #{@operator}"
19
+ def to_s(inner=false)
20
+ expr = if inner then @expression.field_alias else @expression end
21
+ "#{parenthesise(expr)} #{@operator}"
20
22
  end
21
23
  end
22
24
  end
@@ -5,10 +5,15 @@ module Piglet
5
5
 
6
6
  def initialize(ali4s, *args)
7
7
  @alias, @args = ali4s, args
8
+ @predecessors = args.select { |arg| arg.respond_to? :field_alias }
8
9
  end
9
10
 
10
- def to_s
11
- "#{@alias}(#{args_to_s(@args)})"
11
+ def to_s(inner=false)
12
+ if inner
13
+ "#{@alias}(#{args_to_inner_s(@args)})"
14
+ else
15
+ "#{@alias}(#{args_to_s(@args)})"
16
+ end
12
17
  end
13
18
 
14
19
  private
@@ -23,6 +28,18 @@ module Piglet
23
28
  arg
24
29
  end
25
30
  end
31
+
32
+ def args_to_inner_s(arg)
33
+ if arg.is_a? String
34
+ "'#{escape(arg)}'"
35
+ elsif arg.is_a? Enumerable
36
+ arg.map { |a| args_to_inner_s(a) }.join(", ")
37
+ elsif arg.respond_to? :field_alias
38
+ arg.field_alias
39
+ else
40
+ arg.to_s
41
+ end
42
+ end
26
43
  end
27
44
  end
28
45
  end
@@ -6,9 +6,9 @@ module Piglet
6
6
  include Piglet::Relation::Relation
7
7
  include StorageTypes
8
8
 
9
- def initialize(path, options={})
9
+ def initialize(path, interpreter, options={})
10
10
  options ||= {}
11
- @path, @using, @schema = path, options[:using], options[:schema]
11
+ @path, @interpreter, @using, @schema = path, interpreter, options[:using], options[:schema]
12
12
  end
13
13
 
14
14
  def schema
@@ -38,7 +38,13 @@ module Piglet
38
38
 
39
39
  statements.flatten.map { |s| s.to_s }.join(";\n") + ";\n"
40
40
  end
41
-
41
+
42
+ def next_relation_alias
43
+ @counter ||= 0
44
+ @counter += 1
45
+ "relation_#{@counter}"
46
+ end
47
+
42
48
  protected
43
49
 
44
50
  # LOAD
@@ -55,7 +61,7 @@ module Piglet
55
61
  # NOTE: the syntax load('path', :schema => {:a => :chararray, :b => :int})
56
62
  # would be nice, but the order of the keys can't be guaranteed in Ruby 1.8.
57
63
  def load(path, options={})
58
- Inout::Load.new(path, options)
64
+ Inout::Load.new(path, self, options)
59
65
  end
60
66
 
61
67
  # STORE
@@ -151,22 +157,6 @@ module Piglet
151
157
  @top_level_statements << Param::Default.new(name, value, options)
152
158
  end
153
159
 
154
- # Support for binary conditions, a.k.a. the ternary operator.
155
- #
156
- # x.test(x.a > x.b, x.a, x.b) # => (a > b ? a : b)
157
- #
158
- # Should only be used in the block given to #filter and #foreach
159
- def test(test, if_true, if_false)
160
- Field::BinaryConditional.new(test, if_true, if_false)
161
- end
162
-
163
- # Support for literals in FOREACH … GENERATE blocks.
164
- #
165
- # x.foreach { |r| [literal("hello").as(:hello)] } # => FOREACH x GENERATE 'hello' AS hello
166
- def literal(obj)
167
- Field::Literal.new(obj)
168
- end
169
-
170
160
  private
171
161
 
172
162
  def assignments(relation, ignore_set)
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+ module Piglet
4
+ module Relation
5
+ class BlockContext
6
+ def initialize(relation, interpreter)
7
+ @relation, @interpreter = relation, interpreter
8
+ end
9
+
10
+ # Support for literals in FOREACH … GENERATE blocks.
11
+ #
12
+ # x.foreach { |r| [literal("hello").as(:hello)] } # => FOREACH x GENERATE 'hello' AS hello
13
+ def literal(obj)
14
+ Field::Literal.new(obj)
15
+ end
16
+
17
+ # Support for binary conditions, a.k.a. the ternary operator.
18
+ #
19
+ # x.test(x.a > x.b, x.a, x.b) # => (a > b ? a : b)
20
+ #
21
+ # Should only be used in the block given to #filter and #foreach
22
+ def test(test, if_true, if_false)
23
+ Field::BinaryConditional.new(test, if_true, if_false)
24
+ end
25
+
26
+ def [](n)
27
+ @relation.field("\$#{n}")
28
+ end
29
+
30
+ def method_missing(name, *args)
31
+ if args.size == 0
32
+ @relation.method_missing(name, *args)
33
+ elsif @interpreter.respond_to?(name)
34
+ @interpreter.send(name, *args)
35
+ else
36
+ super
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -5,7 +5,8 @@ module Piglet
5
5
  class Cogroup # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, description)
8
+ def initialize(relation, interpreter, description)
9
+ @interpreter = interpreter
9
10
  @join_fields = description.reject { |k, v| ! (k.is_a?(Relation)) }
10
11
  @sources = @join_fields.keys
11
12
  @parallel = description[:parallel]
@@ -5,9 +5,9 @@ module Piglet
5
5
  class Cross # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relations, options={})
8
+ def initialize(relations, interpreter, options={})
9
9
  options ||= {}
10
- @sources, @parallel = relations, options[:parallel]
10
+ @sources, @interpreter, @parallel = relations, interpreter, options[:parallel]
11
11
  end
12
12
 
13
13
  def schema
@@ -5,9 +5,9 @@ module Piglet
5
5
  class Distinct # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, options={})
8
+ def initialize(relation, interpreter, options={})
9
9
  options ||= {}
10
- @sources, @parallel = [relation], options[:parallel]
10
+ @sources, @interpreter, @parallel = [relation], interpreter, options[:parallel]
11
11
  end
12
12
 
13
13
  def to_s
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Filter # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, expression)
9
- @sources, @expression = [relation], expression
8
+ def initialize(relation, interpreter, expression)
9
+ @sources, @interpreter, @expression = [relation], interpreter, expression
10
10
  end
11
11
 
12
12
  def to_s
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Foreach # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, field_expressions)
9
- @sources, @field_expressions = [relation], [field_expressions].flatten
8
+ def initialize(relation, interpreter, field_expressions)
9
+ @sources, @interpreter, @field_expressions = [relation], interpreter, [field_expressions].flatten
10
10
  end
11
11
 
12
12
  def schema
@@ -5,9 +5,9 @@ module Piglet
5
5
  class Group # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, grouping, options={})
8
+ def initialize(relation, interpreter, grouping, options={})
9
9
  options ||= {}
10
- @sources, @grouping, @parallel = [relation], grouping, options[:parallel]
10
+ @sources, @interpreter, @grouping, @parallel = [relation], interpreter, grouping, options[:parallel]
11
11
  end
12
12
 
13
13
  def schema
@@ -5,7 +5,8 @@ module Piglet
5
5
  class Join # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, description)
8
+ def initialize(relation, interpreter, description)
9
+ @interpreter = interpreter
9
10
  @join_fields = Hash[*description.select { |k, v| k.is_a?(Relation) }.flatten]
10
11
  @sources = @join_fields.keys
11
12
  @using = description[:using]
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Limit # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, n)
9
- @sources, @n = [relation], n
8
+ def initialize(relation, interpreter, n)
9
+ @sources, @interpreter, @n = [relation], interpreter, n
10
10
  end
11
11
 
12
12
  def to_s
@@ -0,0 +1,60 @@
1
+ # encoding: utf-8
2
+
3
+ module Piglet
4
+ module Relation
5
+ class NestedForeach
6
+ include Relation
7
+
8
+ def initialize(relation, interpreter, expressions)
9
+ @sources, @interpreter, @expressions = [relation], interpreter, expressions
10
+ end
11
+
12
+ def schema
13
+ description = @field_expressions.map { |expr| [expr.name, expr.type] }
14
+ Piglet::Schema::Tuple.parse(description)
15
+ end
16
+
17
+ def to_s
18
+ block_assignments = block_expressions.map do |expression|
19
+ "\t#{expression.field_alias} = #{expression.to_s(true)};\n"
20
+ end
21
+
22
+ generate_fields = @expressions.map do |expression|
23
+ if expression.respond_to?(:field_alias)
24
+ expression.field_alias
25
+ else
26
+ expression.to_s(true)
27
+ end
28
+ end
29
+
30
+ str = "FOREACH #{@sources.first.alias} {\n"
31
+ str << block_assignments.join
32
+ str << "\tGENERATE " + generate_fields.join(', ') + ";\n"
33
+ str << "}"
34
+ end
35
+
36
+ private
37
+
38
+ def block_expressions
39
+ handled = Set.new
40
+ handled.add(@relation)
41
+ intermediates = @expressions.map { |expression| intermediates(expression, handled) }.flatten
42
+ end
43
+
44
+ def intermediates(expression, handled)
45
+ result = []
46
+ unless handled.member?(expression)
47
+ if expression.is_a? Field::Field or expression.is_a? Field::Rename
48
+ expression.predecessors.each { |predecessor| result += intermediates(predecessor, handled) }
49
+ handled.add(expression)
50
+ end
51
+
52
+ if expression.is_a?(Field::Field) && ! expression.is_a?(Field::Rename)
53
+ result << expression
54
+ end
55
+ end
56
+ result
57
+ end
58
+ end
59
+ end
60
+ end