piglet 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/.gitignore +5 -4
  2. data/Gemfile +10 -0
  3. data/Gemfile.lock +53 -0
  4. data/README.rdoc +74 -38
  5. data/Rakefile +10 -1
  6. data/lib/piglet.rb +5 -1
  7. data/lib/piglet/field/call_expression.rb +7 -2
  8. data/lib/piglet/field/direct_expression.rb +28 -0
  9. data/lib/piglet/field/field.rb +73 -3
  10. data/lib/piglet/field/infix_expression.rb +14 -9
  11. data/lib/piglet/field/map_value.rb +17 -0
  12. data/lib/piglet/field/prefix_expression.rb +6 -3
  13. data/lib/piglet/field/reference.rb +5 -7
  14. data/lib/piglet/field/rename.rb +7 -5
  15. data/lib/piglet/field/suffix_expression.rb +4 -2
  16. data/lib/piglet/field/udf_expression.rb +19 -2
  17. data/lib/piglet/inout/load.rb +2 -2
  18. data/lib/piglet/interpreter.rb +8 -18
  19. data/lib/piglet/relation/block_context.rb +41 -0
  20. data/lib/piglet/relation/cogroup.rb +2 -1
  21. data/lib/piglet/relation/cross.rb +2 -2
  22. data/lib/piglet/relation/distinct.rb +2 -2
  23. data/lib/piglet/relation/filter.rb +2 -2
  24. data/lib/piglet/relation/foreach.rb +2 -2
  25. data/lib/piglet/relation/group.rb +2 -2
  26. data/lib/piglet/relation/join.rb +2 -1
  27. data/lib/piglet/relation/limit.rb +2 -2
  28. data/lib/piglet/relation/nested_foreach.rb +60 -0
  29. data/lib/piglet/relation/order.rb +4 -2
  30. data/lib/piglet/relation/relation.rb +43 -32
  31. data/lib/piglet/relation/sample.rb +2 -2
  32. data/lib/piglet/relation/split.rb +5 -5
  33. data/lib/piglet/relation/stream.rb +2 -1
  34. data/lib/piglet/relation/union.rb +2 -2
  35. data/piglet.gemspec +126 -0
  36. data/spec/piglet/field/field_spec.rb +7 -2
  37. data/spec/piglet/interpreter_spec.rb +6 -6
  38. data/spec/piglet/relation/relation_spec.rb +7 -4
  39. data/spec/piglet/relation/split_spec.rb +3 -1
  40. data/spec/piglet/relation/union_spec.rb +5 -7
  41. data/spec/piglet_spec.rb +76 -31
  42. data/spec/spec_helper.rb +9 -0
  43. data/tasks/gem.rake +16 -19
  44. data/tasks/rdoc.rake +1 -3
  45. metadata +34 -11
  46. data/TODO +0 -2
@@ -15,24 +15,29 @@ module Piglet
15
15
  else
16
16
  @type = determine_type(@left_expression, @right_expression)
17
17
  end
18
+ @predecessors = [left_expression, right_expression]
18
19
  end
19
20
 
20
21
  def simple?
21
22
  false
22
23
  end
23
24
 
24
- def to_s
25
- left = @left_expression
26
- right = @right_expression
25
+ def to_s(inner=false)
26
+ if inner
27
+ left = @left_expression.field_alias
28
+ right = @right_expression.field_alias
29
+ else
30
+ left = @left_expression
31
+ right = @right_expression
27
32
 
28
- if left.respond_to?(:operator) && left.operator != @operator
29
- left = parenthesise(left)
30
- end
33
+ if left.respond_to?(:operator) && left.operator != @operator
34
+ left = parenthesise(left)
35
+ end
31
36
 
32
- if right.respond_to?(:operator) && right.operator != @operator
33
- right = parenthesise(right)
37
+ if right.respond_to?(:operator) && right.operator != @operator
38
+ right = parenthesise(right)
39
+ end
34
40
  end
35
-
36
41
  "#{left} #{@operator} #{right}"
37
42
  end
38
43
 
@@ -0,0 +1,17 @@
# encoding: utf-8

module Piglet
  module Field
    # Field expression built from a key and a parent expression.
    # Renders as "<parent>#<key>"; presumably a Pig map lookup (the name
    # suggests so -- confirm against the caller that constructs it).
    class MapValue
      include Field

      # key::    rendered verbatim after the '#' separator
      # parent:: the expression being dereferenced; recorded as this
      #          field's only predecessor
      def initialize(key, parent)
        @key = key
        @predecessors = [parent]
      end

      # Returns the textual form of the lookup. With +inner+ set, the parent
      # is referred to by its +field_alias+ instead of its own +to_s+.
      def to_s(inner=false)
        parent = @predecessors.first
        prefix = inner ? parent.field_alias : parent
        "#{prefix}##{@key}"
      end
    end
  end
end
@@ -11,17 +11,20 @@ module Piglet
11
11
  options ||= {}
12
12
  @operator, @expression, @space_between = operator, expression, space_between
13
13
  @type = options[:type] || expression.type
14
+ @predecessors = [expression]
14
15
  end
15
16
 
16
17
  def simple?
17
18
  false
18
19
  end
19
20
 
20
- def to_s
21
+ def to_s(inner=false)
22
+ expr = if inner then @expression.field_alias else @expression end
23
+
21
24
  if @space_between
22
- "#{@operator} #{parenthesise(@expression)}"
25
+ "#{@operator} #{parenthesise(expr)}"
23
26
  else
24
- "#{@operator}#{parenthesise(@expression)}"
27
+ "#{@operator}#{parenthesise(expr)}"
25
28
  end
26
29
  end
27
30
  end
@@ -10,16 +10,13 @@ module Piglet
10
10
  @name, @parent = name, relation
11
11
  @explicit_ancestry = options[:explicit_ancestry] || false
12
12
  @type = options[:type]
13
+ @predecessors = [relation] unless relation.nil?
13
14
  end
14
15
 
15
16
  def simple?
16
17
  true
17
18
  end
18
-
19
- def field(name)
20
- Reference.new(name, self, :explicit_ancestry => true)
21
- end
22
-
19
+
23
20
  def method_missing(name, *args)
24
21
  if name.to_s =~ /^\w+$/ && args.empty?
25
22
  field(name)
@@ -32,12 +29,13 @@ module Piglet
32
29
  field("\$#{n}")
33
30
  end
34
31
 
35
- def to_s
32
+ def to_s(inner=false)
36
33
  if @explicit_ancestry
37
34
  if @parent.respond_to?(:alias)
38
35
  "#{@parent.alias}.#{@name.to_s}"
39
36
  else
40
- "#{@parent}.#{@name.to_s}"
37
+ expr = if inner then @parent.field_alias else @parent end
38
+ "#{expr}.#{@name.to_s}"
41
39
  end
42
40
  else
43
41
  @name.to_s
@@ -3,14 +3,16 @@
3
3
  module Piglet
4
4
  module Field
5
5
  class Rename # :nodoc:
6
- attr_reader :name, :type
6
+ attr_reader :name, :type, :predecessors
7
7
 
8
- def initialize(new_name, field_expression)
9
- @name, @field_expression, @type = new_name, field_expression, field_expression.type
8
+ def initialize(name, field_expression)
9
+ @name, @field_expression, @type = name, field_expression, field_expression.type
10
+ @predecessors = [field_expression]
10
11
  end
11
12
 
12
- def to_s
13
- "#{@field_expression} AS #{@name}"
13
+ def to_s(inner=false)
14
+ expr = if inner then @field_expression.field_alias else @field_expression end
15
+ "#{expr} AS #{@name}"
14
16
  end
15
17
  end
16
18
  end
@@ -9,14 +9,16 @@ module Piglet
9
9
  options ||= {}
10
10
  @operator, @expression = operator, expression
11
11
  @type = options[:type] || expression.type
12
+ @predecessors = [expression]
12
13
  end
13
14
 
14
15
  def simple?
15
16
  false
16
17
  end
17
18
 
18
- def to_s
19
- "#{parenthesise(@expression)} #{@operator}"
19
+ def to_s(inner=false)
20
+ expr = if inner then @expression.field_alias else @expression end
21
+ "#{parenthesise(expr)} #{@operator}"
20
22
  end
21
23
  end
22
24
  end
@@ -5,10 +5,15 @@ module Piglet
5
5
 
6
6
  def initialize(ali4s, *args)
7
7
  @alias, @args = ali4s, args
8
+ @predecessors = args.select { |arg| arg.respond_to? :field_alias }
8
9
  end
9
10
 
10
- def to_s
11
- "#{@alias}(#{args_to_s(@args)})"
11
+ def to_s(inner=false)
12
+ if inner
13
+ "#{@alias}(#{args_to_inner_s(@args)})"
14
+ else
15
+ "#{@alias}(#{args_to_s(@args)})"
16
+ end
12
17
  end
13
18
 
14
19
  private
@@ -23,6 +28,18 @@ module Piglet
23
28
  arg
24
29
  end
25
30
  end
31
+
# Renders +arg+ as call-argument text where fields are referred to by alias:
# * a String is passed through +escape+ and wrapped in single quotes,
# * any other Enumerable is rendered element by element, joined with ", ",
# * an object responding to +field_alias+ is replaced by that alias,
# * anything else falls back to +to_s+.
# String is tested before Enumerable, matching the original branch order.
def args_to_inner_s(arg)
  case arg
  when String
    "'#{escape(arg)}'"
  when Enumerable
    arg.map { |element| args_to_inner_s(element) }.join(", ")
  else
    arg.respond_to?(:field_alias) ? arg.field_alias : arg.to_s
  end
end
26
43
  end
27
44
  end
28
45
  end
@@ -6,9 +6,9 @@ module Piglet
6
6
  include Piglet::Relation::Relation
7
7
  include StorageTypes
8
8
 
9
- def initialize(path, options={})
9
+ def initialize(path, interpreter, options={})
10
10
  options ||= {}
11
- @path, @using, @schema = path, options[:using], options[:schema]
11
+ @path, @interpreter, @using, @schema = path, interpreter, options[:using], options[:schema]
12
12
  end
13
13
 
14
14
  def schema
@@ -38,7 +38,13 @@ module Piglet
38
38
 
39
39
  statements.flatten.map { |s| s.to_s }.join(";\n") + ";\n"
40
40
  end
41
-
41
+
# Hands out a new alias on every call: "relation_1", "relation_2", and so on.
# The running number is kept in @counter on the receiver, starting from zero.
def next_relation_alias
  @counter ||= 0
  "relation_#{@counter += 1}"
end
47
+
42
48
  protected
43
49
 
44
50
  # LOAD
@@ -55,7 +61,7 @@ module Piglet
55
61
  # NOTE: the syntax load('path', :schema => {:a => :chararray, :b => :int})
56
62
  # would be nice, but the order of the keys can't be guaranteed in Ruby 1.8.
57
63
  def load(path, options={})
58
- Inout::Load.new(path, options)
64
+ Inout::Load.new(path, self, options)
59
65
  end
60
66
 
61
67
  # STORE
@@ -151,22 +157,6 @@ module Piglet
151
157
  @top_level_statements << Param::Default.new(name, value, options)
152
158
  end
153
159
 
154
- # Support for binary conditions, a.k.a. the ternary operator.
155
- #
156
- # x.test(x.a > x.b, x.a, x.b) # => (a > b ? a : b)
157
- #
158
- # Should only be used in the block given to #filter and #foreach
159
- def test(test, if_true, if_false)
160
- Field::BinaryConditional.new(test, if_true, if_false)
161
- end
162
-
163
- # Support for literals in FOREACH … GENERATE blocks.
164
- #
165
- # x.foreach { |r| [literal("hello").as(:hello)] } # => FOREACH x GENERATE 'hello' AS hello
166
- def literal(obj)
167
- Field::Literal.new(obj)
168
- end
169
-
170
160
  private
171
161
 
172
162
  def assignments(relation, ignore_set)
@@ -0,0 +1,41 @@
# encoding: utf-8

module Piglet
  module Relation
    # Helper object that pairs a relation with the interpreter and exposes
    # a few expression builders (+literal+, +test+, positional +[]+). Any
    # other message is routed to the relation or the interpreter by
    # +method_missing+.
    class BlockContext
      # relation::    receives field lookups (zero-argument messages and +[]+)
      # interpreter:: receives messages that carry arguments
      def initialize(relation, interpreter)
        @relation, @interpreter = relation, interpreter
      end

      # Support for literals in FOREACH ... GENERATE blocks.
      #
      #   x.foreach { |r| [literal("hello").as(:hello)] } # => FOREACH x GENERATE 'hello' AS hello
      def literal(obj)
        Field::Literal.new(obj)
      end

      # Support for binary conditions, a.k.a. the ternary operator.
      #
      #   x.test(x.a > x.b, x.a, x.b) # => (a > b ? a : b)
      #
      # Should only be used in the block given to #filter and #foreach
      def test(test, if_true, if_false)
        Field::BinaryConditional.new(test, if_true, if_false)
      end

      # Positional field access: +context[n]+ asks the relation for the
      # field named "$n".
      def [](n)
        @relation.field("\$#{n}")
      end

      # Routing rules:
      # * no arguments   -> forwarded to the relation's own +method_missing+
      # * with arguments -> sent to the interpreter if it has such a method
      # * otherwise      -> the default NoMethodError
      #
      # The interpreter is probed with <tt>respond_to?(name, true)</tt>:
      # its DSL methods are declared +protected+, and on Ruby 2.0+ a plain
      # +respond_to?+ reports false for those, which made the +send+ branch
      # unreachable. +send+ already ignores visibility, so the probe has to
      # as well. Note that this also matches the interpreter's private
      # methods.
      def method_missing(name, *args)
        if args.empty?
          @relation.method_missing(name, *args)
        elsif @interpreter.respond_to?(name, true)
          @interpreter.send(name, *args)
        else
          super
        end
      end
    end
  end
end
@@ -5,7 +5,8 @@ module Piglet
5
5
  class Cogroup # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, description)
8
+ def initialize(relation, interpreter, description)
9
+ @interpreter = interpreter
9
10
  @join_fields = description.reject { |k, v| ! (k.is_a?(Relation)) }
10
11
  @sources = @join_fields.keys
11
12
  @parallel = description[:parallel]
@@ -5,9 +5,9 @@ module Piglet
5
5
  class Cross # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relations, options={})
8
+ def initialize(relations, interpreter, options={})
9
9
  options ||= {}
10
- @sources, @parallel = relations, options[:parallel]
10
+ @sources, @interpreter, @parallel = relations, interpreter, options[:parallel]
11
11
  end
12
12
 
13
13
  def schema
@@ -5,9 +5,9 @@ module Piglet
5
5
  class Distinct # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, options={})
8
+ def initialize(relation, interpreter, options={})
9
9
  options ||= {}
10
- @sources, @parallel = [relation], options[:parallel]
10
+ @sources, @interpreter, @parallel = [relation], interpreter, options[:parallel]
11
11
  end
12
12
 
13
13
  def to_s
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Filter # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, expression)
9
- @sources, @expression = [relation], expression
8
+ def initialize(relation, interpreter, expression)
9
+ @sources, @interpreter, @expression = [relation], interpreter, expression
10
10
  end
11
11
 
12
12
  def to_s
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Foreach # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, field_expressions)
9
- @sources, @field_expressions = [relation], [field_expressions].flatten
8
+ def initialize(relation, interpreter, field_expressions)
9
+ @sources, @interpreter, @field_expressions = [relation], interpreter, [field_expressions].flatten
10
10
  end
11
11
 
12
12
  def schema
@@ -5,9 +5,9 @@ module Piglet
5
5
  class Group # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, grouping, options={})
8
+ def initialize(relation, interpreter, grouping, options={})
9
9
  options ||= {}
10
- @sources, @grouping, @parallel = [relation], grouping, options[:parallel]
10
+ @sources, @interpreter, @grouping, @parallel = [relation], interpreter, grouping, options[:parallel]
11
11
  end
12
12
 
13
13
  def schema
@@ -5,7 +5,8 @@ module Piglet
5
5
  class Join # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, description)
8
+ def initialize(relation, interpreter, description)
9
+ @interpreter = interpreter
9
10
  @join_fields = Hash[*description.select { |k, v| k.is_a?(Relation) }.flatten]
10
11
  @sources = @join_fields.keys
11
12
  @using = description[:using]
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Limit # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, n)
9
- @sources, @n = [relation], n
8
+ def initialize(relation, interpreter, n)
9
+ @sources, @interpreter, @n = [relation], interpreter, n
10
10
  end
11
11
 
12
12
  def to_s
@@ -0,0 +1,60 @@
# encoding: utf-8

require 'set'

module Piglet
  module Relation
    # Relation that renders as a FOREACH statement with a braced block:
    # one "alias = expression;" line per intermediate field, followed by a
    # GENERATE line listing the output expressions.
    class NestedForeach
      include Relation

      # relation::    the single source relation
      # interpreter:: stored in @interpreter; not used by this class itself
      # expressions:: array of output expressions for the GENERATE line
      def initialize(relation, interpreter, expressions)
        @sources, @interpreter, @expressions = [relation], interpreter, expressions
      end

      # Builds a tuple schema from the name and type of every output
      # expression. Reads @expressions, the variable the constructor
      # actually assigns (the previous @field_expressions was never set).
      def schema
        description = @expressions.map { |expr| [expr.name, expr.type] }
        Piglet::Schema::Tuple.parse(description)
      end

      # Renders the whole statement. Intermediate fields are emitted as
      # tab-indented assignments using their inner (alias-based) form.
      # Outputs that have a +field_alias+ are generated by alias, the rest
      # by their inner string form.
      def to_s
        block_assignments = block_expressions.map do |expression|
          "\t#{expression.field_alias} = #{expression.to_s(true)};\n"
        end

        generate_fields = @expressions.map do |expression|
          if expression.respond_to?(:field_alias)
            expression.field_alias
          else
            expression.to_s(true)
          end
        end

        str = "FOREACH #{@sources.first.alias} {\n"
        str << block_assignments.join
        str << "\tGENERATE " + generate_fields.join(', ') + ";\n"
        str << "}"
      end

      private

      # All field expressions that need an assignment line, predecessors
      # before dependants, each listed once. The source relation is marked
      # as handled up front so the walk stops there. Results are
      # concatenated one level deep, so expression objects themselves are
      # never flattened.
      def block_expressions
        handled = Set.new([@sources.first])
        @expressions.inject([]) do |collected, expression|
          collected.concat(intermediates(expression, handled))
        end
      end

      # Depth-first walk over +expression+ and its predecessors. Returns the
      # Field::Field nodes not yet in +handled+ (renames are traversed but
      # not listed) and records every visited field/rename in +handled+.
      def intermediates(expression, handled)
        result = []
        return result if handled.member?(expression)

        if expression.is_a?(Field::Field) || expression.is_a?(Field::Rename)
          expression.predecessors.each do |predecessor|
            result.concat(intermediates(predecessor, handled))
          end
          handled.add(expression)
        end

        if expression.is_a?(Field::Field) && !expression.is_a?(Field::Rename)
          result << expression
        end

        result
      end
    end
  end
end