piglet 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/.gitignore +5 -4
  2. data/Gemfile +10 -0
  3. data/Gemfile.lock +53 -0
  4. data/README.rdoc +74 -38
  5. data/Rakefile +10 -1
  6. data/lib/piglet.rb +5 -1
  7. data/lib/piglet/field/call_expression.rb +7 -2
  8. data/lib/piglet/field/direct_expression.rb +28 -0
  9. data/lib/piglet/field/field.rb +73 -3
  10. data/lib/piglet/field/infix_expression.rb +14 -9
  11. data/lib/piglet/field/map_value.rb +17 -0
  12. data/lib/piglet/field/prefix_expression.rb +6 -3
  13. data/lib/piglet/field/reference.rb +5 -7
  14. data/lib/piglet/field/rename.rb +7 -5
  15. data/lib/piglet/field/suffix_expression.rb +4 -2
  16. data/lib/piglet/field/udf_expression.rb +19 -2
  17. data/lib/piglet/inout/load.rb +2 -2
  18. data/lib/piglet/interpreter.rb +8 -18
  19. data/lib/piglet/relation/block_context.rb +41 -0
  20. data/lib/piglet/relation/cogroup.rb +2 -1
  21. data/lib/piglet/relation/cross.rb +2 -2
  22. data/lib/piglet/relation/distinct.rb +2 -2
  23. data/lib/piglet/relation/filter.rb +2 -2
  24. data/lib/piglet/relation/foreach.rb +2 -2
  25. data/lib/piglet/relation/group.rb +2 -2
  26. data/lib/piglet/relation/join.rb +2 -1
  27. data/lib/piglet/relation/limit.rb +2 -2
  28. data/lib/piglet/relation/nested_foreach.rb +60 -0
  29. data/lib/piglet/relation/order.rb +4 -2
  30. data/lib/piglet/relation/relation.rb +43 -32
  31. data/lib/piglet/relation/sample.rb +2 -2
  32. data/lib/piglet/relation/split.rb +5 -5
  33. data/lib/piglet/relation/stream.rb +2 -1
  34. data/lib/piglet/relation/union.rb +2 -2
  35. data/piglet.gemspec +126 -0
  36. data/spec/piglet/field/field_spec.rb +7 -2
  37. data/spec/piglet/interpreter_spec.rb +6 -6
  38. data/spec/piglet/relation/relation_spec.rb +7 -4
  39. data/spec/piglet/relation/split_spec.rb +3 -1
  40. data/spec/piglet/relation/union_spec.rb +5 -7
  41. data/spec/piglet_spec.rb +76 -31
  42. data/spec/spec_helper.rb +9 -0
  43. data/tasks/gem.rake +16 -19
  44. data/tasks/rdoc.rake +1 -3
  45. metadata +34 -11
  46. data/TODO +0 -2
@@ -5,14 +5,16 @@ module Piglet
5
5
  class Order # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, fields, options)
8
+ def initialize(relation, interpreter, fields, options)
9
9
  options ||= {}
10
+ @interpreter = interpreter
10
11
  @sources, @parallel = [relation], options[:parallel]
11
12
  @fields = fields.is_a?(Enumerable) ? fields : [fields]
12
13
  end
13
14
 
14
15
  def to_s
15
- "ORDER #{@sources.first.alias} BY #{field_strings}"
16
+ target = @sources.first.respond_to?(:field_alias) ? @sources.first.field_alias : @sources.first.alias
17
+ "ORDER #{target} BY #{field_strings}"
16
18
  end
17
19
 
18
20
  private
@@ -8,7 +8,13 @@ module Piglet
8
8
  # The name this relation will get in Pig Latin. The name is generated when
9
9
  # the relation is output by the interpreter, and will be unique.
10
10
  def alias
11
- @alias ||= Relation.next_alias
11
+ @alias ||= @interpreter.next_relation_alias
12
+ end
13
+
14
+ def next_field_alias
15
+ @field_counter ||= 0
16
+ @field_counter += 1
17
+ "#{self.alias}_field_#{@field_counter}"
12
18
  end
13
19
 
14
20
  # GROUP
@@ -18,7 +24,7 @@ module Piglet
18
24
  # x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3
19
25
  def group(*args)
20
26
  grouping, options = split_at_options(args)
21
- Group.new(self, [grouping].flatten, options)
27
+ Group.new(self, @interpreter, [grouping].flatten, options)
22
28
  end
23
29
 
24
30
  # DISTINCT
@@ -26,7 +32,7 @@ module Piglet
26
32
  # x.distinct # => DISTINCT x
27
33
  # x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5
28
34
  def distinct(options={})
29
- Distinct.new(self, options)
35
+ Distinct.new(self, @interpreter, options)
30
36
  end
31
37
 
32
38
  # COGROUP
@@ -37,7 +43,7 @@ module Piglet
37
43
  # x.cogroup(x => :a, y => [:b, :inner]) # => COGROUP x BY a, y BY b INNER
38
44
  # x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5
39
45
  def cogroup(description)
40
- Cogroup.new(self, description)
46
+ Cogroup.new(self, @interpreter, description)
41
47
  end
42
48
 
43
49
  # CROSS
@@ -47,29 +53,39 @@ module Piglet
47
53
  # x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5
48
54
  def cross(*args)
49
55
  relations, options = split_at_options(args)
50
- Cross.new(([self] + relations).flatten, options)
56
+ Cross.new(([self] + relations).flatten, @interpreter, options)
51
57
  end
52
58
 
53
59
  # FILTER
54
60
  #
55
- # x.filter { |r| r.a == r.b } # => FILTER x BY a == b
56
- # x.filter { |r| r.a > r.b && r.c != 3 } # => FILTER x BY a > b AND c != 3
57
- def filter
58
- Filter.new(self, yield(self))
61
+ # x.filter { a == b } # => FILTER x BY a == b
62
+ # x.filter { a > b && c == 3 } # => FILTER x BY a > b AND c == 3
63
+ def filter(&block)
64
+ context = BlockContext.new(self, @interpreter)
65
+ Filter.new(self, @interpreter, context.instance_eval(&block))
59
66
  end
60
67
 
61
68
  # FOREACH ... GENERATE
62
69
  #
63
- # x.foreach { |r| r.a } # => FOREACH x GENERATE a
64
- # x.foreach { |r| [r.a, r.b] } # => FOREACH x GENERATE a, b
65
- # x.foreach { |r| r.a.max } # => FOREACH x GENERATE MAX(a)
66
- # x.foreach { |r| r.a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
70
+ # x.foreach { a } # => FOREACH x GENERATE a
71
+ # x.foreach { [a, b] } # => FOREACH x GENERATE a, b
72
+ # x.foreach { a.max } # => FOREACH x GENERATE MAX(a)
73
+ # x.foreach { a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
67
74
  #
68
- #--
75
+ # See #nested_foreach for FOREACH ... { ... GENERATE }
76
+ def foreach(&block)
77
+ context = BlockContext.new(self, @interpreter)
78
+ Foreach.new(self, @interpreter, context.instance_eval(&block))
79
+ end
80
+
81
+ # FOREACH ... { ... GENERATE }
82
+ #
83
+ # x.nested_foreach { [a.distinct] } # => FOREACH x { a1 = DISTINCT a; GENERATE a1 }
69
84
  #
70
- # TODO: FOREACH a { b GENERATE c }
71
- def foreach
72
- Foreach.new(self, yield(self))
85
+ # See #foreach for FOREACH ... GENERATE
86
+ def nested_foreach(&block)
87
+ context = BlockContext.new(self, @interpreter)
88
+ NestedForeach.new(self, @interpreter, context.instance_eval(&block))
73
89
  end
74
90
 
75
91
  # JOIN
@@ -79,14 +95,14 @@ module Piglet
79
95
  # x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
80
96
  # x.join(x => :a, y => :b, :parallel => 5) # => JOIN x BY a, y BY b PARALLEL 5
81
97
  def join(description)
82
- Join.new(self, description)
98
+ Join.new(self, @interpreter, description)
83
99
  end
84
100
 
85
101
  # LIMIT
86
102
  #
87
103
  # x.limit(10) # => LIMIT x 10
88
104
  def limit(n)
89
- Limit.new(self, n)
105
+ Limit.new(self, @interpreter, n)
90
106
  end
91
107
 
92
108
  # ORDER
@@ -103,21 +119,22 @@ module Piglet
103
119
  def order(*args)
104
120
  fields, options = split_at_options(args)
105
121
  fields = *fields
106
- Order.new(self, fields, options)
122
+ Order.new(self, @interpreter, fields, options)
107
123
  end
108
124
 
109
125
  # SAMPLE
110
126
  #
111
127
  # x.sample(5) # => SAMPLE x 5;
112
128
  def sample(n)
113
- Sample.new(self, n)
129
+ Sample.new(self, @interpreter, n)
114
130
  end
115
131
 
116
132
  # SPLIT
117
133
  #
118
- # y, z = x.split { |r| [r.a <= 3, r.b > 4]} # => SPLIT x INTO y IF a <= 3, z IF a > 4
119
- def split
120
- Split.new(self, yield(self)).shards
134
+ # y, z = x.split { [a <= 3, b > 4] } # => SPLIT x INTO y IF a <= 3, z IF b > 4
135
+ def split(&block)
136
+ context = BlockContext.new(self, @interpreter)
137
+ Split.new(self, @interpreter, context.instance_eval(&block)).shards
121
138
  end
122
139
 
123
140
  # STREAM
@@ -128,7 +145,7 @@ module Piglet
128
145
  # x.stream(:cmd, :schema => [%w(a int)]) # => STREAM x THROUGH cmd AS (a:int)
129
146
  def stream(*args)
130
147
  fields, options = split_at_options(args)
131
- Stream.new(self, fields, options)
148
+ Stream.new(self, @interpreter, fields, options)
132
149
  end
133
150
 
134
151
  # UNION
@@ -136,7 +153,7 @@ module Piglet
136
153
  # x.union(y) # => UNION x, y
137
154
  # x.union(y, z) # => UNION x, y, z
138
155
  def union(*relations)
139
- Union.new(*([self] + relations))
156
+ Union.new(([self] + relations).flatten, @interpreter)
140
157
  end
141
158
 
142
159
  def field(name)
@@ -183,12 +200,6 @@ module Piglet
183
200
  [parameters, nil]
184
201
  end
185
202
  end
186
-
187
- def self.next_alias
188
- @counter ||= 0
189
- @counter += 1
190
- "relation_#{@counter}"
191
- end
192
203
  end
193
204
  end
194
205
  end
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Sample # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, n)
9
- @sources, @n = [relation], n
8
+ def initialize(relation, interpreter, n)
9
+ @sources, @interpreter, @n = [relation], interpreter, n
10
10
  end
11
11
 
12
12
  def to_s
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Split # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, expressions)
9
- @sources, @expressions = [relation], expressions
8
+ def initialize(relation, interpreter, expressions)
9
+ @sources, @interpreter, @expressions = [relation], interpreter, expressions
10
10
  @shard_map = create_shards
11
11
  end
12
12
 
@@ -26,7 +26,7 @@ module Piglet
26
26
 
27
27
  def create_shards
28
28
  @expressions.inject({}) do |map, expr|
29
- map[expr] = RelationShard.new(self)
29
+ map[expr] = RelationShard.new(self, @interpreter)
30
30
  map
31
31
  end
32
32
  end
@@ -35,8 +35,8 @@ module Piglet
35
35
  class RelationShard # :nodoc:
36
36
  include Relation
37
37
 
38
- def initialize(split)
39
- @sources = [split]
38
+ def initialize(split, interpreter)
39
+ @sources, @interpreter = [split], interpreter
40
40
  end
41
41
 
42
42
  def to_s
@@ -5,7 +5,8 @@ module Piglet
5
5
  class Stream # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(source, args, options=nil)
8
+ def initialize(source, interpreter, args, options=nil)
9
+ @interpreter = interpreter
9
10
  options ||= {}
10
11
  @sources = [source]
11
12
  args.each do |arg|
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Union # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(*relations)
9
- @sources = [relations].flatten
8
+ def initialize(relations, interpreter)
9
+ @sources, @interpreter = relations, interpreter
10
10
  end
11
11
 
12
12
  def to_s
@@ -0,0 +1,126 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{piglet}
8
+ s.version = "0.3.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Theo Hultberg"]
12
+ s.date = %q{2010-07-09}
13
+ s.default_executable = %q{piglet}
14
+ s.description = %q{Piglet aims to look like Pig Latin while allowing for things like loops and control of flow that are missing from Pig.}
15
+ s.email = %q{theo@iconara.net}
16
+ s.executables = ["piglet"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".gitignore",
24
+ "Gemfile",
25
+ "Gemfile.lock",
26
+ "LICENSE",
27
+ "README.rdoc",
28
+ "Rakefile",
29
+ "bin/piglet",
30
+ "lib/piglet.rb",
31
+ "lib/piglet/field/binary_conditional.rb",
32
+ "lib/piglet/field/call_expression.rb",
33
+ "lib/piglet/field/direct_expression.rb",
34
+ "lib/piglet/field/field.rb",
35
+ "lib/piglet/field/infix_expression.rb",
36
+ "lib/piglet/field/literal.rb",
37
+ "lib/piglet/field/map_value.rb",
38
+ "lib/piglet/field/prefix_expression.rb",
39
+ "lib/piglet/field/reference.rb",
40
+ "lib/piglet/field/rename.rb",
41
+ "lib/piglet/field/suffix_expression.rb",
42
+ "lib/piglet/field/udf_expression.rb",
43
+ "lib/piglet/inout/describe.rb",
44
+ "lib/piglet/inout/dump.rb",
45
+ "lib/piglet/inout/explain.rb",
46
+ "lib/piglet/inout/illustrate.rb",
47
+ "lib/piglet/inout/load.rb",
48
+ "lib/piglet/inout/output.rb",
49
+ "lib/piglet/inout/storage_types.rb",
50
+ "lib/piglet/inout/store.rb",
51
+ "lib/piglet/interpreter.rb",
52
+ "lib/piglet/param/declare.rb",
53
+ "lib/piglet/param/default.rb",
54
+ "lib/piglet/param/parameter_statement.rb",
55
+ "lib/piglet/relation/block_context.rb",
56
+ "lib/piglet/relation/cogroup.rb",
57
+ "lib/piglet/relation/cross.rb",
58
+ "lib/piglet/relation/distinct.rb",
59
+ "lib/piglet/relation/filter.rb",
60
+ "lib/piglet/relation/foreach.rb",
61
+ "lib/piglet/relation/group.rb",
62
+ "lib/piglet/relation/join.rb",
63
+ "lib/piglet/relation/limit.rb",
64
+ "lib/piglet/relation/nested_foreach.rb",
65
+ "lib/piglet/relation/order.rb",
66
+ "lib/piglet/relation/relation.rb",
67
+ "lib/piglet/relation/sample.rb",
68
+ "lib/piglet/relation/split.rb",
69
+ "lib/piglet/relation/stream.rb",
70
+ "lib/piglet/relation/union.rb",
71
+ "lib/piglet/schema/bag.rb",
72
+ "lib/piglet/schema/tuple.rb",
73
+ "lib/piglet/udf/define.rb",
74
+ "lib/piglet/udf/register.rb",
75
+ "piglet.gemspec",
76
+ "spec/piglet/field/binary_conditional_spec.rb",
77
+ "spec/piglet/field/field_spec.rb",
78
+ "spec/piglet/field/infix_expression_spec.rb",
79
+ "spec/piglet/field/literal_spec.rb",
80
+ "spec/piglet/field/reference_spec.rb",
81
+ "spec/piglet/interpreter_spec.rb",
82
+ "spec/piglet/relation/relation_spec.rb",
83
+ "spec/piglet/relation/split_spec.rb",
84
+ "spec/piglet/relation/union_spec.rb",
85
+ "spec/piglet/schema/tuple_spec.rb",
86
+ "spec/piglet_spec.rb",
87
+ "spec/spec.opts",
88
+ "spec/spec_helper.rb",
89
+ "tasks/gem.rake",
90
+ "tasks/rdoc.rake",
91
+ "tasks/spec.rake"
92
+ ]
93
+ s.homepage = %q{http://github.com/iconara/piglet}
94
+ s.rdoc_options = ["--charset=UTF-8"]
95
+ s.require_paths = ["lib"]
96
+ s.rubygems_version = %q{1.3.7}
97
+ s.summary = %q{Piglet is a DSL for Pig scripts}
98
+ s.test_files = [
99
+ "spec/piglet/field/binary_conditional_spec.rb",
100
+ "spec/piglet/field/field_spec.rb",
101
+ "spec/piglet/field/infix_expression_spec.rb",
102
+ "spec/piglet/field/literal_spec.rb",
103
+ "spec/piglet/field/reference_spec.rb",
104
+ "spec/piglet/interpreter_spec.rb",
105
+ "spec/piglet/relation/relation_spec.rb",
106
+ "spec/piglet/relation/split_spec.rb",
107
+ "spec/piglet/relation/union_spec.rb",
108
+ "spec/piglet/schema/tuple_spec.rb",
109
+ "spec/piglet_spec.rb",
110
+ "spec/spec_helper.rb"
111
+ ]
112
+
113
+ if s.respond_to? :specification_version then
114
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
115
+ s.specification_version = 3
116
+
117
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
118
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
119
+ else
120
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
121
+ end
122
+ else
123
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
124
+ end
125
+ end
126
+
@@ -9,8 +9,9 @@ include Piglet::Field
9
9
  describe Field do
10
10
 
11
11
  before do
12
- @field = mock('field')
13
- @field.extend Field
12
+ @field = Object.new
13
+ @field.extend(Field)
14
+ @field.stub(:predecessors).and_return([stub(:generate_field_alias => 'xyz')])
14
15
  @expressions = {}
15
16
  [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
16
17
  @expressions[type] = mock("#{type} expression")
@@ -19,6 +20,10 @@ describe Field do
19
20
  end
20
21
  end
21
22
 
23
+ it 'should have an alias' do
24
+ @field.field_alias.should_not be_nil
25
+ end
26
+
22
27
  describe '#type' do
23
28
  [:==, :ne, :<, :>, :<=, :>=, :and, :or].each do |op|
24
29
  op_str = (op == :ne ? '!=' : op).to_s.upcase
@@ -32,7 +32,7 @@ describe Piglet::Interpreter do
32
32
  describe '#test' do
33
33
  it 'outputs a binary conditional' do
34
34
  @interpreter.interpret do
35
- dump(load('in').foreach { |r| [test(r.a == r.b, r.a, r.b)]})
35
+ dump(load('in').foreach { [test(self.a == self.b, self.a, self.b)]})
36
36
  end
37
37
  @interpreter.to_pig_latin.should include('(a == b ? a : b)')
38
38
  end
@@ -41,35 +41,35 @@ describe Piglet::Interpreter do
41
41
  describe '#literal' do
42
42
  it 'outputs a literal string' do
43
43
  @interpreter.interpret do
44
- dump(load('in').foreach { |r| [literal('hello').as(:world)]})
44
+ dump(load('in').foreach { [literal('hello').as(:world)]})
45
45
  end
46
46
  @interpreter.to_pig_latin.should include("'hello' AS world")
47
47
  end
48
48
 
49
49
  it 'outputs a literal integer' do
50
50
  @interpreter.interpret do
51
- dump(load('in').foreach { |r| [literal(3).as(:n)]})
51
+ dump(load('in').foreach { [literal(3).as(:n)]})
52
52
  end
53
53
  @interpreter.to_pig_latin.should include("3 AS n")
54
54
  end
55
55
 
56
56
  it 'outputs a literal float' do
57
57
  @interpreter.interpret do
58
- dump(load('in').foreach { |r| [literal(3.14).as(:pi)]})
58
+ dump(load('in').foreach { [literal(3.14).as(:pi)]})
59
59
  end
60
60
  @interpreter.to_pig_latin.should include("3.14 AS pi")
61
61
  end
62
62
 
63
63
  it 'outputs a literal string when passed an arbitrary object' do
64
64
  @interpreter.interpret do
65
- dump(load('in').foreach { |r| [literal(self).as(:interpreter)]})
65
+ dump(load('in').foreach { [literal(self).as(:interpreter)]})
66
66
  end
67
67
  @interpreter.to_pig_latin.should match(/'[^']+' AS interpreter/)
68
68
  end
69
69
 
70
70
  it 'escapes single quotes' do
71
71
  @interpreter.interpret do
72
- dump(load('in').foreach { |r| [literal("hello 'world'").as(:str)]})
72
+ dump(load('in').foreach { [literal("hello 'world'").as(:str)]})
73
73
  end
74
74
  @interpreter.to_pig_latin.should include("'hello \\'world\\'' AS str")
75
75
  end