piglet 0.2.5 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/.gitignore +5 -4
  2. data/Gemfile +10 -0
  3. data/Gemfile.lock +53 -0
  4. data/README.rdoc +74 -38
  5. data/Rakefile +10 -1
  6. data/lib/piglet.rb +5 -1
  7. data/lib/piglet/field/call_expression.rb +7 -2
  8. data/lib/piglet/field/direct_expression.rb +28 -0
  9. data/lib/piglet/field/field.rb +73 -3
  10. data/lib/piglet/field/infix_expression.rb +14 -9
  11. data/lib/piglet/field/map_value.rb +17 -0
  12. data/lib/piglet/field/prefix_expression.rb +6 -3
  13. data/lib/piglet/field/reference.rb +5 -7
  14. data/lib/piglet/field/rename.rb +7 -5
  15. data/lib/piglet/field/suffix_expression.rb +4 -2
  16. data/lib/piglet/field/udf_expression.rb +19 -2
  17. data/lib/piglet/inout/load.rb +2 -2
  18. data/lib/piglet/interpreter.rb +8 -18
  19. data/lib/piglet/relation/block_context.rb +41 -0
  20. data/lib/piglet/relation/cogroup.rb +2 -1
  21. data/lib/piglet/relation/cross.rb +2 -2
  22. data/lib/piglet/relation/distinct.rb +2 -2
  23. data/lib/piglet/relation/filter.rb +2 -2
  24. data/lib/piglet/relation/foreach.rb +2 -2
  25. data/lib/piglet/relation/group.rb +2 -2
  26. data/lib/piglet/relation/join.rb +2 -1
  27. data/lib/piglet/relation/limit.rb +2 -2
  28. data/lib/piglet/relation/nested_foreach.rb +60 -0
  29. data/lib/piglet/relation/order.rb +4 -2
  30. data/lib/piglet/relation/relation.rb +43 -32
  31. data/lib/piglet/relation/sample.rb +2 -2
  32. data/lib/piglet/relation/split.rb +5 -5
  33. data/lib/piglet/relation/stream.rb +2 -1
  34. data/lib/piglet/relation/union.rb +2 -2
  35. data/piglet.gemspec +126 -0
  36. data/spec/piglet/field/field_spec.rb +7 -2
  37. data/spec/piglet/interpreter_spec.rb +6 -6
  38. data/spec/piglet/relation/relation_spec.rb +7 -4
  39. data/spec/piglet/relation/split_spec.rb +3 -1
  40. data/spec/piglet/relation/union_spec.rb +5 -7
  41. data/spec/piglet_spec.rb +76 -31
  42. data/spec/spec_helper.rb +9 -0
  43. data/tasks/gem.rake +16 -19
  44. data/tasks/rdoc.rake +1 -3
  45. metadata +34 -11
  46. data/TODO +0 -2
@@ -5,14 +5,16 @@ module Piglet
5
5
  class Order # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, fields, options)
8
+ def initialize(relation, interpreter, fields, options)
9
9
  options ||= {}
10
+ @interpreter = interpreter
10
11
  @sources, @parallel = [relation], options[:parallel]
11
12
  @fields = fields.is_a?(Enumerable) ? fields : [fields]
12
13
  end
13
14
 
14
15
  def to_s
15
- "ORDER #{@sources.first.alias} BY #{field_strings}"
16
+ target = @sources.first.respond_to?(:field_alias) ? @sources.first.field_alias : @sources.first.alias
17
+ "ORDER #{target} BY #{field_strings}"
16
18
  end
17
19
 
18
20
  private
@@ -8,7 +8,13 @@ module Piglet
8
8
  # The name this relation will get in Pig Latin. The name is generated when
9
9
  # the relation is output by the interpreter, and will be unique.
10
10
  def alias
11
- @alias ||= Relation.next_alias
11
+ @alias ||= @interpreter.next_relation_alias
12
+ end
13
+
14
+ def next_field_alias
15
+ @field_counter ||= 0
16
+ @field_counter += 1
17
+ "#{self.alias}_field_#{@field_counter}"
12
18
  end
13
19
 
14
20
  # GROUP
@@ -18,7 +24,7 @@ module Piglet
18
24
  # x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3
19
25
  def group(*args)
20
26
  grouping, options = split_at_options(args)
21
- Group.new(self, [grouping].flatten, options)
27
+ Group.new(self, @interpreter, [grouping].flatten, options)
22
28
  end
23
29
 
24
30
  # DISTINCT
@@ -26,7 +32,7 @@ module Piglet
26
32
  # x.distinct # => DISTINCT x
27
33
  # x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5
28
34
  def distinct(options={})
29
- Distinct.new(self, options)
35
+ Distinct.new(self, @interpreter, options)
30
36
  end
31
37
 
32
38
  # COGROUP
@@ -37,7 +43,7 @@ module Piglet
37
43
  # x.cogroup(x => :a, y => [:b, :inner]) # => COGROUP x BY a, y BY b INNER
38
44
  # x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5
39
45
  def cogroup(description)
40
- Cogroup.new(self, description)
46
+ Cogroup.new(self, @interpreter, description)
41
47
  end
42
48
 
43
49
  # CROSS
@@ -47,29 +53,39 @@ module Piglet
47
53
  # x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5
48
54
  def cross(*args)
49
55
  relations, options = split_at_options(args)
50
- Cross.new(([self] + relations).flatten, options)
56
+ Cross.new(([self] + relations).flatten, @interpreter, options)
51
57
  end
52
58
 
53
59
  # FILTER
54
60
  #
55
- # x.filter { |r| r.a == r.b } # => FILTER x BY a == b
56
- # x.filter { |r| r.a > r.b && r.c != 3 } # => FILTER x BY a > b AND c != 3
57
- def filter
58
- Filter.new(self, yield(self))
61
+ # x.filter { a == b } # => FILTER x BY a == b
62
+ # x.filter { a > b && c == 3 } # => FILTER x BY a > b AND c == 3
63
+ def filter(&block)
64
+ context = BlockContext.new(self, @interpreter)
65
+ Filter.new(self, @interpreter, context.instance_eval(&block))
59
66
  end
60
67
 
61
68
  # FOREACH ... GENERATE
62
69
  #
63
- # x.foreach { |r| r.a } # => FOREACH x GENERATE a
64
- # x.foreach { |r| [r.a, r.b] } # => FOREACH x GENERATE a, b
65
- # x.foreach { |r| r.a.max } # => FOREACH x GENERATE MAX(a)
66
- # x.foreach { |r| r.a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
70
+ # x.foreach { a } # => FOREACH x GENERATE a
71
+ # x.foreach { [a, b] } # => FOREACH x GENERATE a, b
72
+ # x.foreach { a.max } # => FOREACH x GENERATE MAX(a)
73
+ # x.foreach { a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
67
74
  #
68
- #--
75
+ # See #nested_foreach for FOREACH ... { ... GENERATE }
76
+ def foreach(&block)
77
+ context = BlockContext.new(self, @interpreter)
78
+ Foreach.new(self, @interpreter, context.instance_eval(&block))
79
+ end
80
+
81
+ # FOREACH ... { ... GENERATE }
82
+ #
83
+ # x.nested_foreach { [a.distinct] } # => FOREACH x { a1 = DISTINCT a; GENERATE a1 }
69
84
  #
70
- # TODO: FOREACH a { b GENERATE c }
71
- def foreach
72
- Foreach.new(self, yield(self))
85
+ # See #foreach for FOREACH ... GENERATE
86
+ def nested_foreach(&block)
87
+ context = BlockContext.new(self, @interpreter)
88
+ NestedForeach.new(self, @interpreter, context.instance_eval(&block))
73
89
  end
74
90
 
75
91
  # JOIN
@@ -79,14 +95,14 @@ module Piglet
79
95
  # x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
80
96
  # x.join(x => :a, y => :b, :parallel => 5) # => JOIN x BY a, y BY b PARALLEL 5
81
97
  def join(description)
82
- Join.new(self, description)
98
+ Join.new(self, @interpreter, description)
83
99
  end
84
100
 
85
101
  # LIMIT
86
102
  #
87
103
  # x.limit(10) # => LIMIT x 10
88
104
  def limit(n)
89
- Limit.new(self, n)
105
+ Limit.new(self, @interpreter, n)
90
106
  end
91
107
 
92
108
  # ORDER
@@ -103,21 +119,22 @@ module Piglet
103
119
  def order(*args)
104
120
  fields, options = split_at_options(args)
105
121
  fields = *fields
106
- Order.new(self, fields, options)
122
+ Order.new(self, @interpreter, fields, options)
107
123
  end
108
124
 
109
125
  # SAMPLE
110
126
  #
111
127
  # x.sample(5) # => SAMPLE x 5;
112
128
  def sample(n)
113
- Sample.new(self, n)
129
+ Sample.new(self, @interpreter, n)
114
130
  end
115
131
 
116
132
  # SPLIT
117
133
  #
118
- # y, z = x.split { |r| [r.a <= 3, r.b > 4]} # => SPLIT x INTO y IF a <= 3, z IF a > 4
119
- def split
120
- Split.new(self, yield(self)).shards
134
+ # y, z = x.split { [a <= 3, b > 4] } # => SPLIT x INTO y IF a <= 3, z IF b > 4
135
+ def split(&block)
136
+ context = BlockContext.new(self, @interpreter)
137
+ Split.new(self, @interpreter, context.instance_eval(&block)).shards
121
138
  end
122
139
 
123
140
  # STREAM
@@ -128,7 +145,7 @@ module Piglet
128
145
  # x.stream(:cmd, :schema => [%w(a int)]) # => STREAM x THROUGH cmd AS (a:int)
129
146
  def stream(*args)
130
147
  fields, options = split_at_options(args)
131
- Stream.new(self, fields, options)
148
+ Stream.new(self, @interpreter, fields, options)
132
149
  end
133
150
 
134
151
  # UNION
@@ -136,7 +153,7 @@ module Piglet
136
153
  # x.union(y) # => UNION x, y
137
154
  # x.union(y, z) # => UNION x, y, z
138
155
  def union(*relations)
139
- Union.new(*([self] + relations))
156
+ Union.new(([self] + relations).flatten, @interpreter)
140
157
  end
141
158
 
142
159
  def field(name)
@@ -183,12 +200,6 @@ module Piglet
183
200
  [parameters, nil]
184
201
  end
185
202
  end
186
-
187
- def self.next_alias
188
- @counter ||= 0
189
- @counter += 1
190
- "relation_#{@counter}"
191
- end
192
203
  end
193
204
  end
194
205
  end
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Sample # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, n)
9
- @sources, @n = [relation], n
8
+ def initialize(relation, interpreter, n)
9
+ @sources, @interpreter, @n = [relation], interpreter, n
10
10
  end
11
11
 
12
12
  def to_s
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Split # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(relation, expressions)
9
- @sources, @expressions = [relation], expressions
8
+ def initialize(relation, interpreter, expressions)
9
+ @sources, @interpreter, @expressions = [relation], interpreter, expressions
10
10
  @shard_map = create_shards
11
11
  end
12
12
 
@@ -26,7 +26,7 @@ module Piglet
26
26
 
27
27
  def create_shards
28
28
  @expressions.inject({}) do |map, expr|
29
- map[expr] = RelationShard.new(self)
29
+ map[expr] = RelationShard.new(self, @interpreter)
30
30
  map
31
31
  end
32
32
  end
@@ -35,8 +35,8 @@ module Piglet
35
35
  class RelationShard # :nodoc:
36
36
  include Relation
37
37
 
38
- def initialize(split)
39
- @sources = [split]
38
+ def initialize(split, interpreter)
39
+ @sources, @interpreter = [split], interpreter
40
40
  end
41
41
 
42
42
  def to_s
@@ -5,7 +5,8 @@ module Piglet
5
5
  class Stream # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(source, args, options=nil)
8
+ def initialize(source, interpreter, args, options=nil)
9
+ @interpreter = interpreter
9
10
  options ||= {}
10
11
  @sources = [source]
11
12
  args.each do |arg|
@@ -5,8 +5,8 @@ module Piglet
5
5
  class Union # :nodoc:
6
6
  include Relation
7
7
 
8
- def initialize(*relations)
9
- @sources = [relations].flatten
8
+ def initialize(relations, interpreter)
9
+ @sources, @interpreter = relations, interpreter
10
10
  end
11
11
 
12
12
  def to_s
@@ -0,0 +1,126 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{piglet}
8
+ s.version = "0.3.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Theo Hultberg"]
12
+ s.date = %q{2010-07-09}
13
+ s.default_executable = %q{piglet}
14
+ s.description = %q{Piglet aims to look like Pig Latin while allowing for things like loops and control of flow that are missing from Pig.}
15
+ s.email = %q{theo@iconara.net}
16
+ s.executables = ["piglet"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".gitignore",
24
+ "Gemfile",
25
+ "Gemfile.lock",
26
+ "LICENSE",
27
+ "README.rdoc",
28
+ "Rakefile",
29
+ "bin/piglet",
30
+ "lib/piglet.rb",
31
+ "lib/piglet/field/binary_conditional.rb",
32
+ "lib/piglet/field/call_expression.rb",
33
+ "lib/piglet/field/direct_expression.rb",
34
+ "lib/piglet/field/field.rb",
35
+ "lib/piglet/field/infix_expression.rb",
36
+ "lib/piglet/field/literal.rb",
37
+ "lib/piglet/field/map_value.rb",
38
+ "lib/piglet/field/prefix_expression.rb",
39
+ "lib/piglet/field/reference.rb",
40
+ "lib/piglet/field/rename.rb",
41
+ "lib/piglet/field/suffix_expression.rb",
42
+ "lib/piglet/field/udf_expression.rb",
43
+ "lib/piglet/inout/describe.rb",
44
+ "lib/piglet/inout/dump.rb",
45
+ "lib/piglet/inout/explain.rb",
46
+ "lib/piglet/inout/illustrate.rb",
47
+ "lib/piglet/inout/load.rb",
48
+ "lib/piglet/inout/output.rb",
49
+ "lib/piglet/inout/storage_types.rb",
50
+ "lib/piglet/inout/store.rb",
51
+ "lib/piglet/interpreter.rb",
52
+ "lib/piglet/param/declare.rb",
53
+ "lib/piglet/param/default.rb",
54
+ "lib/piglet/param/parameter_statement.rb",
55
+ "lib/piglet/relation/block_context.rb",
56
+ "lib/piglet/relation/cogroup.rb",
57
+ "lib/piglet/relation/cross.rb",
58
+ "lib/piglet/relation/distinct.rb",
59
+ "lib/piglet/relation/filter.rb",
60
+ "lib/piglet/relation/foreach.rb",
61
+ "lib/piglet/relation/group.rb",
62
+ "lib/piglet/relation/join.rb",
63
+ "lib/piglet/relation/limit.rb",
64
+ "lib/piglet/relation/nested_foreach.rb",
65
+ "lib/piglet/relation/order.rb",
66
+ "lib/piglet/relation/relation.rb",
67
+ "lib/piglet/relation/sample.rb",
68
+ "lib/piglet/relation/split.rb",
69
+ "lib/piglet/relation/stream.rb",
70
+ "lib/piglet/relation/union.rb",
71
+ "lib/piglet/schema/bag.rb",
72
+ "lib/piglet/schema/tuple.rb",
73
+ "lib/piglet/udf/define.rb",
74
+ "lib/piglet/udf/register.rb",
75
+ "piglet.gemspec",
76
+ "spec/piglet/field/binary_conditional_spec.rb",
77
+ "spec/piglet/field/field_spec.rb",
78
+ "spec/piglet/field/infix_expression_spec.rb",
79
+ "spec/piglet/field/literal_spec.rb",
80
+ "spec/piglet/field/reference_spec.rb",
81
+ "spec/piglet/interpreter_spec.rb",
82
+ "spec/piglet/relation/relation_spec.rb",
83
+ "spec/piglet/relation/split_spec.rb",
84
+ "spec/piglet/relation/union_spec.rb",
85
+ "spec/piglet/schema/tuple_spec.rb",
86
+ "spec/piglet_spec.rb",
87
+ "spec/spec.opts",
88
+ "spec/spec_helper.rb",
89
+ "tasks/gem.rake",
90
+ "tasks/rdoc.rake",
91
+ "tasks/spec.rake"
92
+ ]
93
+ s.homepage = %q{http://github.com/iconara/piglet}
94
+ s.rdoc_options = ["--charset=UTF-8"]
95
+ s.require_paths = ["lib"]
96
+ s.rubygems_version = %q{1.3.7}
97
+ s.summary = %q{Piglet is a DSL for Pig scripts}
98
+ s.test_files = [
99
+ "spec/piglet/field/binary_conditional_spec.rb",
100
+ "spec/piglet/field/field_spec.rb",
101
+ "spec/piglet/field/infix_expression_spec.rb",
102
+ "spec/piglet/field/literal_spec.rb",
103
+ "spec/piglet/field/reference_spec.rb",
104
+ "spec/piglet/interpreter_spec.rb",
105
+ "spec/piglet/relation/relation_spec.rb",
106
+ "spec/piglet/relation/split_spec.rb",
107
+ "spec/piglet/relation/union_spec.rb",
108
+ "spec/piglet/schema/tuple_spec.rb",
109
+ "spec/piglet_spec.rb",
110
+ "spec/spec_helper.rb"
111
+ ]
112
+
113
+ if s.respond_to? :specification_version then
114
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
115
+ s.specification_version = 3
116
+
117
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
118
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
119
+ else
120
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
121
+ end
122
+ else
123
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
124
+ end
125
+ end
126
+
@@ -9,8 +9,9 @@ include Piglet::Field
9
9
  describe Field do
10
10
 
11
11
  before do
12
- @field = mock('field')
13
- @field.extend Field
12
+ @field = Object.new
13
+ @field.extend(Field)
14
+ @field.stub(:predecessors).and_return([stub(:generate_field_alias => 'xyz')])
14
15
  @expressions = {}
15
16
  [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
16
17
  @expressions[type] = mock("#{type} expression")
@@ -19,6 +20,10 @@ describe Field do
19
20
  end
20
21
  end
21
22
 
23
+ it 'should have an alias' do
24
+ @field.field_alias.should_not be_nil
25
+ end
26
+
22
27
  describe '#type' do
23
28
  [:==, :ne, :<, :>, :<=, :>=, :and, :or].each do |op|
24
29
  op_str = (op == :ne ? '!=' : op).to_s.upcase
@@ -32,7 +32,7 @@ describe Piglet::Interpreter do
32
32
  describe '#test' do
33
33
  it 'outputs a binary conditional' do
34
34
  @interpreter.interpret do
35
- dump(load('in').foreach { |r| [test(r.a == r.b, r.a, r.b)]})
35
+ dump(load('in').foreach { [test(self.a == self.b, self.a, self.b)]})
36
36
  end
37
37
  @interpreter.to_pig_latin.should include('(a == b ? a : b)')
38
38
  end
@@ -41,35 +41,35 @@ describe Piglet::Interpreter do
41
41
  describe '#literal' do
42
42
  it 'outputs a literal string' do
43
43
  @interpreter.interpret do
44
- dump(load('in').foreach { |r| [literal('hello').as(:world)]})
44
+ dump(load('in').foreach { [literal('hello').as(:world)]})
45
45
  end
46
46
  @interpreter.to_pig_latin.should include("'hello' AS world")
47
47
  end
48
48
 
49
49
  it 'outputs a literal integer' do
50
50
  @interpreter.interpret do
51
- dump(load('in').foreach { |r| [literal(3).as(:n)]})
51
+ dump(load('in').foreach { [literal(3).as(:n)]})
52
52
  end
53
53
  @interpreter.to_pig_latin.should include("3 AS n")
54
54
  end
55
55
 
56
56
  it 'outputs a literal float' do
57
57
  @interpreter.interpret do
58
- dump(load('in').foreach { |r| [literal(3.14).as(:pi)]})
58
+ dump(load('in').foreach { [literal(3.14).as(:pi)]})
59
59
  end
60
60
  @interpreter.to_pig_latin.should include("3.14 AS pi")
61
61
  end
62
62
 
63
63
  it 'outputs a literal string when passed an arbitrary object' do
64
64
  @interpreter.interpret do
65
- dump(load('in').foreach { |r| [literal(self).as(:interpreter)]})
65
+ dump(load('in').foreach { [literal(self).as(:interpreter)]})
66
66
  end
67
67
  @interpreter.to_pig_latin.should match(/'[^']+' AS interpreter/)
68
68
  end
69
69
 
70
70
  it 'escapes single quotes' do
71
71
  @interpreter.interpret do
72
- dump(load('in').foreach { |r| [literal("hello 'world'").as(:str)]})
72
+ dump(load('in').foreach { [literal("hello 'world'").as(:str)]})
73
73
  end
74
74
  @interpreter.to_pig_latin.should include("'hello \\'world\\'' AS str")
75
75
  end