piglet 0.2.5 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -4
- data/Gemfile +10 -0
- data/Gemfile.lock +53 -0
- data/README.rdoc +74 -38
- data/Rakefile +10 -1
- data/lib/piglet.rb +5 -1
- data/lib/piglet/field/call_expression.rb +7 -2
- data/lib/piglet/field/direct_expression.rb +28 -0
- data/lib/piglet/field/field.rb +73 -3
- data/lib/piglet/field/infix_expression.rb +14 -9
- data/lib/piglet/field/map_value.rb +17 -0
- data/lib/piglet/field/prefix_expression.rb +6 -3
- data/lib/piglet/field/reference.rb +5 -7
- data/lib/piglet/field/rename.rb +7 -5
- data/lib/piglet/field/suffix_expression.rb +4 -2
- data/lib/piglet/field/udf_expression.rb +19 -2
- data/lib/piglet/inout/load.rb +2 -2
- data/lib/piglet/interpreter.rb +8 -18
- data/lib/piglet/relation/block_context.rb +41 -0
- data/lib/piglet/relation/cogroup.rb +2 -1
- data/lib/piglet/relation/cross.rb +2 -2
- data/lib/piglet/relation/distinct.rb +2 -2
- data/lib/piglet/relation/filter.rb +2 -2
- data/lib/piglet/relation/foreach.rb +2 -2
- data/lib/piglet/relation/group.rb +2 -2
- data/lib/piglet/relation/join.rb +2 -1
- data/lib/piglet/relation/limit.rb +2 -2
- data/lib/piglet/relation/nested_foreach.rb +60 -0
- data/lib/piglet/relation/order.rb +4 -2
- data/lib/piglet/relation/relation.rb +43 -32
- data/lib/piglet/relation/sample.rb +2 -2
- data/lib/piglet/relation/split.rb +5 -5
- data/lib/piglet/relation/stream.rb +2 -1
- data/lib/piglet/relation/union.rb +2 -2
- data/piglet.gemspec +126 -0
- data/spec/piglet/field/field_spec.rb +7 -2
- data/spec/piglet/interpreter_spec.rb +6 -6
- data/spec/piglet/relation/relation_spec.rb +7 -4
- data/spec/piglet/relation/split_spec.rb +3 -1
- data/spec/piglet/relation/union_spec.rb +5 -7
- data/spec/piglet_spec.rb +76 -31
- data/spec/spec_helper.rb +9 -0
- data/tasks/gem.rake +16 -19
- data/tasks/rdoc.rake +1 -3
- metadata +34 -11
- data/TODO +0 -2
@@ -5,14 +5,16 @@ module Piglet
|
|
5
5
|
class Order # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, fields, options)
|
8
|
+
def initialize(relation, interpreter, fields, options)
|
9
9
|
options ||= {}
|
10
|
+
@interpreter = interpreter
|
10
11
|
@sources, @parallel = [relation], options[:parallel]
|
11
12
|
@fields = fields.is_a?(Enumerable) ? fields : [fields]
|
12
13
|
end
|
13
14
|
|
14
15
|
def to_s
|
15
|
-
|
16
|
+
target = @sources.first.respond_to?(:field_alias) ? @sources.first.field_alias : @sources.first.alias
|
17
|
+
"ORDER #{target} BY #{field_strings}"
|
16
18
|
end
|
17
19
|
|
18
20
|
private
|
@@ -8,7 +8,13 @@ module Piglet
|
|
8
8
|
# The name this relation will get in Pig Latin. The name is generated when
|
9
9
|
# the relation is output by the interpreter, and will be unique.
|
10
10
|
def alias
|
11
|
-
@alias ||=
|
11
|
+
@alias ||= @interpreter.next_relation_alias
|
12
|
+
end
|
13
|
+
|
14
|
+
def next_field_alias
|
15
|
+
@field_counter ||= 0
|
16
|
+
@field_counter += 1
|
17
|
+
"#{self.alias}_field_#{@field_counter}"
|
12
18
|
end
|
13
19
|
|
14
20
|
# GROUP
|
@@ -18,7 +24,7 @@ module Piglet
|
|
18
24
|
# x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3
|
19
25
|
def group(*args)
|
20
26
|
grouping, options = split_at_options(args)
|
21
|
-
Group.new(self, [grouping].flatten, options)
|
27
|
+
Group.new(self, @interpreter, [grouping].flatten, options)
|
22
28
|
end
|
23
29
|
|
24
30
|
# DISTINCT
|
@@ -26,7 +32,7 @@ module Piglet
|
|
26
32
|
# x.distinct # => DISTINCT x
|
27
33
|
# x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5
|
28
34
|
def distinct(options={})
|
29
|
-
Distinct.new(self, options)
|
35
|
+
Distinct.new(self, @interpreter, options)
|
30
36
|
end
|
31
37
|
|
32
38
|
# COGROUP
|
@@ -37,7 +43,7 @@ module Piglet
|
|
37
43
|
# x.cogroup(x => :a, y => [:b, :inner]) # => COGROUP x BY a, y BY b INNER
|
38
44
|
# x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5
|
39
45
|
def cogroup(description)
|
40
|
-
Cogroup.new(self, description)
|
46
|
+
Cogroup.new(self, @interpreter, description)
|
41
47
|
end
|
42
48
|
|
43
49
|
# CROSS
|
@@ -47,29 +53,39 @@ module Piglet
|
|
47
53
|
# x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5
|
48
54
|
def cross(*args)
|
49
55
|
relations, options = split_at_options(args)
|
50
|
-
Cross.new(([self] + relations).flatten, options)
|
56
|
+
Cross.new(([self] + relations).flatten, @interpreter, options)
|
51
57
|
end
|
52
58
|
|
53
59
|
# FILTER
|
54
60
|
#
|
55
|
-
# x.filter {
|
56
|
-
# x.filter {
|
57
|
-
def filter
|
58
|
-
|
61
|
+
# x.filter { a == b } # => FILTER x BY a == b
|
62
|
+
# x.filter { a > b && c == 3 } # => FILTER x BY a > b AND c == 3
|
63
|
+
def filter(&block)
|
64
|
+
context = BlockContext.new(self, @interpreter)
|
65
|
+
Filter.new(self, @interpreter, context.instance_eval(&block))
|
59
66
|
end
|
60
67
|
|
61
68
|
# FOREACH ... GENERATE
|
62
69
|
#
|
63
|
-
# x.foreach {
|
64
|
-
# x.foreach {
|
65
|
-
# x.foreach {
|
66
|
-
# x.foreach {
|
70
|
+
# x.foreach { a } # => FOREACH x GENERATE a
|
71
|
+
# x.foreach { [a, b] } # => FOREACH x GENERATE a, b
|
72
|
+
# x.foreach { a.max } # => FOREACH x GENERATE MAX(a)
|
73
|
+
# x.foreach { a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
|
67
74
|
#
|
68
|
-
|
75
|
+
# See #nested_foreach for FOREACH ... { ... GENERATE }
|
76
|
+
def foreach(&block)
|
77
|
+
context = BlockContext.new(self, @interpreter)
|
78
|
+
Foreach.new(self, @interpreter, context.instance_eval(&block))
|
79
|
+
end
|
80
|
+
|
81
|
+
# FOREACH ... { ... GENERATE }
|
82
|
+
#
|
83
|
+
# x.nested_foreach { [a.distinct] } # => FOREACH x { a1 = DISTINCT a; GENERATE a1 }
|
69
84
|
#
|
70
|
-
#
|
71
|
-
def
|
72
|
-
|
85
|
+
# See #foreach for FOREACH ... GENERATE
|
86
|
+
def nested_foreach(&block)
|
87
|
+
context = BlockContext.new(self, @interpreter)
|
88
|
+
NestedForeach.new(self, @interpreter, context.instance_eval(&block))
|
73
89
|
end
|
74
90
|
|
75
91
|
# JOIN
|
@@ -79,14 +95,14 @@ module Piglet
|
|
79
95
|
# x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
|
80
96
|
# x.join(x => :a, y => :b, :parallel => 5) # => JOIN x BY a, y BY b PARALLEL 5
|
81
97
|
def join(description)
|
82
|
-
Join.new(self, description)
|
98
|
+
Join.new(self, @interpreter, description)
|
83
99
|
end
|
84
100
|
|
85
101
|
# LIMIT
|
86
102
|
#
|
87
103
|
# x.limit(10) # => LIMIT x 10
|
88
104
|
def limit(n)
|
89
|
-
Limit.new(self, n)
|
105
|
+
Limit.new(self, @interpreter, n)
|
90
106
|
end
|
91
107
|
|
92
108
|
# ORDER
|
@@ -103,21 +119,22 @@ module Piglet
|
|
103
119
|
def order(*args)
|
104
120
|
fields, options = split_at_options(args)
|
105
121
|
fields = *fields
|
106
|
-
Order.new(self, fields, options)
|
122
|
+
Order.new(self, @interpreter, fields, options)
|
107
123
|
end
|
108
124
|
|
109
125
|
# SAMPLE
|
110
126
|
#
|
111
127
|
# x.sample(5) # => SAMPLE x 5;
|
112
128
|
def sample(n)
|
113
|
-
Sample.new(self, n)
|
129
|
+
Sample.new(self, @interpreter, n)
|
114
130
|
end
|
115
131
|
|
116
132
|
# SPLIT
|
117
133
|
#
|
118
|
-
# y, z = x.split {
|
119
|
-
def split
|
120
|
-
|
134
|
+
# y, z = x.split { [a <= 3, b > 4] } # => SPLIT x INTO y IF a <= 3, z IF b > 4
|
135
|
+
def split(&block)
|
136
|
+
context = BlockContext.new(self, @interpreter)
|
137
|
+
Split.new(self, @interpreter, context.instance_eval(&block)).shards
|
121
138
|
end
|
122
139
|
|
123
140
|
# STREAM
|
@@ -128,7 +145,7 @@ module Piglet
|
|
128
145
|
# x.stream(:cmd, :schema => [%w(a int)]) # => STREAM x THROUGH cmd AS (a:int)
|
129
146
|
def stream(*args)
|
130
147
|
fields, options = split_at_options(args)
|
131
|
-
Stream.new(self, fields, options)
|
148
|
+
Stream.new(self, @interpreter, fields, options)
|
132
149
|
end
|
133
150
|
|
134
151
|
# UNION
|
@@ -136,7 +153,7 @@ module Piglet
|
|
136
153
|
# x.union(y) # => UNION x, y
|
137
154
|
# x.union(y, z) # => UNION x, y, z
|
138
155
|
def union(*relations)
|
139
|
-
Union.new(
|
156
|
+
Union.new(([self] + relations).flatten, @interpreter)
|
140
157
|
end
|
141
158
|
|
142
159
|
def field(name)
|
@@ -183,12 +200,6 @@ module Piglet
|
|
183
200
|
[parameters, nil]
|
184
201
|
end
|
185
202
|
end
|
186
|
-
|
187
|
-
def self.next_alias
|
188
|
-
@counter ||= 0
|
189
|
-
@counter += 1
|
190
|
-
"relation_#{@counter}"
|
191
|
-
end
|
192
203
|
end
|
193
204
|
end
|
194
205
|
end
|
@@ -5,8 +5,8 @@ module Piglet
|
|
5
5
|
class Split # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, expressions)
|
9
|
-
@sources, @expressions = [relation], expressions
|
8
|
+
def initialize(relation, interpreter, expressions)
|
9
|
+
@sources, @interpreter, @expressions = [relation], interpreter, expressions
|
10
10
|
@shard_map = create_shards
|
11
11
|
end
|
12
12
|
|
@@ -26,7 +26,7 @@ module Piglet
|
|
26
26
|
|
27
27
|
def create_shards
|
28
28
|
@expressions.inject({}) do |map, expr|
|
29
|
-
map[expr] = RelationShard.new(self)
|
29
|
+
map[expr] = RelationShard.new(self, @interpreter)
|
30
30
|
map
|
31
31
|
end
|
32
32
|
end
|
@@ -35,8 +35,8 @@ module Piglet
|
|
35
35
|
class RelationShard # :nodoc:
|
36
36
|
include Relation
|
37
37
|
|
38
|
-
def initialize(split)
|
39
|
-
@sources = [split]
|
38
|
+
def initialize(split, interpreter)
|
39
|
+
@sources, @interpreter = [split], interpreter
|
40
40
|
end
|
41
41
|
|
42
42
|
def to_s
|
data/piglet.gemspec
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{piglet}
|
8
|
+
s.version = "0.3.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Theo Hultberg"]
|
12
|
+
s.date = %q{2010-07-09}
|
13
|
+
s.default_executable = %q{piglet}
|
14
|
+
s.description = %q{Piglet aims to look like Pig Latin while allowing for things like loops and control of flow that are missing from Pig.}
|
15
|
+
s.email = %q{theo@iconara.net}
|
16
|
+
s.executables = ["piglet"]
|
17
|
+
s.extra_rdoc_files = [
|
18
|
+
"LICENSE",
|
19
|
+
"README.rdoc"
|
20
|
+
]
|
21
|
+
s.files = [
|
22
|
+
".document",
|
23
|
+
".gitignore",
|
24
|
+
"Gemfile",
|
25
|
+
"Gemfile.lock",
|
26
|
+
"LICENSE",
|
27
|
+
"README.rdoc",
|
28
|
+
"Rakefile",
|
29
|
+
"bin/piglet",
|
30
|
+
"lib/piglet.rb",
|
31
|
+
"lib/piglet/field/binary_conditional.rb",
|
32
|
+
"lib/piglet/field/call_expression.rb",
|
33
|
+
"lib/piglet/field/direct_expression.rb",
|
34
|
+
"lib/piglet/field/field.rb",
|
35
|
+
"lib/piglet/field/infix_expression.rb",
|
36
|
+
"lib/piglet/field/literal.rb",
|
37
|
+
"lib/piglet/field/map_value.rb",
|
38
|
+
"lib/piglet/field/prefix_expression.rb",
|
39
|
+
"lib/piglet/field/reference.rb",
|
40
|
+
"lib/piglet/field/rename.rb",
|
41
|
+
"lib/piglet/field/suffix_expression.rb",
|
42
|
+
"lib/piglet/field/udf_expression.rb",
|
43
|
+
"lib/piglet/inout/describe.rb",
|
44
|
+
"lib/piglet/inout/dump.rb",
|
45
|
+
"lib/piglet/inout/explain.rb",
|
46
|
+
"lib/piglet/inout/illustrate.rb",
|
47
|
+
"lib/piglet/inout/load.rb",
|
48
|
+
"lib/piglet/inout/output.rb",
|
49
|
+
"lib/piglet/inout/storage_types.rb",
|
50
|
+
"lib/piglet/inout/store.rb",
|
51
|
+
"lib/piglet/interpreter.rb",
|
52
|
+
"lib/piglet/param/declare.rb",
|
53
|
+
"lib/piglet/param/default.rb",
|
54
|
+
"lib/piglet/param/parameter_statement.rb",
|
55
|
+
"lib/piglet/relation/block_context.rb",
|
56
|
+
"lib/piglet/relation/cogroup.rb",
|
57
|
+
"lib/piglet/relation/cross.rb",
|
58
|
+
"lib/piglet/relation/distinct.rb",
|
59
|
+
"lib/piglet/relation/filter.rb",
|
60
|
+
"lib/piglet/relation/foreach.rb",
|
61
|
+
"lib/piglet/relation/group.rb",
|
62
|
+
"lib/piglet/relation/join.rb",
|
63
|
+
"lib/piglet/relation/limit.rb",
|
64
|
+
"lib/piglet/relation/nested_foreach.rb",
|
65
|
+
"lib/piglet/relation/order.rb",
|
66
|
+
"lib/piglet/relation/relation.rb",
|
67
|
+
"lib/piglet/relation/sample.rb",
|
68
|
+
"lib/piglet/relation/split.rb",
|
69
|
+
"lib/piglet/relation/stream.rb",
|
70
|
+
"lib/piglet/relation/union.rb",
|
71
|
+
"lib/piglet/schema/bag.rb",
|
72
|
+
"lib/piglet/schema/tuple.rb",
|
73
|
+
"lib/piglet/udf/define.rb",
|
74
|
+
"lib/piglet/udf/register.rb",
|
75
|
+
"piglet.gemspec",
|
76
|
+
"spec/piglet/field/binary_conditional_spec.rb",
|
77
|
+
"spec/piglet/field/field_spec.rb",
|
78
|
+
"spec/piglet/field/infix_expression_spec.rb",
|
79
|
+
"spec/piglet/field/literal_spec.rb",
|
80
|
+
"spec/piglet/field/reference_spec.rb",
|
81
|
+
"spec/piglet/interpreter_spec.rb",
|
82
|
+
"spec/piglet/relation/relation_spec.rb",
|
83
|
+
"spec/piglet/relation/split_spec.rb",
|
84
|
+
"spec/piglet/relation/union_spec.rb",
|
85
|
+
"spec/piglet/schema/tuple_spec.rb",
|
86
|
+
"spec/piglet_spec.rb",
|
87
|
+
"spec/spec.opts",
|
88
|
+
"spec/spec_helper.rb",
|
89
|
+
"tasks/gem.rake",
|
90
|
+
"tasks/rdoc.rake",
|
91
|
+
"tasks/spec.rake"
|
92
|
+
]
|
93
|
+
s.homepage = %q{http://github.com/iconara/piglet}
|
94
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
95
|
+
s.require_paths = ["lib"]
|
96
|
+
s.rubygems_version = %q{1.3.7}
|
97
|
+
s.summary = %q{Piglet is a DSL for Pig scripts}
|
98
|
+
s.test_files = [
|
99
|
+
"spec/piglet/field/binary_conditional_spec.rb",
|
100
|
+
"spec/piglet/field/field_spec.rb",
|
101
|
+
"spec/piglet/field/infix_expression_spec.rb",
|
102
|
+
"spec/piglet/field/literal_spec.rb",
|
103
|
+
"spec/piglet/field/reference_spec.rb",
|
104
|
+
"spec/piglet/interpreter_spec.rb",
|
105
|
+
"spec/piglet/relation/relation_spec.rb",
|
106
|
+
"spec/piglet/relation/split_spec.rb",
|
107
|
+
"spec/piglet/relation/union_spec.rb",
|
108
|
+
"spec/piglet/schema/tuple_spec.rb",
|
109
|
+
"spec/piglet_spec.rb",
|
110
|
+
"spec/spec_helper.rb"
|
111
|
+
]
|
112
|
+
|
113
|
+
if s.respond_to? :specification_version then
|
114
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
115
|
+
s.specification_version = 3
|
116
|
+
|
117
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
118
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
119
|
+
else
|
120
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
121
|
+
end
|
122
|
+
else
|
123
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
@@ -9,8 +9,9 @@ include Piglet::Field
|
|
9
9
|
describe Field do
|
10
10
|
|
11
11
|
before do
|
12
|
-
@field =
|
13
|
-
@field.extend
|
12
|
+
@field = Object.new
|
13
|
+
@field.extend(Field)
|
14
|
+
@field.stub(:predecessors).and_return([stub(:generate_field_alias => 'xyz')])
|
14
15
|
@expressions = {}
|
15
16
|
[:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
|
16
17
|
@expressions[type] = mock("#{type} expression")
|
@@ -19,6 +20,10 @@ describe Field do
|
|
19
20
|
end
|
20
21
|
end
|
21
22
|
|
23
|
+
it 'should have an alias' do
|
24
|
+
@field.field_alias.should_not be_nil
|
25
|
+
end
|
26
|
+
|
22
27
|
describe '#type' do
|
23
28
|
[:==, :ne, :<, :>, :<=, :>=, :and, :or].each do |op|
|
24
29
|
op_str = (op == :ne ? '!=' : op).to_s.upcase
|
@@ -32,7 +32,7 @@ describe Piglet::Interpreter do
|
|
32
32
|
describe '#test' do
|
33
33
|
it 'outputs a binary conditional' do
|
34
34
|
@interpreter.interpret do
|
35
|
-
dump(load('in').foreach {
|
35
|
+
dump(load('in').foreach { [test(self.a == self.b, self.a, self.b)]})
|
36
36
|
end
|
37
37
|
@interpreter.to_pig_latin.should include('(a == b ? a : b)')
|
38
38
|
end
|
@@ -41,35 +41,35 @@ describe Piglet::Interpreter do
|
|
41
41
|
describe '#literal' do
|
42
42
|
it 'outputs a literal string' do
|
43
43
|
@interpreter.interpret do
|
44
|
-
dump(load('in').foreach {
|
44
|
+
dump(load('in').foreach { [literal('hello').as(:world)]})
|
45
45
|
end
|
46
46
|
@interpreter.to_pig_latin.should include("'hello' AS world")
|
47
47
|
end
|
48
48
|
|
49
49
|
it 'outputs a literal integer' do
|
50
50
|
@interpreter.interpret do
|
51
|
-
dump(load('in').foreach {
|
51
|
+
dump(load('in').foreach { [literal(3).as(:n)]})
|
52
52
|
end
|
53
53
|
@interpreter.to_pig_latin.should include("3 AS n")
|
54
54
|
end
|
55
55
|
|
56
56
|
it 'outputs a literal float' do
|
57
57
|
@interpreter.interpret do
|
58
|
-
dump(load('in').foreach {
|
58
|
+
dump(load('in').foreach { [literal(3.14).as(:pi)]})
|
59
59
|
end
|
60
60
|
@interpreter.to_pig_latin.should include("3.14 AS pi")
|
61
61
|
end
|
62
62
|
|
63
63
|
it 'outputs a literal string when passed an arbitrary object' do
|
64
64
|
@interpreter.interpret do
|
65
|
-
dump(load('in').foreach {
|
65
|
+
dump(load('in').foreach { [literal(self).as(:interpreter)]})
|
66
66
|
end
|
67
67
|
@interpreter.to_pig_latin.should match(/'[^']+' AS interpreter/)
|
68
68
|
end
|
69
69
|
|
70
70
|
it 'escapes single quotes' do
|
71
71
|
@interpreter.interpret do
|
72
|
-
dump(load('in').foreach {
|
72
|
+
dump(load('in').foreach { [literal("hello 'world'").as(:str)]})
|
73
73
|
end
|
74
74
|
@interpreter.to_pig_latin.should include("'hello \\'world\\'' AS str")
|
75
75
|
end
|