piglet 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -4
- data/Gemfile +10 -0
- data/Gemfile.lock +53 -0
- data/README.rdoc +74 -38
- data/Rakefile +10 -1
- data/lib/piglet.rb +5 -1
- data/lib/piglet/field/call_expression.rb +7 -2
- data/lib/piglet/field/direct_expression.rb +28 -0
- data/lib/piglet/field/field.rb +73 -3
- data/lib/piglet/field/infix_expression.rb +14 -9
- data/lib/piglet/field/map_value.rb +17 -0
- data/lib/piglet/field/prefix_expression.rb +6 -3
- data/lib/piglet/field/reference.rb +5 -7
- data/lib/piglet/field/rename.rb +7 -5
- data/lib/piglet/field/suffix_expression.rb +4 -2
- data/lib/piglet/field/udf_expression.rb +19 -2
- data/lib/piglet/inout/load.rb +2 -2
- data/lib/piglet/interpreter.rb +8 -18
- data/lib/piglet/relation/block_context.rb +41 -0
- data/lib/piglet/relation/cogroup.rb +2 -1
- data/lib/piglet/relation/cross.rb +2 -2
- data/lib/piglet/relation/distinct.rb +2 -2
- data/lib/piglet/relation/filter.rb +2 -2
- data/lib/piglet/relation/foreach.rb +2 -2
- data/lib/piglet/relation/group.rb +2 -2
- data/lib/piglet/relation/join.rb +2 -1
- data/lib/piglet/relation/limit.rb +2 -2
- data/lib/piglet/relation/nested_foreach.rb +60 -0
- data/lib/piglet/relation/order.rb +4 -2
- data/lib/piglet/relation/relation.rb +43 -32
- data/lib/piglet/relation/sample.rb +2 -2
- data/lib/piglet/relation/split.rb +5 -5
- data/lib/piglet/relation/stream.rb +2 -1
- data/lib/piglet/relation/union.rb +2 -2
- data/piglet.gemspec +126 -0
- data/spec/piglet/field/field_spec.rb +7 -2
- data/spec/piglet/interpreter_spec.rb +6 -6
- data/spec/piglet/relation/relation_spec.rb +7 -4
- data/spec/piglet/relation/split_spec.rb +3 -1
- data/spec/piglet/relation/union_spec.rb +5 -7
- data/spec/piglet_spec.rb +76 -31
- data/spec/spec_helper.rb +9 -0
- data/tasks/gem.rake +16 -19
- data/tasks/rdoc.rake +1 -3
- metadata +34 -11
- data/TODO +0 -2
@@ -5,14 +5,16 @@ module Piglet
|
|
5
5
|
class Order # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, fields, options)
|
8
|
+
def initialize(relation, interpreter, fields, options)
|
9
9
|
options ||= {}
|
10
|
+
@interpreter = interpreter
|
10
11
|
@sources, @parallel = [relation], options[:parallel]
|
11
12
|
@fields = fields.is_a?(Enumerable) ? fields : [fields]
|
12
13
|
end
|
13
14
|
|
14
15
|
def to_s
|
15
|
-
|
16
|
+
target = @sources.first.respond_to?(:field_alias) ? @sources.first.field_alias : @sources.first.alias
|
17
|
+
"ORDER #{target} BY #{field_strings}"
|
16
18
|
end
|
17
19
|
|
18
20
|
private
|
@@ -8,7 +8,13 @@ module Piglet
|
|
8
8
|
# The name this relation will get in Pig Latin. The name is generated when
|
9
9
|
# the relation is output by the interpreter, and will be unique.
|
10
10
|
def alias
|
11
|
-
@alias ||=
|
11
|
+
@alias ||= @interpreter.next_relation_alias
|
12
|
+
end
|
13
|
+
|
14
|
+
def next_field_alias
|
15
|
+
@field_counter ||= 0
|
16
|
+
@field_counter += 1
|
17
|
+
"#{self.alias}_field_#{@field_counter}"
|
12
18
|
end
|
13
19
|
|
14
20
|
# GROUP
|
@@ -18,7 +24,7 @@ module Piglet
|
|
18
24
|
# x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3
|
19
25
|
def group(*args)
|
20
26
|
grouping, options = split_at_options(args)
|
21
|
-
Group.new(self, [grouping].flatten, options)
|
27
|
+
Group.new(self, @interpreter, [grouping].flatten, options)
|
22
28
|
end
|
23
29
|
|
24
30
|
# DISTINCT
|
@@ -26,7 +32,7 @@ module Piglet
|
|
26
32
|
# x.distinct # => DISTINCT x
|
27
33
|
# x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5
|
28
34
|
def distinct(options={})
|
29
|
-
Distinct.new(self, options)
|
35
|
+
Distinct.new(self, @interpreter, options)
|
30
36
|
end
|
31
37
|
|
32
38
|
# COGROUP
|
@@ -37,7 +43,7 @@ module Piglet
|
|
37
43
|
# x.cogroup(x => :a, y => [:b, :inner]) # => COGROUP x BY a, y BY b INNER
|
38
44
|
# x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5
|
39
45
|
def cogroup(description)
|
40
|
-
Cogroup.new(self, description)
|
46
|
+
Cogroup.new(self, @interpreter, description)
|
41
47
|
end
|
42
48
|
|
43
49
|
# CROSS
|
@@ -47,29 +53,39 @@ module Piglet
|
|
47
53
|
# x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5
|
48
54
|
def cross(*args)
|
49
55
|
relations, options = split_at_options(args)
|
50
|
-
Cross.new(([self] + relations).flatten, options)
|
56
|
+
Cross.new(([self] + relations).flatten, @interpreter, options)
|
51
57
|
end
|
52
58
|
|
53
59
|
# FILTER
|
54
60
|
#
|
55
|
-
# x.filter {
|
56
|
-
# x.filter {
|
57
|
-
def filter
|
58
|
-
|
61
|
+
# x.filter { a == b } # => FILTER x BY a == b
|
62
|
+
# x.filter { a > b && c == 3 } # => FILTER x BY a > b AND c == 3
|
63
|
+
def filter(&block)
|
64
|
+
context = BlockContext.new(self, @interpreter)
|
65
|
+
Filter.new(self, @interpreter, context.instance_eval(&block))
|
59
66
|
end
|
60
67
|
|
61
68
|
# FOREACH ... GENERATE
|
62
69
|
#
|
63
|
-
# x.foreach {
|
64
|
-
# x.foreach {
|
65
|
-
# x.foreach {
|
66
|
-
# x.foreach {
|
70
|
+
# x.foreach { a } # => FOREACH x GENERATE a
|
71
|
+
# x.foreach { [a, b] } # => FOREACH x GENERATE a, b
|
72
|
+
# x.foreach { a.max } # => FOREACH x GENERATE MAX(a)
|
73
|
+
# x.foreach { a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
|
67
74
|
#
|
68
|
-
|
75
|
+
# See #nested_foreach for FOREACH ... { ... GENERATE }
|
76
|
+
def foreach(&block)
|
77
|
+
context = BlockContext.new(self, @interpreter)
|
78
|
+
Foreach.new(self, @interpreter, context.instance_eval(&block))
|
79
|
+
end
|
80
|
+
|
81
|
+
# FOREACH ... { ... GENERATE }
|
82
|
+
#
|
83
|
+
# x.nested_foreach { [a.distinct] } # => FOREACH x { a1 = DISTINCT a; GENERATE a1 }
|
69
84
|
#
|
70
|
-
#
|
71
|
-
def
|
72
|
-
|
85
|
+
# See #foreach for FOREACH ... GENERATE
|
86
|
+
def nested_foreach(&block)
|
87
|
+
context = BlockContext.new(self, @interpreter)
|
88
|
+
NestedForeach.new(self, @interpreter, context.instance_eval(&block))
|
73
89
|
end
|
74
90
|
|
75
91
|
# JOIN
|
@@ -79,14 +95,14 @@ module Piglet
|
|
79
95
|
# x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
|
80
96
|
# x.join(x => :a, y => :b, :parallel => 5) # => JOIN x BY a, y BY b PARALLEL 5
|
81
97
|
def join(description)
|
82
|
-
Join.new(self, description)
|
98
|
+
Join.new(self, @interpreter, description)
|
83
99
|
end
|
84
100
|
|
85
101
|
# LIMIT
|
86
102
|
#
|
87
103
|
# x.limit(10) # => LIMIT x 10
|
88
104
|
def limit(n)
|
89
|
-
Limit.new(self, n)
|
105
|
+
Limit.new(self, @interpreter, n)
|
90
106
|
end
|
91
107
|
|
92
108
|
# ORDER
|
@@ -103,21 +119,22 @@ module Piglet
|
|
103
119
|
def order(*args)
|
104
120
|
fields, options = split_at_options(args)
|
105
121
|
fields = *fields
|
106
|
-
Order.new(self, fields, options)
|
122
|
+
Order.new(self, @interpreter, fields, options)
|
107
123
|
end
|
108
124
|
|
109
125
|
# SAMPLE
|
110
126
|
#
|
111
127
|
# x.sample(5) # => SAMPLE x 5;
|
112
128
|
def sample(n)
|
113
|
-
Sample.new(self, n)
|
129
|
+
Sample.new(self, @interpreter, n)
|
114
130
|
end
|
115
131
|
|
116
132
|
# SPLIT
|
117
133
|
#
|
118
|
-
# y, z = x.split {
|
119
|
-
def split
|
120
|
-
|
134
|
+
# y, z = x.split { [a <= 3, b > 4] } # => SPLIT x INTO y IF a <= 3, z IF b > 4
|
135
|
+
def split(&block)
|
136
|
+
context = BlockContext.new(self, @interpreter)
|
137
|
+
Split.new(self, @interpreter, context.instance_eval(&block)).shards
|
121
138
|
end
|
122
139
|
|
123
140
|
# STREAM
|
@@ -128,7 +145,7 @@ module Piglet
|
|
128
145
|
# x.stream(:cmd, :schema => [%w(a int)]) # => STREAM x THROUGH cmd AS (a:int)
|
129
146
|
def stream(*args)
|
130
147
|
fields, options = split_at_options(args)
|
131
|
-
Stream.new(self, fields, options)
|
148
|
+
Stream.new(self, @interpreter, fields, options)
|
132
149
|
end
|
133
150
|
|
134
151
|
# UNION
|
@@ -136,7 +153,7 @@ module Piglet
|
|
136
153
|
# x.union(y) # => UNION x, y
|
137
154
|
# x.union(y, z) # => UNION x, y, z
|
138
155
|
def union(*relations)
|
139
|
-
Union.new(
|
156
|
+
Union.new(([self] + relations).flatten, @interpreter)
|
140
157
|
end
|
141
158
|
|
142
159
|
def field(name)
|
@@ -183,12 +200,6 @@ module Piglet
|
|
183
200
|
[parameters, nil]
|
184
201
|
end
|
185
202
|
end
|
186
|
-
|
187
|
-
def self.next_alias
|
188
|
-
@counter ||= 0
|
189
|
-
@counter += 1
|
190
|
-
"relation_#{@counter}"
|
191
|
-
end
|
192
203
|
end
|
193
204
|
end
|
194
205
|
end
|
@@ -5,8 +5,8 @@ module Piglet
|
|
5
5
|
class Split # :nodoc:
|
6
6
|
include Relation
|
7
7
|
|
8
|
-
def initialize(relation, expressions)
|
9
|
-
@sources, @expressions = [relation], expressions
|
8
|
+
def initialize(relation, interpreter, expressions)
|
9
|
+
@sources, @interpreter, @expressions = [relation], interpreter, expressions
|
10
10
|
@shard_map = create_shards
|
11
11
|
end
|
12
12
|
|
@@ -26,7 +26,7 @@ module Piglet
|
|
26
26
|
|
27
27
|
def create_shards
|
28
28
|
@expressions.inject({}) do |map, expr|
|
29
|
-
map[expr] = RelationShard.new(self)
|
29
|
+
map[expr] = RelationShard.new(self, @interpreter)
|
30
30
|
map
|
31
31
|
end
|
32
32
|
end
|
@@ -35,8 +35,8 @@ module Piglet
|
|
35
35
|
class RelationShard # :nodoc:
|
36
36
|
include Relation
|
37
37
|
|
38
|
-
def initialize(split)
|
39
|
-
@sources = [split]
|
38
|
+
def initialize(split, interpreter)
|
39
|
+
@sources, @interpreter = [split], interpreter
|
40
40
|
end
|
41
41
|
|
42
42
|
def to_s
|
data/piglet.gemspec
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{piglet}
|
8
|
+
s.version = "0.3.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Theo Hultberg"]
|
12
|
+
s.date = %q{2010-07-09}
|
13
|
+
s.default_executable = %q{piglet}
|
14
|
+
s.description = %q{Piglet aims to look like Pig Latin while allowing for things like loops and control of flow that are missing from Pig.}
|
15
|
+
s.email = %q{theo@iconara.net}
|
16
|
+
s.executables = ["piglet"]
|
17
|
+
s.extra_rdoc_files = [
|
18
|
+
"LICENSE",
|
19
|
+
"README.rdoc"
|
20
|
+
]
|
21
|
+
s.files = [
|
22
|
+
".document",
|
23
|
+
".gitignore",
|
24
|
+
"Gemfile",
|
25
|
+
"Gemfile.lock",
|
26
|
+
"LICENSE",
|
27
|
+
"README.rdoc",
|
28
|
+
"Rakefile",
|
29
|
+
"bin/piglet",
|
30
|
+
"lib/piglet.rb",
|
31
|
+
"lib/piglet/field/binary_conditional.rb",
|
32
|
+
"lib/piglet/field/call_expression.rb",
|
33
|
+
"lib/piglet/field/direct_expression.rb",
|
34
|
+
"lib/piglet/field/field.rb",
|
35
|
+
"lib/piglet/field/infix_expression.rb",
|
36
|
+
"lib/piglet/field/literal.rb",
|
37
|
+
"lib/piglet/field/map_value.rb",
|
38
|
+
"lib/piglet/field/prefix_expression.rb",
|
39
|
+
"lib/piglet/field/reference.rb",
|
40
|
+
"lib/piglet/field/rename.rb",
|
41
|
+
"lib/piglet/field/suffix_expression.rb",
|
42
|
+
"lib/piglet/field/udf_expression.rb",
|
43
|
+
"lib/piglet/inout/describe.rb",
|
44
|
+
"lib/piglet/inout/dump.rb",
|
45
|
+
"lib/piglet/inout/explain.rb",
|
46
|
+
"lib/piglet/inout/illustrate.rb",
|
47
|
+
"lib/piglet/inout/load.rb",
|
48
|
+
"lib/piglet/inout/output.rb",
|
49
|
+
"lib/piglet/inout/storage_types.rb",
|
50
|
+
"lib/piglet/inout/store.rb",
|
51
|
+
"lib/piglet/interpreter.rb",
|
52
|
+
"lib/piglet/param/declare.rb",
|
53
|
+
"lib/piglet/param/default.rb",
|
54
|
+
"lib/piglet/param/parameter_statement.rb",
|
55
|
+
"lib/piglet/relation/block_context.rb",
|
56
|
+
"lib/piglet/relation/cogroup.rb",
|
57
|
+
"lib/piglet/relation/cross.rb",
|
58
|
+
"lib/piglet/relation/distinct.rb",
|
59
|
+
"lib/piglet/relation/filter.rb",
|
60
|
+
"lib/piglet/relation/foreach.rb",
|
61
|
+
"lib/piglet/relation/group.rb",
|
62
|
+
"lib/piglet/relation/join.rb",
|
63
|
+
"lib/piglet/relation/limit.rb",
|
64
|
+
"lib/piglet/relation/nested_foreach.rb",
|
65
|
+
"lib/piglet/relation/order.rb",
|
66
|
+
"lib/piglet/relation/relation.rb",
|
67
|
+
"lib/piglet/relation/sample.rb",
|
68
|
+
"lib/piglet/relation/split.rb",
|
69
|
+
"lib/piglet/relation/stream.rb",
|
70
|
+
"lib/piglet/relation/union.rb",
|
71
|
+
"lib/piglet/schema/bag.rb",
|
72
|
+
"lib/piglet/schema/tuple.rb",
|
73
|
+
"lib/piglet/udf/define.rb",
|
74
|
+
"lib/piglet/udf/register.rb",
|
75
|
+
"piglet.gemspec",
|
76
|
+
"spec/piglet/field/binary_conditional_spec.rb",
|
77
|
+
"spec/piglet/field/field_spec.rb",
|
78
|
+
"spec/piglet/field/infix_expression_spec.rb",
|
79
|
+
"spec/piglet/field/literal_spec.rb",
|
80
|
+
"spec/piglet/field/reference_spec.rb",
|
81
|
+
"spec/piglet/interpreter_spec.rb",
|
82
|
+
"spec/piglet/relation/relation_spec.rb",
|
83
|
+
"spec/piglet/relation/split_spec.rb",
|
84
|
+
"spec/piglet/relation/union_spec.rb",
|
85
|
+
"spec/piglet/schema/tuple_spec.rb",
|
86
|
+
"spec/piglet_spec.rb",
|
87
|
+
"spec/spec.opts",
|
88
|
+
"spec/spec_helper.rb",
|
89
|
+
"tasks/gem.rake",
|
90
|
+
"tasks/rdoc.rake",
|
91
|
+
"tasks/spec.rake"
|
92
|
+
]
|
93
|
+
s.homepage = %q{http://github.com/iconara/piglet}
|
94
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
95
|
+
s.require_paths = ["lib"]
|
96
|
+
s.rubygems_version = %q{1.3.7}
|
97
|
+
s.summary = %q{Piglet is a DSL for Pig scripts}
|
98
|
+
s.test_files = [
|
99
|
+
"spec/piglet/field/binary_conditional_spec.rb",
|
100
|
+
"spec/piglet/field/field_spec.rb",
|
101
|
+
"spec/piglet/field/infix_expression_spec.rb",
|
102
|
+
"spec/piglet/field/literal_spec.rb",
|
103
|
+
"spec/piglet/field/reference_spec.rb",
|
104
|
+
"spec/piglet/interpreter_spec.rb",
|
105
|
+
"spec/piglet/relation/relation_spec.rb",
|
106
|
+
"spec/piglet/relation/split_spec.rb",
|
107
|
+
"spec/piglet/relation/union_spec.rb",
|
108
|
+
"spec/piglet/schema/tuple_spec.rb",
|
109
|
+
"spec/piglet_spec.rb",
|
110
|
+
"spec/spec_helper.rb"
|
111
|
+
]
|
112
|
+
|
113
|
+
if s.respond_to? :specification_version then
|
114
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
115
|
+
s.specification_version = 3
|
116
|
+
|
117
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
118
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
119
|
+
else
|
120
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
121
|
+
end
|
122
|
+
else
|
123
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
@@ -9,8 +9,9 @@ include Piglet::Field
|
|
9
9
|
describe Field do
|
10
10
|
|
11
11
|
before do
|
12
|
-
@field =
|
13
|
-
@field.extend
|
12
|
+
@field = Object.new
|
13
|
+
@field.extend(Field)
|
14
|
+
@field.stub(:predecessors).and_return([stub(:generate_field_alias => 'xyz')])
|
14
15
|
@expressions = {}
|
15
16
|
[:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
|
16
17
|
@expressions[type] = mock("#{type} expression")
|
@@ -19,6 +20,10 @@ describe Field do
|
|
19
20
|
end
|
20
21
|
end
|
21
22
|
|
23
|
+
it 'should have an alias' do
|
24
|
+
@field.field_alias.should_not be_nil
|
25
|
+
end
|
26
|
+
|
22
27
|
describe '#type' do
|
23
28
|
[:==, :ne, :<, :>, :<=, :>=, :and, :or].each do |op|
|
24
29
|
op_str = (op == :ne ? '!=' : op).to_s.upcase
|
@@ -32,7 +32,7 @@ describe Piglet::Interpreter do
|
|
32
32
|
describe '#test' do
|
33
33
|
it 'outputs a binary conditional' do
|
34
34
|
@interpreter.interpret do
|
35
|
-
dump(load('in').foreach {
|
35
|
+
dump(load('in').foreach { [test(self.a == self.b, self.a, self.b)]})
|
36
36
|
end
|
37
37
|
@interpreter.to_pig_latin.should include('(a == b ? a : b)')
|
38
38
|
end
|
@@ -41,35 +41,35 @@ describe Piglet::Interpreter do
|
|
41
41
|
describe '#literal' do
|
42
42
|
it 'outputs a literal string' do
|
43
43
|
@interpreter.interpret do
|
44
|
-
dump(load('in').foreach {
|
44
|
+
dump(load('in').foreach { [literal('hello').as(:world)]})
|
45
45
|
end
|
46
46
|
@interpreter.to_pig_latin.should include("'hello' AS world")
|
47
47
|
end
|
48
48
|
|
49
49
|
it 'outputs a literal integer' do
|
50
50
|
@interpreter.interpret do
|
51
|
-
dump(load('in').foreach {
|
51
|
+
dump(load('in').foreach { [literal(3).as(:n)]})
|
52
52
|
end
|
53
53
|
@interpreter.to_pig_latin.should include("3 AS n")
|
54
54
|
end
|
55
55
|
|
56
56
|
it 'outputs a literal float' do
|
57
57
|
@interpreter.interpret do
|
58
|
-
dump(load('in').foreach {
|
58
|
+
dump(load('in').foreach { [literal(3.14).as(:pi)]})
|
59
59
|
end
|
60
60
|
@interpreter.to_pig_latin.should include("3.14 AS pi")
|
61
61
|
end
|
62
62
|
|
63
63
|
it 'outputs a literal string when passed an arbitrary object' do
|
64
64
|
@interpreter.interpret do
|
65
|
-
dump(load('in').foreach {
|
65
|
+
dump(load('in').foreach { [literal(self).as(:interpreter)]})
|
66
66
|
end
|
67
67
|
@interpreter.to_pig_latin.should match(/'[^']+' AS interpreter/)
|
68
68
|
end
|
69
69
|
|
70
70
|
it 'escapes single quotes' do
|
71
71
|
@interpreter.interpret do
|
72
|
-
dump(load('in').foreach {
|
72
|
+
dump(load('in').foreach { [literal("hello 'world'").as(:str)]})
|
73
73
|
end
|
74
74
|
@interpreter.to_pig_latin.should include("'hello \\'world\\'' AS str")
|
75
75
|
end
|