piglet 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -4
- data/lib/piglet/field/binary_conditional.rb +15 -0
- data/lib/piglet/field/call_expression.rb +21 -0
- data/lib/piglet/field/infix_expression.rb +19 -0
- data/lib/piglet/field/literal.rb +20 -0
- data/lib/piglet/field/operators.rb +80 -0
- data/lib/piglet/field/prefix_expression.rb +23 -0
- data/lib/piglet/field/reference.rb +41 -0
- data/lib/piglet/field/rename.rb +13 -0
- data/lib/piglet/field/suffix_expression.rb +19 -0
- data/lib/piglet/inout/describe.rb +7 -0
- data/lib/piglet/inout/dump.rb +7 -0
- data/lib/piglet/inout/explain.rb +15 -0
- data/lib/piglet/inout/illustrate.rb +7 -0
- data/lib/piglet/inout/load.rb +31 -0
- data/lib/piglet/inout/output.rb +15 -0
- data/lib/piglet/inout/storage_types.rb +18 -0
- data/lib/piglet/inout/store.rb +19 -0
- data/lib/piglet/interpreter.rb +39 -7
- data/lib/piglet/relation/cogroup.rb +33 -0
- data/lib/piglet/relation/cross.rb +24 -0
- data/lib/piglet/relation/distinct.rb +18 -0
- data/lib/piglet/relation/filter.rb +15 -0
- data/lib/piglet/relation/foreach.rb +21 -0
- data/lib/piglet/relation/group.rb +23 -0
- data/lib/piglet/relation/join.rb +22 -0
- data/lib/piglet/relation/limit.rb +15 -0
- data/lib/piglet/relation/order.rb +31 -0
- data/lib/piglet/relation/relation.rb +179 -0
- data/lib/piglet/relation/sample.rb +15 -0
- data/lib/piglet/relation/split.rb +45 -0
- data/lib/piglet/relation/stream.rb +7 -0
- data/lib/piglet/relation/union.rb +21 -0
- data/lib/piglet.rb +40 -38
- data/spec/piglet/{field_spec.rb → field/reference_spec.rb} +22 -6
- data/spec/piglet/interpreter_spec.rb +51 -5
- data/spec/piglet/{relation_spec.rb → relation/relation_spec.rb} +6 -6
- data/spec/piglet/{split_spec.rb → relation/split_spec.rb} +8 -8
- data/spec/spec_helper.rb +0 -2
- metadata +39 -40
- data/examples/spike1.rb +0 -43
- data/examples/spike2.rb +0 -40
- data/lib/piglet/assignment.rb +0 -13
- data/lib/piglet/cogroup.rb +0 -31
- data/lib/piglet/cross.rb +0 -22
- data/lib/piglet/describe.rb +0 -5
- data/lib/piglet/distinct.rb +0 -16
- data/lib/piglet/dump.rb +0 -5
- data/lib/piglet/explain.rb +0 -13
- data/lib/piglet/field.rb +0 -40
- data/lib/piglet/field_expression_functions.rb +0 -62
- data/lib/piglet/field_function_expression.rb +0 -19
- data/lib/piglet/field_infix_expression.rb +0 -17
- data/lib/piglet/field_prefix_expression.rb +0 -21
- data/lib/piglet/field_rename.rb +0 -11
- data/lib/piglet/field_suffix_expression.rb +0 -17
- data/lib/piglet/filter.rb +0 -13
- data/lib/piglet/foreach.rb +0 -19
- data/lib/piglet/group.rb +0 -21
- data/lib/piglet/illustrate.rb +0 -5
- data/lib/piglet/join.rb +0 -20
- data/lib/piglet/limit.rb +0 -13
- data/lib/piglet/load.rb +0 -31
- data/lib/piglet/load_and_store.rb +0 -16
- data/lib/piglet/order.rb +0 -29
- data/lib/piglet/relation.rb +0 -177
- data/lib/piglet/sample.rb +0 -13
- data/lib/piglet/split.rb +0 -41
- data/lib/piglet/store.rb +0 -17
- data/lib/piglet/storing.rb +0 -13
- data/lib/piglet/stream.rb +0 -5
- data/lib/piglet/union.rb +0 -19
data/lib/piglet/relation.rb
DELETED
@@ -1,177 +0,0 @@
|
|
1
|
-
module Piglet
|
2
|
-
module Relation
|
3
|
-
attr_reader :sources
|
4
|
-
|
5
|
-
# The name this relation will get in Pig Latin. The name is generated when
|
6
|
-
# the relation is output by the interpreter, and will be unique.
|
7
|
-
def alias
|
8
|
-
@alias ||= Relation.next_alias
|
9
|
-
end
|
10
|
-
|
11
|
-
# GROUP
|
12
|
-
#
|
13
|
-
# x.group(:a) # => GROUP x BY a
|
14
|
-
# x.group(:a, :b, :c) # => GROUP x BY (a, b, c)
|
15
|
-
# x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3
|
16
|
-
def group(*args)
|
17
|
-
grouping, options = split_at_options(args)
|
18
|
-
Group.new(self, [grouping].flatten, options)
|
19
|
-
end
|
20
|
-
|
21
|
-
# DISTINCT
|
22
|
-
#
|
23
|
-
# x.distinct # => DISTINCT x
|
24
|
-
# x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5
|
25
|
-
def distinct(options={})
|
26
|
-
Distinct.new(self, options)
|
27
|
-
end
|
28
|
-
|
29
|
-
# COGROUP
|
30
|
-
#
|
31
|
-
# x.cogroup(x => :a, y => :b) # => COGROUP x BY a, y BY b
|
32
|
-
# x.cogroup(x => :a, y => :b, z => :c) # => COGROUP x BY a, y BY b, z BY c
|
33
|
-
# x.cogroup(x => [:a, :b], y => [:c, :d]) # => COGROUP x BY (a, b), y BY (c, d)
|
34
|
-
# x.cogroup(x => :a, y => [:b, :inner]) # => COGROUP x BY a, y BY b INNER
|
35
|
-
# x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5
|
36
|
-
def cogroup(description)
|
37
|
-
Cogroup.new(self, description)
|
38
|
-
end
|
39
|
-
|
40
|
-
# CROSS
|
41
|
-
#
|
42
|
-
# x.cross(y) # => CROSS x, y
|
43
|
-
# x.cross(y, z, w) # => CROSS x, y, z, w
|
44
|
-
# x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5
|
45
|
-
def cross(*args)
|
46
|
-
relations, options = split_at_options(args)
|
47
|
-
Cross.new(([self] + relations).flatten, options)
|
48
|
-
end
|
49
|
-
|
50
|
-
# FILTER
|
51
|
-
#
|
52
|
-
# x.filter { |r| r.a == r.b } # => FILTER x BY a == b
|
53
|
-
# x.filter { |r| r.a > r.b && r.c != 3 } # => FILTER x BY a > b AND c != 3
|
54
|
-
def filter
|
55
|
-
Filter.new(self, yield(self))
|
56
|
-
end
|
57
|
-
|
58
|
-
# FOREACH ... GENERATE
|
59
|
-
#
|
60
|
-
# x.foreach { |r| r.a } # => FOREACH x GENERATE a
|
61
|
-
# x.foreach { |r| [r.a, r.b] } # => FOREACH x GENERATE a, b
|
62
|
-
# x.foreach { |r| r.a.max } # => FOREACH x GENERATE MAX(a)
|
63
|
-
# x.foreach { |r| r.a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
|
64
|
-
#
|
65
|
-
#--
|
66
|
-
#
|
67
|
-
# TODO: FOREACH a { b GENERATE c }
|
68
|
-
def foreach
|
69
|
-
Foreach.new(self, yield(self))
|
70
|
-
end
|
71
|
-
|
72
|
-
# JOIN
|
73
|
-
#
|
74
|
-
# x.join(x => :a, y => :b) # => JOIN x BY a, y BY b
|
75
|
-
# x.join(x => :a, y => :b, z => :c) # => JOIN x BY a, y BY b, z BY c
|
76
|
-
# x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
|
77
|
-
# x.join(x => :a, y => :b, :parallel => 5) # => JOIN x BY a, y BY b PARALLEL 5
|
78
|
-
def join(description)
|
79
|
-
Join.new(self, description)
|
80
|
-
end
|
81
|
-
|
82
|
-
# LIMIT
|
83
|
-
#
|
84
|
-
# x.limit(10) # => LIMIT x 10
|
85
|
-
def limit(n)
|
86
|
-
Limit.new(self, n)
|
87
|
-
end
|
88
|
-
|
89
|
-
# ORDER
|
90
|
-
#
|
91
|
-
# x.order(:a) # => ORDER x BY a
|
92
|
-
# x.order(:a, :b) # => ORDER x BY a, b
|
93
|
-
# x.order([:a, :asc], [:b, :desc]) # => ORDER x BY a ASC, b DESC
|
94
|
-
# x.order(:a, :parallel => 5) # => ORDER x BY a PARALLEL 5
|
95
|
-
#
|
96
|
-
#--
|
97
|
-
#
|
98
|
-
# NOTE: the syntax x.order(:a => :asc, :b => :desc) would be nice, but in
|
99
|
-
# Ruby 1.8 the order of the keys cannot be guaranteed.
|
100
|
-
def order(*args)
|
101
|
-
fields, options = split_at_options(args)
|
102
|
-
fields = *fields
|
103
|
-
Order.new(self, fields, options)
|
104
|
-
end
|
105
|
-
|
106
|
-
# SAMPLE
|
107
|
-
#
|
108
|
-
# x.sample(5) # => SAMPLE x 5
|
109
|
-
def sample(n)
|
110
|
-
Sample.new(self, n)
|
111
|
-
end
|
112
|
-
|
113
|
-
# SPLIT
|
114
|
-
#
|
115
|
-
# y, z = x.split { |r| [r.a <= 3, r.b > 4]} # => SPLIT x INTO y IF a <= 3, z IF b > 4
|
116
|
-
def split
|
117
|
-
Split.new(self, yield(self)).shards
|
118
|
-
end
|
119
|
-
|
120
|
-
# STREAM
|
121
|
-
#
|
122
|
-
# x.stream(x, 'cut -f 3') # => STREAM x THROUGH `cut -f 3`
|
123
|
-
# x.stream([x, y], 'cut -f 3') # => STREAM x, y THROUGH `cut -f 3`
|
124
|
-
# x.stream(x, 'cut -f 3', :schema => [%w(a int)]) # => STREAM x THROUGH `cut -f 3` AS (a:int)
|
125
|
-
#
|
126
|
-
#--
|
127
|
-
#
|
128
|
-
# TODO: how to handle DEFINE'd commands?
|
129
|
-
def stream(relations, command, options={})
|
130
|
-
raise NotSupportedError
|
131
|
-
end
|
132
|
-
|
133
|
-
# UNION
|
134
|
-
#
|
135
|
-
# x.union(y) # => UNION x, y
|
136
|
-
# x.union(y, z) # => UNION x, y, z
|
137
|
-
def union(*relations)
|
138
|
-
Union.new(*([self] + relations))
|
139
|
-
end
|
140
|
-
|
141
|
-
def method_missing(name, *args)
|
142
|
-
if name.to_s =~ /^\w+$/ && args.empty?
|
143
|
-
Field.new(name, self)
|
144
|
-
else
|
145
|
-
super
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
def [](n)
|
150
|
-
Field.new("\$#{n}", self)
|
151
|
-
end
|
152
|
-
|
153
|
-
def hash
|
154
|
-
self.alias.hash
|
155
|
-
end
|
156
|
-
|
157
|
-
def eql?(other)
|
158
|
-
other.is_a?(Relation) && other.alias == self.alias
|
159
|
-
end
|
160
|
-
|
161
|
-
private
|
162
|
-
|
163
|
-
def split_at_options(parameters)
|
164
|
-
if parameters.last.is_a? Hash
|
165
|
-
[parameters[0..-2], parameters.last]
|
166
|
-
else
|
167
|
-
[parameters, nil]
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
def self.next_alias
|
172
|
-
@counter ||= 0
|
173
|
-
@counter += 1
|
174
|
-
"relation_#{@counter}"
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
data/lib/piglet/sample.rb
DELETED
data/lib/piglet/split.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
module Piglet
|
2
|
-
class Split # :nodoc:
|
3
|
-
include Relation
|
4
|
-
|
5
|
-
|
6
|
-
def initialize(relation, expressions)
|
7
|
-
@sources, @expressions = [relation], expressions
|
8
|
-
@shard_map = create_shards
|
9
|
-
end
|
10
|
-
|
11
|
-
def shards
|
12
|
-
@shard_map.keys
|
13
|
-
end
|
14
|
-
|
15
|
-
def to_s
|
16
|
-
"SPLIT #{@sources.first.alias} INTO #{split_strings}"
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def create_shards
|
22
|
-
Hash[*@expressions.map { |expr| [RelationShard.new(self), expr] }.flatten]
|
23
|
-
end
|
24
|
-
|
25
|
-
def split_strings
|
26
|
-
shards.map { |relation| "#{relation.alias} IF #{@shard_map[relation]}" }.join(', ')
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
class RelationShard # :nodoc:
|
31
|
-
include Relation
|
32
|
-
|
33
|
-
def initialize(split)
|
34
|
-
@sources = [split]
|
35
|
-
end
|
36
|
-
|
37
|
-
def to_s
|
38
|
-
self.alias
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
data/lib/piglet/store.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
module Piglet
|
2
|
-
class Store # :nodoc:
|
3
|
-
include LoadAndStore
|
4
|
-
include Storing
|
5
|
-
|
6
|
-
def initialize(relation, path, options={})
|
7
|
-
@relation, @path, @using = relation, path, options[:using]
|
8
|
-
end
|
9
|
-
|
10
|
-
def to_s
|
11
|
-
str = super
|
12
|
-
str << " INTO '#{@path}'"
|
13
|
-
str << " USING #{resolve_load_store_function(@using)}" if @using
|
14
|
-
str
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
data/lib/piglet/storing.rb
DELETED
data/lib/piglet/stream.rb
DELETED
data/lib/piglet/union.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
module Piglet
|
2
|
-
class Union # :nodoc:
|
3
|
-
include Relation
|
4
|
-
|
5
|
-
def initialize(*relations)
|
6
|
-
@sources = relations
|
7
|
-
end
|
8
|
-
|
9
|
-
def to_s
|
10
|
-
"UNION #{source_aliases.join(', ')}"
|
11
|
-
end
|
12
|
-
|
13
|
-
private
|
14
|
-
|
15
|
-
def source_aliases
|
16
|
-
@sources.map { |s| s.alias }
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|