piglet 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -4
- data/lib/piglet/field/binary_conditional.rb +15 -0
- data/lib/piglet/field/call_expression.rb +21 -0
- data/lib/piglet/field/infix_expression.rb +19 -0
- data/lib/piglet/field/literal.rb +20 -0
- data/lib/piglet/field/operators.rb +80 -0
- data/lib/piglet/field/prefix_expression.rb +23 -0
- data/lib/piglet/field/reference.rb +41 -0
- data/lib/piglet/field/rename.rb +13 -0
- data/lib/piglet/field/suffix_expression.rb +19 -0
- data/lib/piglet/inout/describe.rb +7 -0
- data/lib/piglet/inout/dump.rb +7 -0
- data/lib/piglet/inout/explain.rb +15 -0
- data/lib/piglet/inout/illustrate.rb +7 -0
- data/lib/piglet/inout/load.rb +31 -0
- data/lib/piglet/inout/output.rb +15 -0
- data/lib/piglet/inout/storage_types.rb +18 -0
- data/lib/piglet/inout/store.rb +19 -0
- data/lib/piglet/interpreter.rb +39 -7
- data/lib/piglet/relation/cogroup.rb +33 -0
- data/lib/piglet/relation/cross.rb +24 -0
- data/lib/piglet/relation/distinct.rb +18 -0
- data/lib/piglet/relation/filter.rb +15 -0
- data/lib/piglet/relation/foreach.rb +21 -0
- data/lib/piglet/relation/group.rb +23 -0
- data/lib/piglet/relation/join.rb +22 -0
- data/lib/piglet/relation/limit.rb +15 -0
- data/lib/piglet/relation/order.rb +31 -0
- data/lib/piglet/relation/relation.rb +179 -0
- data/lib/piglet/relation/sample.rb +15 -0
- data/lib/piglet/relation/split.rb +45 -0
- data/lib/piglet/relation/stream.rb +7 -0
- data/lib/piglet/relation/union.rb +21 -0
- data/lib/piglet.rb +40 -38
- data/spec/piglet/{field_spec.rb → field/reference_spec.rb} +22 -6
- data/spec/piglet/interpreter_spec.rb +51 -5
- data/spec/piglet/{relation_spec.rb → relation/relation_spec.rb} +6 -6
- data/spec/piglet/{split_spec.rb → relation/split_spec.rb} +8 -8
- data/spec/spec_helper.rb +0 -2
- metadata +39 -40
- data/examples/spike1.rb +0 -43
- data/examples/spike2.rb +0 -40
- data/lib/piglet/assignment.rb +0 -13
- data/lib/piglet/cogroup.rb +0 -31
- data/lib/piglet/cross.rb +0 -22
- data/lib/piglet/describe.rb +0 -5
- data/lib/piglet/distinct.rb +0 -16
- data/lib/piglet/dump.rb +0 -5
- data/lib/piglet/explain.rb +0 -13
- data/lib/piglet/field.rb +0 -40
- data/lib/piglet/field_expression_functions.rb +0 -62
- data/lib/piglet/field_function_expression.rb +0 -19
- data/lib/piglet/field_infix_expression.rb +0 -17
- data/lib/piglet/field_prefix_expression.rb +0 -21
- data/lib/piglet/field_rename.rb +0 -11
- data/lib/piglet/field_suffix_expression.rb +0 -17
- data/lib/piglet/filter.rb +0 -13
- data/lib/piglet/foreach.rb +0 -19
- data/lib/piglet/group.rb +0 -21
- data/lib/piglet/illustrate.rb +0 -5
- data/lib/piglet/join.rb +0 -20
- data/lib/piglet/limit.rb +0 -13
- data/lib/piglet/load.rb +0 -31
- data/lib/piglet/load_and_store.rb +0 -16
- data/lib/piglet/order.rb +0 -29
- data/lib/piglet/relation.rb +0 -177
- data/lib/piglet/sample.rb +0 -13
- data/lib/piglet/split.rb +0 -41
- data/lib/piglet/store.rb +0 -17
- data/lib/piglet/storing.rb +0 -13
- data/lib/piglet/stream.rb +0 -5
- data/lib/piglet/union.rb +0 -19
data/lib/piglet/relation.rb
DELETED
@@ -1,177 +0,0 @@
|
|
1
|
-
module Piglet
|
2
|
-
module Relation
|
3
|
-
attr_reader :sources
|
4
|
-
|
5
|
-
# The name this relation will get in Pig Latin. The name is generated when
|
6
|
-
# the relation is output by the interpreter, and will be unique.
|
7
|
-
def alias
|
8
|
-
@alias ||= Relation.next_alias
|
9
|
-
end
|
10
|
-
|
11
|
-
# GROUP
|
12
|
-
#
|
13
|
-
# x.group(:a) # => GROUP x BY a
|
14
|
-
# x.group(:a, :b, :c) # => GROUP x BY (a, b, c)
|
15
|
-
# x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3
|
16
|
-
def group(*args)
|
17
|
-
grouping, options = split_at_options(args)
|
18
|
-
Group.new(self, [grouping].flatten, options)
|
19
|
-
end
|
20
|
-
|
21
|
-
# DISTINCT
|
22
|
-
#
|
23
|
-
# x.distinct # => DISTINCT x
|
24
|
-
# x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5
|
25
|
-
def distinct(options={})
|
26
|
-
Distinct.new(self, options)
|
27
|
-
end
|
28
|
-
|
29
|
-
# COGROUP
|
30
|
-
#
|
31
|
-
# x.cogroup(x => :a, y => :b) # => COGROUP x BY a, y BY b
|
32
|
-
# x.cogroup(x => :a, y => :b, z => :c) # => COGROUP x BY a, y BY b, z BY c
|
33
|
-
# x.cogroup(x => [:a, :b], y => [:c, :d]) # => COGROUP x BY (a, b), y BY (c, d)
|
34
|
-
# x.cogroup(x => :a, y => [:b, :inner]) # => COGROUP x BY a, y BY b INNER
|
35
|
-
# x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5
|
36
|
-
def cogroup(description)
|
37
|
-
Cogroup.new(self, description)
|
38
|
-
end
|
39
|
-
|
40
|
-
# CROSS
|
41
|
-
#
|
42
|
-
# x.cross(y) # => CROSS x, y
|
43
|
-
# x.cross(y, z, w) # => CROSS x, y, z, w
|
44
|
-
# x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5
|
45
|
-
def cross(*args)
|
46
|
-
relations, options = split_at_options(args)
|
47
|
-
Cross.new(([self] + relations).flatten, options)
|
48
|
-
end
|
49
|
-
|
50
|
-
# FILTER
|
51
|
-
#
|
52
|
-
# x.filter { |r| r.a == r.b } # => FILTER x BY a == b
|
53
|
-
# x.filter { |r| r.a > r.b && r.c != 3 } # => FILTER x BY a > b AND c != 3
|
54
|
-
def filter
|
55
|
-
Filter.new(self, yield(self))
|
56
|
-
end
|
57
|
-
|
58
|
-
# FOREACH ... GENERATE
|
59
|
-
#
|
60
|
-
# x.foreach { |r| r.a } # => FOREACH x GENERATE a
|
61
|
-
# x.foreach { |r| [r.a, r.b] } # => FOREACH x GENERATE a, b
|
62
|
-
# x.foreach { |r| r.a.max } # => FOREACH x GENERATE MAX(a)
|
63
|
-
# x.foreach { |r| r.a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
|
64
|
-
#
|
65
|
-
#--
|
66
|
-
#
|
67
|
-
# TODO: FOREACH a { b GENERATE c }
|
68
|
-
def foreach
|
69
|
-
Foreach.new(self, yield(self))
|
70
|
-
end
|
71
|
-
|
72
|
-
# JOIN
|
73
|
-
#
|
74
|
-
# x.join(x => :a, y => :b) # => JOIN x BY a, y BY b
|
75
|
-
# x.join(x => :a, y => :b, z => :c) # => JOIN x BY a, y BY b, z BY c
|
76
|
-
# x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
|
77
|
-
# x.join(x => :a, y => :b, :parallel => 5) # => JOIN x BY a, y BY b PARALLEL 5
|
78
|
-
def join(description)
|
79
|
-
Join.new(self, description)
|
80
|
-
end
|
81
|
-
|
82
|
-
# LIMIT
|
83
|
-
#
|
84
|
-
# x.limit(10) # => LIMIT x 10
|
85
|
-
def limit(n)
|
86
|
-
Limit.new(self, n)
|
87
|
-
end
|
88
|
-
|
89
|
-
# ORDER
|
90
|
-
#
|
91
|
-
# x.order(:a) # => ORDER x BY a
|
92
|
-
# x.order(:a, :b) # => ORDER x BY a, b
|
93
|
-
# x.order([:a, :asc], [:b, :desc]) # => ORDER x BY a ASC, b DESC
|
94
|
-
# x.order(:a, :parallel => 5) # => ORDER x BY a PARALLEL 5
|
95
|
-
#
|
96
|
-
#--
|
97
|
-
#
|
98
|
-
# NOTE: the syntax x.order(:a => :asc, :b => :desc) would be nice, but in
|
99
|
-
# Ruby 1.8 the order of the keys cannot be guaranteed.
|
100
|
-
def order(*args)
|
101
|
-
fields, options = split_at_options(args)
|
102
|
-
fields = *fields
|
103
|
-
Order.new(self, fields, options)
|
104
|
-
end
|
105
|
-
|
106
|
-
# SAMPLE
|
107
|
-
#
|
108
|
-
# x.sample(5) # => SAMPLE x 5
|
109
|
-
def sample(n)
|
110
|
-
Sample.new(self, n)
|
111
|
-
end
|
112
|
-
|
113
|
-
# SPLIT
|
114
|
-
#
|
115
|
-
# y, z = x.split { |r| [r.a <= 3, r.b > 4]} # => SPLIT x INTO y IF a <= 3, z IF b > 4
|
116
|
-
def split
|
117
|
-
Split.new(self, yield(self)).shards
|
118
|
-
end
|
119
|
-
|
120
|
-
# STREAM
|
121
|
-
#
|
122
|
-
# x.stream(x, 'cut -f 3') # => STREAM x THROUGH `cut -f 3`
|
123
|
-
# x.stream([x, y], 'cut -f 3') # => STREAM x, y THROUGH `cut -f 3`
|
124
|
-
# x.stream(x, 'cut -f 3', :schema => [%w(a int)]) # => STREAM x THROUGH `cut -f 3` AS (a:int)
|
125
|
-
#
|
126
|
-
#--
|
127
|
-
#
|
128
|
-
# TODO: how to handle DEFINE'd commands?
|
129
|
-
def stream(relations, command, options={})
|
130
|
-
raise NotSupportedError
|
131
|
-
end
|
132
|
-
|
133
|
-
# UNION
|
134
|
-
#
|
135
|
-
# x.union(y) # => UNION x, y
|
136
|
-
# x.union(y, z) # => UNION x, y, z
|
137
|
-
def union(*relations)
|
138
|
-
Union.new(*([self] + relations))
|
139
|
-
end
|
140
|
-
|
141
|
-
def method_missing(name, *args)
|
142
|
-
if name.to_s =~ /^\w+$/ && args.empty?
|
143
|
-
Field.new(name, self)
|
144
|
-
else
|
145
|
-
super
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
def [](n)
|
150
|
-
Field.new("\$#{n}", self)
|
151
|
-
end
|
152
|
-
|
153
|
-
def hash
|
154
|
-
self.alias.hash
|
155
|
-
end
|
156
|
-
|
157
|
-
def eql?(other)
|
158
|
-
other.is_a?(Relation) && other.alias == self.alias
|
159
|
-
end
|
160
|
-
|
161
|
-
private
|
162
|
-
|
163
|
-
def split_at_options(parameters)
|
164
|
-
if parameters.last.is_a? Hash
|
165
|
-
[parameters[0..-2], parameters.last]
|
166
|
-
else
|
167
|
-
[parameters, nil]
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
def self.next_alias
|
172
|
-
@counter ||= 0
|
173
|
-
@counter += 1
|
174
|
-
"relation_#{@counter}"
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
data/lib/piglet/sample.rb
DELETED
data/lib/piglet/split.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
module Piglet
|
2
|
-
class Split # :nodoc:
|
3
|
-
include Relation
|
4
|
-
|
5
|
-
|
6
|
-
def initialize(relation, expressions)
|
7
|
-
@sources, @expressions = [relation], expressions
|
8
|
-
@shard_map = create_shards
|
9
|
-
end
|
10
|
-
|
11
|
-
def shards
|
12
|
-
@shard_map.keys
|
13
|
-
end
|
14
|
-
|
15
|
-
def to_s
|
16
|
-
"SPLIT #{@sources.first.alias} INTO #{split_strings}"
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def create_shards
|
22
|
-
Hash[*@expressions.map { |expr| [RelationShard.new(self), expr] }.flatten]
|
23
|
-
end
|
24
|
-
|
25
|
-
def split_strings
|
26
|
-
shards.map { |relation| "#{relation.alias} IF #{@shard_map[relation]}" }.join(', ')
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
class RelationShard # :nodoc:
|
31
|
-
include Relation
|
32
|
-
|
33
|
-
def initialize(split)
|
34
|
-
@sources = [split]
|
35
|
-
end
|
36
|
-
|
37
|
-
def to_s
|
38
|
-
self.alias
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
data/lib/piglet/store.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
module Piglet
|
2
|
-
class Store # :nodoc:
|
3
|
-
include LoadAndStore
|
4
|
-
include Storing
|
5
|
-
|
6
|
-
def initialize(relation, path, options={})
|
7
|
-
@relation, @path, @using = relation, path, options[:using]
|
8
|
-
end
|
9
|
-
|
10
|
-
def to_s
|
11
|
-
str = super
|
12
|
-
str << " INTO '#{@path}'"
|
13
|
-
str << " USING #{resolve_load_store_function(@using)}" if @using
|
14
|
-
str
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
data/lib/piglet/storing.rb
DELETED
data/lib/piglet/stream.rb
DELETED
data/lib/piglet/union.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
module Piglet
|
2
|
-
class Union # :nodoc:
|
3
|
-
include Relation
|
4
|
-
|
5
|
-
def initialize(*relations)
|
6
|
-
@sources = relations
|
7
|
-
end
|
8
|
-
|
9
|
-
def to_s
|
10
|
-
"UNION #{source_aliases.join(', ')}"
|
11
|
-
end
|
12
|
-
|
13
|
-
private
|
14
|
-
|
15
|
-
def source_aliases
|
16
|
-
@sources.map { |s| s.alias }
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|