piglet 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. data/README.rdoc +24 -4
  2. data/lib/piglet/field/binary_conditional.rb +15 -0
  3. data/lib/piglet/field/call_expression.rb +21 -0
  4. data/lib/piglet/field/infix_expression.rb +19 -0
  5. data/lib/piglet/field/literal.rb +20 -0
  6. data/lib/piglet/field/operators.rb +80 -0
  7. data/lib/piglet/field/prefix_expression.rb +23 -0
  8. data/lib/piglet/field/reference.rb +41 -0
  9. data/lib/piglet/field/rename.rb +13 -0
  10. data/lib/piglet/field/suffix_expression.rb +19 -0
  11. data/lib/piglet/inout/describe.rb +7 -0
  12. data/lib/piglet/inout/dump.rb +7 -0
  13. data/lib/piglet/inout/explain.rb +15 -0
  14. data/lib/piglet/inout/illustrate.rb +7 -0
  15. data/lib/piglet/inout/load.rb +31 -0
  16. data/lib/piglet/inout/output.rb +15 -0
  17. data/lib/piglet/inout/storage_types.rb +18 -0
  18. data/lib/piglet/inout/store.rb +19 -0
  19. data/lib/piglet/interpreter.rb +39 -7
  20. data/lib/piglet/relation/cogroup.rb +33 -0
  21. data/lib/piglet/relation/cross.rb +24 -0
  22. data/lib/piglet/relation/distinct.rb +18 -0
  23. data/lib/piglet/relation/filter.rb +15 -0
  24. data/lib/piglet/relation/foreach.rb +21 -0
  25. data/lib/piglet/relation/group.rb +23 -0
  26. data/lib/piglet/relation/join.rb +22 -0
  27. data/lib/piglet/relation/limit.rb +15 -0
  28. data/lib/piglet/relation/order.rb +31 -0
  29. data/lib/piglet/relation/relation.rb +179 -0
  30. data/lib/piglet/relation/sample.rb +15 -0
  31. data/lib/piglet/relation/split.rb +45 -0
  32. data/lib/piglet/relation/stream.rb +7 -0
  33. data/lib/piglet/relation/union.rb +21 -0
  34. data/lib/piglet.rb +40 -38
  35. data/spec/piglet/{field_spec.rb → field/reference_spec.rb} +22 -6
  36. data/spec/piglet/interpreter_spec.rb +51 -5
  37. data/spec/piglet/{relation_spec.rb → relation/relation_spec.rb} +6 -6
  38. data/spec/piglet/{split_spec.rb → relation/split_spec.rb} +8 -8
  39. data/spec/spec_helper.rb +0 -2
  40. metadata +39 -40
  41. data/examples/spike1.rb +0 -43
  42. data/examples/spike2.rb +0 -40
  43. data/lib/piglet/assignment.rb +0 -13
  44. data/lib/piglet/cogroup.rb +0 -31
  45. data/lib/piglet/cross.rb +0 -22
  46. data/lib/piglet/describe.rb +0 -5
  47. data/lib/piglet/distinct.rb +0 -16
  48. data/lib/piglet/dump.rb +0 -5
  49. data/lib/piglet/explain.rb +0 -13
  50. data/lib/piglet/field.rb +0 -40
  51. data/lib/piglet/field_expression_functions.rb +0 -62
  52. data/lib/piglet/field_function_expression.rb +0 -19
  53. data/lib/piglet/field_infix_expression.rb +0 -17
  54. data/lib/piglet/field_prefix_expression.rb +0 -21
  55. data/lib/piglet/field_rename.rb +0 -11
  56. data/lib/piglet/field_suffix_expression.rb +0 -17
  57. data/lib/piglet/filter.rb +0 -13
  58. data/lib/piglet/foreach.rb +0 -19
  59. data/lib/piglet/group.rb +0 -21
  60. data/lib/piglet/illustrate.rb +0 -5
  61. data/lib/piglet/join.rb +0 -20
  62. data/lib/piglet/limit.rb +0 -13
  63. data/lib/piglet/load.rb +0 -31
  64. data/lib/piglet/load_and_store.rb +0 -16
  65. data/lib/piglet/order.rb +0 -29
  66. data/lib/piglet/relation.rb +0 -177
  67. data/lib/piglet/sample.rb +0 -13
  68. data/lib/piglet/split.rb +0 -41
  69. data/lib/piglet/store.rb +0 -17
  70. data/lib/piglet/storing.rb +0 -13
  71. data/lib/piglet/stream.rb +0 -5
  72. data/lib/piglet/union.rb +0 -19
@@ -1,177 +0,0 @@
1
- module Piglet
2
- module Relation
3
- attr_reader :sources
4
-
5
- # The name this relation will get in Pig Latin. The name is generated when
6
- # the relation is output by the interpreter, and will be unique.
7
- def alias
8
- @alias ||= Relation.next_alias
9
- end
10
-
11
- # GROUP
12
- #
13
- # x.group(:a) # => GROUP x BY a
14
- # x.group(:a, :b, :c) # => GROUP x BY (a, b, c)
15
- # x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3
16
- def group(*args)
17
- grouping, options = split_at_options(args)
18
- Group.new(self, [grouping].flatten, options)
19
- end
20
-
21
- # DISTINCT
22
- #
23
- # x.distinct # => DISTINCT x
24
- # x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5
25
- def distinct(options={})
26
- Distinct.new(self, options)
27
- end
28
-
29
- # COGROUP
30
- #
31
- # x.cogroup(x => :a, y => :b) # => COGROUP x BY a, y BY b
32
- # x.cogroup(x => :a, y => :b, z => :c) # => COGROUP x BY a, y BY b, z BY c
33
- # x.cogroup(x => [:a, :b], y => [:c, :d]) # => COGROUP x BY (a, b), y BY (c, d)
34
- # x.cogroup(x => :a, y => [:b, :inner]) # => COGROUP x BY a, y BY b INNER
35
- # x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5
36
- def cogroup(description)
37
- Cogroup.new(self, description)
38
- end
39
-
40
- # CROSS
41
- #
42
- # x.cross(y) # => CROSS x, y
43
- # x.cross(y, z, w) # => CROSS x, y, z, w
44
- # x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5
45
- def cross(*args)
46
- relations, options = split_at_options(args)
47
- Cross.new(([self] + relations).flatten, options)
48
- end
49
-
50
- # FILTER
51
- #
52
- # x.filter { |r| r.a == r.b } # => FILTER x BY a == b
53
- # x.filter { |r| r.a > r.b && r.c != 3 } # => FILTER x BY a > b AND c != 3
54
- def filter
55
- Filter.new(self, yield(self))
56
- end
57
-
58
- # FOREACH ... GENERATE
59
- #
60
- # x.foreach { |r| r.a } # => FOREACH x GENERATE a
61
- # x.foreach { |r| [r.a, r.b] } # => FOREACH x GENERATE a, b
62
- # x.foreach { |r| r.a.max } # => FOREACH x GENERATE MAX(a)
63
- # x.foreach { |r| r.a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
64
- #
65
- #--
66
- #
67
- # TODO: FOREACH a { b GENERATE c }
68
- def foreach
69
- Foreach.new(self, yield(self))
70
- end
71
-
72
- # JOIN
73
- #
74
- # x.join(x => :a, y => :b) # => JOIN x BY a, y BY b
75
- # x.join(x => :a, y => :b, z => :c) # => JOIN x BY a, y BY b, z BY c
76
- # x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
77
- # x.join(x => :a, y => :b, :parallel => 5) # => JOIN x BY a, y BY b PARALLEL 5
78
- def join(description)
79
- Join.new(self, description)
80
- end
81
-
82
- # LIMIT
83
- #
84
- # x.limit(10) # => LIMIT x 10
85
- def limit(n)
86
- Limit.new(self, n)
87
- end
88
-
89
- # ORDER
90
- #
91
- # x.order(:a) # => ORDER x BY a
92
- # x.order(:a, :b) # => ORDER x BY a, b
93
- # x.order([:a, :asc], [:b, :desc]) # => ORDER x BY a ASC, b DESC
94
- # x.order(:a, :parallel => 5) # => ORDER x BY a PARALLEL 5
95
- #
96
- #--
97
- #
98
- # NOTE: the syntax x.order(:a => :asc, :b => :desc) would be nice, but in
99
- # Ruby 1.8 the order of the keys cannot be guaranteed.
100
- def order(*args)
101
- fields, options = split_at_options(args)
102
- fields = *fields
103
- Order.new(self, fields, options)
104
- end
105
-
106
- # SAMPLE
107
- #
108
- # x.sample(5) # => SAMPLE x 5
109
- def sample(n)
110
- Sample.new(self, n)
111
- end
112
-
113
- # SPLIT
114
- #
115
- # y, z = x.split { |r| [r.a <= 3, r.b > 4]} # => SPLIT x INTO y IF a <= 3, z IF b > 4
116
- def split
117
- Split.new(self, yield(self)).shards
118
- end
119
-
120
- # STREAM
121
- #
122
- # x.stream(x, 'cut -f 3') # => STREAM x THROUGH `cut -f 3`
123
- # x.stream([x, y], 'cut -f 3') # => STREAM x, y THROUGH `cut -f 3`
124
- # x.stream(x, 'cut -f 3', :schema => [%w(a int)]) # => STREAM x THROUGH `cut -f 3` AS (a:int)
125
- #
126
- #--
127
- #
128
- # TODO: how to handle DEFINE'd commands?
129
- def stream(relations, command, options={})
130
- raise NotSupportedError
131
- end
132
-
133
- # UNION
134
- #
135
- # x.union(y) # => UNION x, y
136
- # x.union(y, z) # => UNION x, y, z
137
- def union(*relations)
138
- Union.new(*([self] + relations))
139
- end
140
-
141
- def method_missing(name, *args)
142
- if name.to_s =~ /^\w+$/ && args.empty?
143
- Field.new(name, self)
144
- else
145
- super
146
- end
147
- end
148
-
149
- def [](n)
150
- Field.new("\$#{n}", self)
151
- end
152
-
153
- def hash
154
- self.alias.hash
155
- end
156
-
157
- def eql?(other)
158
- other.is_a?(Relation) && other.alias == self.alias
159
- end
160
-
161
- private
162
-
163
- def split_at_options(parameters)
164
- if parameters.last.is_a? Hash
165
- [parameters[0..-2], parameters.last]
166
- else
167
- [parameters, nil]
168
- end
169
- end
170
-
171
- def self.next_alias
172
- @counter ||= 0
173
- @counter += 1
174
- "relation_#{@counter}"
175
- end
176
- end
177
- end
data/lib/piglet/sample.rb DELETED
@@ -1,13 +0,0 @@
1
- module Piglet
2
- class Sample # :nodoc:
3
- include Relation
4
-
5
- def initialize(relation, n)
6
- @sources, @n = [relation], n
7
- end
8
-
9
- def to_s
10
- "SAMPLE #{@sources.first.alias} #{@n}"
11
- end
12
- end
13
- end
data/lib/piglet/split.rb DELETED
@@ -1,41 +0,0 @@
1
- module Piglet
2
- class Split # :nodoc:
3
- include Relation
4
-
5
-
6
- def initialize(relation, expressions)
7
- @sources, @expressions = [relation], expressions
8
- @shard_map = create_shards
9
- end
10
-
11
- def shards
12
- @shard_map.keys
13
- end
14
-
15
- def to_s
16
- "SPLIT #{@sources.first.alias} INTO #{split_strings}"
17
- end
18
-
19
- private
20
-
21
- def create_shards
22
- Hash[*@expressions.map { |expr| [RelationShard.new(self), expr] }.flatten]
23
- end
24
-
25
- def split_strings
26
- shards.map { |relation| "#{relation.alias} IF #{@shard_map[relation]}" }.join(', ')
27
- end
28
- end
29
-
30
- class RelationShard # :nodoc:
31
- include Relation
32
-
33
- def initialize(split)
34
- @sources = [split]
35
- end
36
-
37
- def to_s
38
- self.alias
39
- end
40
- end
41
- end
data/lib/piglet/store.rb DELETED
@@ -1,17 +0,0 @@
1
- module Piglet
2
- class Store # :nodoc:
3
- include LoadAndStore
4
- include Storing
5
-
6
- def initialize(relation, path, options={})
7
- @relation, @path, @using = relation, path, options[:using]
8
- end
9
-
10
- def to_s
11
- str = super
12
- str << " INTO '#{@path}'"
13
- str << " USING #{resolve_load_store_function(@using)}" if @using
14
- str
15
- end
16
- end
17
- end
@@ -1,13 +0,0 @@
1
- module Piglet
2
- module Storing # :nodoc:
3
- attr_reader :relation
4
-
5
- def initialize(relation)
6
- @relation = relation
7
- end
8
-
9
- def to_s
10
- "#{self.class.name.split(/::/).last.upcase} #{@relation.alias}"
11
- end
12
- end
13
- end
data/lib/piglet/stream.rb DELETED
@@ -1,5 +0,0 @@
1
- module Piglet
2
- class Stream # :nodoc:
3
- include Relation
4
- end
5
- end
data/lib/piglet/union.rb DELETED
@@ -1,19 +0,0 @@
1
- module Piglet
2
- class Union # :nodoc:
3
- include Relation
4
-
5
- def initialize(*relations)
6
- @sources = relations
7
- end
8
-
9
- def to_s
10
- "UNION #{source_aliases.join(', ')}"
11
- end
12
-
13
- private
14
-
15
- def source_aliases
16
- @sources.map { |s| s.alias }
17
- end
18
- end
19
- end