piglet 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. data/README.rdoc +24 -4
  2. data/lib/piglet/field/binary_conditional.rb +15 -0
  3. data/lib/piglet/field/call_expression.rb +21 -0
  4. data/lib/piglet/field/infix_expression.rb +19 -0
  5. data/lib/piglet/field/literal.rb +20 -0
  6. data/lib/piglet/field/operators.rb +80 -0
  7. data/lib/piglet/field/prefix_expression.rb +23 -0
  8. data/lib/piglet/field/reference.rb +41 -0
  9. data/lib/piglet/field/rename.rb +13 -0
  10. data/lib/piglet/field/suffix_expression.rb +19 -0
  11. data/lib/piglet/inout/describe.rb +7 -0
  12. data/lib/piglet/inout/dump.rb +7 -0
  13. data/lib/piglet/inout/explain.rb +15 -0
  14. data/lib/piglet/inout/illustrate.rb +7 -0
  15. data/lib/piglet/inout/load.rb +31 -0
  16. data/lib/piglet/inout/output.rb +15 -0
  17. data/lib/piglet/inout/storage_types.rb +18 -0
  18. data/lib/piglet/inout/store.rb +19 -0
  19. data/lib/piglet/interpreter.rb +39 -7
  20. data/lib/piglet/relation/cogroup.rb +33 -0
  21. data/lib/piglet/relation/cross.rb +24 -0
  22. data/lib/piglet/relation/distinct.rb +18 -0
  23. data/lib/piglet/relation/filter.rb +15 -0
  24. data/lib/piglet/relation/foreach.rb +21 -0
  25. data/lib/piglet/relation/group.rb +23 -0
  26. data/lib/piglet/relation/join.rb +22 -0
  27. data/lib/piglet/relation/limit.rb +15 -0
  28. data/lib/piglet/relation/order.rb +31 -0
  29. data/lib/piglet/relation/relation.rb +179 -0
  30. data/lib/piglet/relation/sample.rb +15 -0
  31. data/lib/piglet/relation/split.rb +45 -0
  32. data/lib/piglet/relation/stream.rb +7 -0
  33. data/lib/piglet/relation/union.rb +21 -0
  34. data/lib/piglet.rb +40 -38
  35. data/spec/piglet/{field_spec.rb → field/reference_spec.rb} +22 -6
  36. data/spec/piglet/interpreter_spec.rb +51 -5
  37. data/spec/piglet/{relation_spec.rb → relation/relation_spec.rb} +6 -6
  38. data/spec/piglet/{split_spec.rb → relation/split_spec.rb} +8 -8
  39. data/spec/spec_helper.rb +0 -2
  40. metadata +39 -40
  41. data/examples/spike1.rb +0 -43
  42. data/examples/spike2.rb +0 -40
  43. data/lib/piglet/assignment.rb +0 -13
  44. data/lib/piglet/cogroup.rb +0 -31
  45. data/lib/piglet/cross.rb +0 -22
  46. data/lib/piglet/describe.rb +0 -5
  47. data/lib/piglet/distinct.rb +0 -16
  48. data/lib/piglet/dump.rb +0 -5
  49. data/lib/piglet/explain.rb +0 -13
  50. data/lib/piglet/field.rb +0 -40
  51. data/lib/piglet/field_expression_functions.rb +0 -62
  52. data/lib/piglet/field_function_expression.rb +0 -19
  53. data/lib/piglet/field_infix_expression.rb +0 -17
  54. data/lib/piglet/field_prefix_expression.rb +0 -21
  55. data/lib/piglet/field_rename.rb +0 -11
  56. data/lib/piglet/field_suffix_expression.rb +0 -17
  57. data/lib/piglet/filter.rb +0 -13
  58. data/lib/piglet/foreach.rb +0 -19
  59. data/lib/piglet/group.rb +0 -21
  60. data/lib/piglet/illustrate.rb +0 -5
  61. data/lib/piglet/join.rb +0 -20
  62. data/lib/piglet/limit.rb +0 -13
  63. data/lib/piglet/load.rb +0 -31
  64. data/lib/piglet/load_and_store.rb +0 -16
  65. data/lib/piglet/order.rb +0 -29
  66. data/lib/piglet/relation.rb +0 -177
  67. data/lib/piglet/sample.rb +0 -13
  68. data/lib/piglet/split.rb +0 -41
  69. data/lib/piglet/store.rb +0 -17
  70. data/lib/piglet/storing.rb +0 -13
  71. data/lib/piglet/stream.rb +0 -5
  72. data/lib/piglet/union.rb +0 -19
@@ -1,177 +0,0 @@
1
- module Piglet
2
- module Relation
3
- attr_reader :sources
4
-
5
- # The name this relation will get in Pig Latin. The name is generated when
6
- # the relation is output by the interpreter, and will be unique.
7
- def alias
8
- @alias ||= Relation.next_alias
9
- end
10
-
11
- # GROUP
12
- #
13
- # x.group(:a) # => GROUP x BY a
14
- # x.group(:a, :b, :c) # => GROUP x BY (a, b, c)
15
- # x.group([:a, :b, :c], :parallel => 3) # => GROUP x BY (a, b, c) PARALLEL 3
16
- def group(*args)
17
- grouping, options = split_at_options(args)
18
- Group.new(self, [grouping].flatten, options)
19
- end
20
-
21
- # DISTINCT
22
- #
23
- # x.distinct # => DISTINCT x
24
- # x.distinct(:parallel => 5) # => DISTINCT x PARALLEL 5
25
- def distinct(options={})
26
- Distinct.new(self, options)
27
- end
28
-
29
- # COGROUP
30
- #
31
- # x.cogroup(x => :a, y => :b) # => COGROUP x BY a, y BY b
32
- # x.cogroup(x => :a, y => :b, z => :c) # => COGROUP x BY a, y BY b, z BY c
33
- # x.cogroup(x => [:a, :b], y => [:c, :d]) # => COGROUP x BY (a, b), y BY (c, d)
34
- # x.cogroup(x => :a, y => [:b, :inner]) # => COGROUP x BY a, y BY b INNER
35
- # x.cogroup(x => :a, y => :b, :parallel => 5) # => COGROUP x BY a, y BY b PARALLEL 5
36
- def cogroup(description)
37
- Cogroup.new(self, description)
38
- end
39
-
40
- # CROSS
41
- #
42
- # x.cross(y) # => CROSS x, y
43
- # x.cross(y, z, w) # => CROSS x, y, z, w
44
- # x.cross([y, z], :parallel => 5) # => CROSS x, y, z PARALLEL 5
45
- def cross(*args)
46
- relations, options = split_at_options(args)
47
- Cross.new(([self] + relations).flatten, options)
48
- end
49
-
50
- # FILTER
51
- #
52
- # x.filter { |r| r.a == r.b } # => FILTER x BY a == b
53
- # x.filter { |r| r.a > r.b && r.c != 3 } # => FILTER x BY a > b AND c != 3
54
- def filter
55
- Filter.new(self, yield(self))
56
- end
57
-
58
- # FOREACH ... GENERATE
59
- #
60
- # x.foreach { |r| r.a } # => FOREACH x GENERATE a
61
- # x.foreach { |r| [r.a, r.b] } # => FOREACH x GENERATE a, b
62
- # x.foreach { |r| r.a.max } # => FOREACH x GENERATE MAX(a)
63
- # x.foreach { |r| r.a.avg.as(:b) } # => FOREACH x GENERATE AVG(a) AS b
64
- #
65
- #--
66
- #
67
- # TODO: FOREACH a { b GENERATE c }
68
- def foreach
69
- Foreach.new(self, yield(self))
70
- end
71
-
72
- # JOIN
73
- #
74
- # x.join(x => :a, y => :b) # => JOIN x BY a, y BY b
75
- # x.join(x => :a, y => :b, z => :c) # => JOIN x BY a, y BY b, z BY c
76
- # x.join(x => :a, y => :b, :using => :replicated) # => JOIN x BY a, y BY b USING "replicated"
77
- # x.join(x => :a, y => :b, :parallel => 5) # => JOIN x BY a, y BY b PARALLEL 5
78
- def join(description)
79
- Join.new(self, description)
80
- end
81
-
82
- # LIMIT
83
- #
84
- # x.limit(10) # => LIMIT x 10
85
- def limit(n)
86
- Limit.new(self, n)
87
- end
88
-
89
- # ORDER
90
- #
91
- # x.order(:a) # => ORDER x BY a
92
- # x.order(:a, :b) # => ORDER x BY a, b
93
- # x.order([:a, :asc], [:b, :desc]) # => ORDER x BY a ASC, b DESC
94
- # x.order(:a, :parallel => 5) # => ORDER x BY a PARALLEL 5
95
- #
96
- #--
97
- #
98
- # NOTE: the syntax x.order(:a => :asc, :b => :desc) would be nice, but in
99
- # Ruby 1.8 the order of the keys cannot be guaranteed.
100
- def order(*args)
101
- fields, options = split_at_options(args)
102
- fields = *fields
103
- Order.new(self, fields, options)
104
- end
105
-
106
- # SAMPLE
107
- #
108
- # x.sample(5) # => SAMPLE x 5
109
- def sample(n)
110
- Sample.new(self, n)
111
- end
112
-
113
- # SPLIT
114
- #
115
- # y, z = x.split { |r| [r.a <= 3, r.b > 4]} # => SPLIT x INTO y IF a <= 3, z IF b > 4
116
- def split
117
- Split.new(self, yield(self)).shards
118
- end
119
-
120
- # STREAM
121
- #
122
- # x.stream(x, 'cut -f 3') # => STREAM x THROUGH `cut -f 3`
123
- # x.stream([x, y], 'cut -f 3') # => STREAM x, y THROUGH `cut -f 3`
124
- # x.stream(x, 'cut -f 3', :schema => [%w(a int)]) # => STREAM x THROUGH `cut -f 3` AS (a:int)
125
- #
126
- #--
127
- #
128
- # TODO: how to handle DEFINE'd commands?
129
- def stream(relations, command, options={})
130
- raise NotSupportedError
131
- end
132
-
133
- # UNION
134
- #
135
- # x.union(y) # => UNION x, y
136
- # x.union(y, z) # => UNION x, y, z
137
- def union(*relations)
138
- Union.new(*([self] + relations))
139
- end
140
-
141
- def method_missing(name, *args)
142
- if name.to_s =~ /^\w+$/ && args.empty?
143
- Field.new(name, self)
144
- else
145
- super
146
- end
147
- end
148
-
149
- def [](n)
150
- Field.new("\$#{n}", self)
151
- end
152
-
153
- def hash
154
- self.alias.hash
155
- end
156
-
157
- def eql?(other)
158
- other.is_a?(Relation) && other.alias == self.alias
159
- end
160
-
161
- private
162
-
163
- def split_at_options(parameters)
164
- if parameters.last.is_a? Hash
165
- [parameters[0..-2], parameters.last]
166
- else
167
- [parameters, nil]
168
- end
169
- end
170
-
171
- def self.next_alias
172
- @counter ||= 0
173
- @counter += 1
174
- "relation_#{@counter}"
175
- end
176
- end
177
- end
data/lib/piglet/sample.rb DELETED
@@ -1,13 +0,0 @@
1
- module Piglet
2
- class Sample # :nodoc:
3
- include Relation
4
-
5
- def initialize(relation, n)
6
- @sources, @n = [relation], n
7
- end
8
-
9
- def to_s
10
- "SAMPLE #{@sources.first.alias} #{@n}"
11
- end
12
- end
13
- end
data/lib/piglet/split.rb DELETED
@@ -1,41 +0,0 @@
1
- module Piglet
2
- class Split # :nodoc:
3
- include Relation
4
-
5
-
6
- def initialize(relation, expressions)
7
- @sources, @expressions = [relation], expressions
8
- @shard_map = create_shards
9
- end
10
-
11
- def shards
12
- @shard_map.keys
13
- end
14
-
15
- def to_s
16
- "SPLIT #{@sources.first.alias} INTO #{split_strings}"
17
- end
18
-
19
- private
20
-
21
- def create_shards
22
- Hash[*@expressions.map { |expr| [RelationShard.new(self), expr] }.flatten]
23
- end
24
-
25
- def split_strings
26
- shards.map { |relation| "#{relation.alias} IF #{@shard_map[relation]}" }.join(', ')
27
- end
28
- end
29
-
30
- class RelationShard # :nodoc:
31
- include Relation
32
-
33
- def initialize(split)
34
- @sources = [split]
35
- end
36
-
37
- def to_s
38
- self.alias
39
- end
40
- end
41
- end
data/lib/piglet/store.rb DELETED
@@ -1,17 +0,0 @@
1
- module Piglet
2
- class Store # :nodoc:
3
- include LoadAndStore
4
- include Storing
5
-
6
- def initialize(relation, path, options={})
7
- @relation, @path, @using = relation, path, options[:using]
8
- end
9
-
10
- def to_s
11
- str = super
12
- str << " INTO '#{@path}'"
13
- str << " USING #{resolve_load_store_function(@using)}" if @using
14
- str
15
- end
16
- end
17
- end
@@ -1,13 +0,0 @@
1
- module Piglet
2
- module Storing # :nodoc:
3
- attr_reader :relation
4
-
5
- def initialize(relation)
6
- @relation = relation
7
- end
8
-
9
- def to_s
10
- "#{self.class.name.split(/::/).last.upcase} #{@relation.alias}"
11
- end
12
- end
13
- end
data/lib/piglet/stream.rb DELETED
@@ -1,5 +0,0 @@
1
- module Piglet
2
- class Stream # :nodoc:
3
- include Relation
4
- end
5
- end
data/lib/piglet/union.rb DELETED
@@ -1,19 +0,0 @@
1
- module Piglet
2
- class Union # :nodoc:
3
- include Relation
4
-
5
- def initialize(*relations)
6
- @sources = relations
7
- end
8
-
9
- def to_s
10
- "UNION #{source_aliases.join(', ')}"
11
- end
12
-
13
- private
14
-
15
- def source_aliases
16
- @sources.map { |s| s.alias }
17
- end
18
- end
19
- end