piglet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/.document +5 -0
  2. data/.gitignore +22 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +293 -0
  5. data/Rakefile +50 -0
  6. data/bin/piglet +9 -0
  7. data/examples/analysis.rb +311 -0
  8. data/examples/scratch.rb +11 -0
  9. data/examples/spike1.rb +43 -0
  10. data/examples/spike2.rb +40 -0
  11. data/examples/test1.rb +3 -0
  12. data/examples/test2.rb +5 -0
  13. data/examples/test3.rb +4 -0
  14. data/lib/piglet/assignment.rb +13 -0
  15. data/lib/piglet/cogroup.rb +31 -0
  16. data/lib/piglet/cross.rb +22 -0
  17. data/lib/piglet/describe.rb +5 -0
  18. data/lib/piglet/distinct.rb +16 -0
  19. data/lib/piglet/dump.rb +5 -0
  20. data/lib/piglet/explain.rb +13 -0
  21. data/lib/piglet/field.rb +40 -0
  22. data/lib/piglet/field_expression_functions.rb +62 -0
  23. data/lib/piglet/field_function_expression.rb +19 -0
  24. data/lib/piglet/field_infix_expression.rb +17 -0
  25. data/lib/piglet/field_prefix_expression.rb +21 -0
  26. data/lib/piglet/field_rename.rb +11 -0
  27. data/lib/piglet/field_suffix_expression.rb +17 -0
  28. data/lib/piglet/filter.rb +13 -0
  29. data/lib/piglet/foreach.rb +19 -0
  30. data/lib/piglet/group.rb +21 -0
  31. data/lib/piglet/illustrate.rb +5 -0
  32. data/lib/piglet/interpreter.rb +108 -0
  33. data/lib/piglet/join.rb +20 -0
  34. data/lib/piglet/limit.rb +13 -0
  35. data/lib/piglet/load.rb +31 -0
  36. data/lib/piglet/load_and_store.rb +16 -0
  37. data/lib/piglet/order.rb +29 -0
  38. data/lib/piglet/relation.rb +177 -0
  39. data/lib/piglet/sample.rb +13 -0
  40. data/lib/piglet/split.rb +41 -0
  41. data/lib/piglet/store.rb +17 -0
  42. data/lib/piglet/storing.rb +13 -0
  43. data/lib/piglet/stream.rb +5 -0
  44. data/lib/piglet/union.rb +19 -0
  45. data/lib/piglet.rb +45 -0
  46. data/spec/piglet/field_spec.rb +130 -0
  47. data/spec/piglet/interpreter_spec.rb +413 -0
  48. data/spec/piglet/relation_spec.rb +79 -0
  49. data/spec/piglet/split_spec.rb +34 -0
  50. data/spec/piglet_spec.rb +7 -0
  51. data/spec/spec.opts +3 -0
  52. data/spec/spec_helper.rb +14 -0
  53. metadata +123 -0
@@ -0,0 +1,311 @@
1
# Pig Latin original:
#
# raw_sessions =
#   LOAD '$INPUT/sessions*'
#   USING PigStorage AS (
#     date:chararray,
#     api_key:chararray,
#     ad_id:chararray,
#     user_id:chararray,
#     site:chararray,
#     size:chararray,
#     name:chararray,
#     destination:chararray,
#     indeterminate_visibility:int,
#     impression:int,
#     engagement:int,
#     click_thru:int,
#     extra:int,
#     session_time:int,
#     visible_time:int,
#     engagement_time:int
#   );
#
# Piglet translation: the schema is a list of two-element [name, type] arrays.
# The name and the type must be separated by a comma, otherwise Ruby rejects
# the array literal.
raw_sessions = load('$INPUT/sessions*', :schema => [
  [:date, :chararray],
  [:api_key, :chararray],
  [:ad_id, :chararray],
  [:user_id, :chararray],
  [:site, :chararray],
  [:size, :chararray],
  [:name, :chararray],
  [:destination, :chararray],
  [:indeterminate_visibility, :int],
  [:impression, :int],
  [:engagement, :int],
  [:click_thru, :int],
  [:extra, :int],
  [:session_time, :int],
  [:visible_time, :int],
  [:engagement_time, :int]
])
39
+
40
# Pig Latin original:
#
# raw_actions =
#   LOAD '$INPUT/actions*'
#   USING PigStorage AS (
#     date:chararray,
#     api_key:chararray,
#     ad_id:chararray,
#     user_id:chararray,
#     action:chararray,
#     site:chararray,
#     size:chararray,
#     name:chararray,
#     destination:chararray,
#     extra:int
#   );
#
# Piglet translation: as for raw_sessions, the schema is one array of
# [name, type] pairs. The enclosing brackets are required so that the whole
# list is the value of the :schema option rather than a run of stray arguments.
raw_actions = load('$INPUT/actions*', :schema => [
  [:date, :chararray],
  [:api_key, :chararray],
  [:ad_id, :chararray],
  [:user_id, :chararray],
  [:action, :chararray],
  [:site, :chararray],
  [:size, :chararray],
  [:name, :chararray],
  [:destination, :chararray],
  [:extra, :int]
])
66
+
67
# Drop rows without a date from both inputs.
#
# Pig Latin original:
#   sessions = FILTER raw_sessions BY date is not null;
sessions = raw_sessions.filter do |row|
  row.date.not_null?
end

# Pig Latin original:
#   actions = FILTER raw_actions BY date is not null;
actions = raw_actions.filter do |row|
  row.date.not_null?
end
72
+
73
# /*
#  * Modify each session and action based on whether or not it's an extra session
#  * (a session that was logged only because it was a click thru). Extra sessions
#  * should affect only the total number of click thrus, not the number of
#  * exposures, impressions, etc. nor the durations. By setting these values to
#  * zero and introducing a field for whether or not the session was an exposure
#  * (zero for extra sessions, one for all other), the calculations below can
#  * filter out extra sessions without too much work.
#  */
# sessions =
#   FOREACH
#     sessions
#   GENERATE
#     date,
#     api_key,
#     ad_id,
#     user_id,
#     site,
#     size,
#     name,
#     destination,
#     (extra == 1 ? 0 : indeterminate_visibility) AS indeterminate_visibility,
#     (extra == 1 ? 0 : 1) AS exposure,
#     (extra == 1 ? 0 : impression) AS impression,
#     (extra == 1 ? 0 : engagement) AS engagement,
#     click_thru,
#     (extra == 1 ? 0 : session_time) AS session_time,
#     (extra == 1 ? 0 : visible_time) AS visible_time,
#     (extra == 1 ? 0 : engagement_time) AS engagement_time;
sessions = sessions.foreach do |row|
  # Builds "(extra == 1 ? 0 : value) AS new_name" for one output column.
  zero_if_extra = lambda do |value, new_name|
    row.test(row.extra == 1, 0, value).as(new_name)
  end

  [
    row.date,
    row.api_key,
    row.ad_id,
    row.user_id,
    row.site,
    row.size,
    row.name,
    row.destination,
    zero_if_extra.call(row.indeterminate_visibility, :indeterminate_visibility),
    zero_if_extra.call(1, :exposure),
    zero_if_extra.call(row.impression, :impression),
    zero_if_extra.call(row.engagement, :engagement),
    # click thrus are the one metric that extra sessions still count towards
    row.click_thru,
    zero_if_extra.call(row.session_time, :session_time),
    zero_if_extra.call(row.visible_time, :visible_time),
    zero_if_extra.call(row.engagement_time, :engagement_time)
  ]
end
122
+
123
# Pig Latin original:
#
# actions =
#   FOREACH
#     actions
#   GENERATE
#     date,
#     api_key,
#     ad_id,
#     user_id,
#     action,
#     site,
#     size,
#     name,
#     destination,
#     (extra == 1 ? 0 : 1) AS exposure;
actions = actions.foreach do |row|
  # Columns copied through unchanged, in output order.
  passthrough = [
    row.date,
    row.api_key,
    row.ad_id,
    row.user_id,
    row.action,
    row.site,
    row.size,
    row.name,
    row.destination
  ]

  # 0 for extra actions, 1 for every other action.
  exposure = row.test(row.extra == 1, 0, 1).as(:exposure)

  passthrough + [exposure]
end
151
+
152
# Builds the per-category aggregates. The Pig Latin/ERB original of every
# relation is kept as a comment above its translation; relations that have not
# been translated yet are kept as comments only.
%w(all site size name).each do |name|
  # session_category_<%= name %> =
  #   FOREACH
  #     (GROUP sessions BY (date, ad_id, api_key, <%= name == 'all' ? "'all'" : name %>) PARALLEL $PARALLELISM)
  #   GENERATE
  #     $0.date AS date,
  #     $0.ad_id AS ad_id,
  #     $0.api_key AS api_key,
  #     '<%= name %>' AS category,
  #     <%= name == 'all' ? "'all'" : "$0.#{name}" %> AS segment,
  #     SUM($1.exposure) AS exposures,
  #     SUM($1.impression) AS impressions,
  #     SUM($1.engagement) AS engagements,
  #     SUM($1.click_thru) AS click_thrus,
  #     SUM($1.indeterminate_visibility) AS indeterminate_visibility,
  #     SUM($1.session_time) AS session_time,
  #     SUM($1.visible_time) AS visible_time,
  #     SUM($1.engagement_time) AS engagement_time;
  session_category = sessions.group(:date, :ad_id, :api_key, (name == 'all' ? 'all' : name), :parallel => '$PARALLELISM')
  session_category = session_category.foreach do |r|
    [
      r[0].date.as(:date),
      r[0].ad_id.as(:ad_id),
      r[0].api_key.as(:api_key),
      # NOTE(review): +name+ is a plain String here; this relies on String
      # responding to #as, which core Ruby does not provide -- TODO confirm.
      name.as(:category),
      # NOTE(review): the Pig original selects $0.<name> (site/size/name), but
      # this always reads the field literally called "name" -- TODO confirm.
      (name == 'all' ? "'all'" : r[0].name).as(:segment),
      # Each SUM names the field it aggregates and uses the alias from the
      # Pig original above.
      r[1].exposure.sum.as(:exposures),
      r[1].impression.sum.as(:impressions),
      r[1].engagement.sum.as(:engagements),
      r[1].click_thru.sum.as(:click_thrus),
      r[1].indeterminate_visibility.sum.as(:indeterminate_visibility),
      r[1].session_time.sum.as(:session_time),
      r[1].visible_time.sum.as(:visible_time),
      r[1].engagement_time.sum.as(:engagement_time)
    ]
  end

  # session_category_<%= name %>_by_user_id =
  #   FOREACH
  #     (GROUP sessions BY (date, ad_id, api_key, user_id, <%= name == 'all' ? "'all'" : name %>) PARALLEL $PARALLELISM)
  #   GENERATE
  #     $0.date AS date,
  #     $0.ad_id AS ad_id,
  #     $0.api_key AS api_key,
  #     '<%= name %>' AS category,
  #     <%= name == 'all' ? "'all'" : "$0.#{name}" %> AS segment,
  #     1 AS exposures,
  #     MAX($1.impression) AS impressions,
  #     MAX($1.engagement) AS engagements,
  #     MAX($1.click_thru) AS click_thrus;
  # user_id is part of the grouping key, as in the Pig original.
  session_category_by_user_id = sessions.group(:date, :ad_id, :api_key, :user_id, (name == 'all' ? 'all' : name), :parallel => '$PARALLELISM')
  session_category_by_user_id = session_category_by_user_id.foreach do |r|
    # The generated columns are returned as one array, like the other foreach
    # blocks in this file.
    [
      r[0].date.as(:date),
      r[0].ad_id.as(:ad_id),
      r[0].api_key.as(:api_key),
      # NOTE(review): see the notes on name.as and r[0].name above.
      name.as(:category),
      (name == 'all' ? "'all'" : r[0].name).as(:segment),
      # NOTE(review): relies on Integer responding to #as -- TODO confirm.
      1.as(:exposures),
      r[1].impression.max.as(:impressions),
      r[1].engagement.max.as(:engagements),
      r[1].click_thru.max.as(:click_thrus)
    ]
  end

  # unique_session_category_<%= name %> =
  #   FOREACH
  #     (GROUP session_category_<%= name %>_by_user_id BY (date, ad_id, api_key, category, segment) PARALLEL $PARALLELISM)
  #   GENERATE
  #     $0.date AS date,
  #     $0.ad_id AS ad_id,
  #     $0.api_key AS api_key,
  #     $0.category,
  #     $0.segment,
  #     COUNT($1.ad_id) AS unique_exposures,
  #     SUM($1.impressions) AS unique_impressions,
  #     SUM($1.engagements) AS unique_engagements,
  #     SUM($1.click_thrus) AS unique_click_thrus;
  #
  # action_category_<%= name %> =
  #   FOREACH
  #     (GROUP actions BY (date, ad_id, api_key, action, <%= name == 'all' ? "'all'" : name %>) PARALLEL $PARALLELISM)
  #   GENERATE
  #     $0.date AS date,
  #     $0.ad_id AS ad_id,
  #     $0.api_key AS api_key,
  #     $0.action AS action,
  #     '<%= name %>' AS category,
  #     <%= name == 'all' ? "'all'" : "$0.#{name}" %> AS segment,
  #     SUM($1.exposure) AS engagements;
  #
  # action_category_<%= name %>_by_user_id =
  #   FOREACH
  #     (GROUP actions BY (date, ad_id, api_key, action, user_id, <%= name == 'all' ? "'all'" : name %>) PARALLEL $PARALLELISM)
  #   GENERATE
  #     $0.date AS date,
  #     $0.ad_id AS ad_id,
  #     $0.api_key AS api_key,
  #     $0.action AS action,
  #     '<%= name %>' AS category,
  #     <%= name == 'all' ? "'all'" : "$0.#{name}" %> AS segment,
  #     1 AS exposures,
  #     1 AS engagements;
  #
  # unique_action_category_<%= name %> =
  #   FOREACH
  #     (GROUP action_category_<%= name %>_by_user_id BY (date, ad_id, api_key, action, category, segment) PARALLEL $PARALLELISM)
  #   GENERATE
  #     $0.date AS date,
  #     $0.ad_id AS ad_id,
  #     $0.api_key AS api_key,
  #     $0.action AS action,
  #     $0.category,
  #     $0.segment,
  #     SUM($1.engagements) AS unique_engagements;
end
266
+
267
# The remainder of the original Pig Latin/ERB template has not been translated
# to Piglet yet. It is kept verbatim as comments so that this file stays
# parseable Ruby.
#
# -- unions ----------------------------------------------------------------------
# -- -----------------------------------------------------------------------------
#
# <% if @categories.size > 1 -%>
# report_metrics =
# UNION
# <%= @categories.map { |name| "session_category_#{name}" }.join(",\n ") %>;
# <% else -%>
# report_metrics = FILTER session_category_<%= @categories.first %> BY 1 == 1;
# <% end -%>
#
# <% if @categories.size > 1 -%>
# unique_report_metrics =
# UNION
# <%= @categories.map { |name| "unique_session_category_#{name}" }.join(",\n ") %>;
# <% else -%>
# unique_report_metrics = FILTER unique_session_category_<%= @categories.first %> BY 1 == 1;
# <% end -%>
# <% if @categories.size > 1 -%>
# report_action_metrics =
# UNION
# <%= @categories.map { |name| "action_category_#{name}" }.join(",\n ") %>;
# <% else -%>
# report_action_metrics = FILTER action_category_<%= @categories.first %> BY 1 == 1;
# <% end -%>
# <% if @categories.size > 1 -%>
# unique_report_action_metrics =
# UNION
# <%= @categories.map { |name| "unique_action_category_#{name}" }.join(",\n ") %>;
# <% else -%>
# unique_report_action_metrics = FILTER unique_action_category_<%= @categories.first %> BY 1 == 1;
# <% end %>
#
# -- complete output -------------------------------------------------------------
# -- -----------------------------------------------------------------------------
#
#
# <% %w(report_metrics unique_report_metrics report_action_metrics unique_report_action_metrics).each do |relation| -%>
# <%= relation %> = FILTER <%= relation %> BY date is not null AND date != '' AND api_key is not null AND api_key != '';
# <% end -%>
#
# STORE report_metrics INTO '$OUTPUT/report_metrics' USING PigStorage;
# STORE unique_report_metrics INTO '$OUTPUT/unique_report_metrics' USING PigStorage;
# STORE report_action_metrics INTO '$OUTPUT/report_action_metrics' USING PigStorage;
# STORE unique_report_action_metrics INTO '$OUTPUT/unique_report_action_metrics' USING PigStorage;
@@ -0,0 +1,11 @@
1
module Piglet::Relation
  # Convenience wrapper that calls +sample+ once per argument.
  #
  # sizes:: the values to pass, one at a time, to +sample+
  #
  # Returns an array holding the result of each +sample+ call, in the same
  # order as the arguments.
  def samples(*sizes)
    sizes.collect do |size|
      sample(size)
    end
  end
end
6
+
7
# Load one relation, take three samples of it and store each sample under its
# own output name.
input = load('input', :schema => %w(country browser site visit_duration))

outputs = %w(output1 output2 output3)
input.samples(0.1, 0.2, 0.3).zip(outputs).each do |sampled, destination|
  store(sampled, destination)
end
@@ -0,0 +1,43 @@
1
# Pig Latin original:
#
# raw_ads =
#   LOAD '$INPUT/ads*'
#   USING PigStorage AS (
#     ad_id:chararray,
#     api_key:chararray,
#     name:chararray,
#     dimensions:chararray,
#     destination:chararray,
#     agent_version:chararray
#   );
#
# Sketch of a chained load syntax: each schema entry is a [name, type] pair.
ad_schema = [
  [:ad_id, :chararray],
  [:api_key, :chararray],
  [:name, :chararray],
  [:dimensions, :chararray],
  [:destination, :chararray],
  [:agent_version, :chararray]
]

# NOTE(review): raw_ads is not assigned anywhere in this file before "<<" is
# applied to it -- presumably part of the syntax being explored; confirm.
raw_ads << load('$INPUT/ads*').using(:pig_storage).as(*ad_schema)
19
+
20
# Pig Latin original:
#
# ads =
#   FOREACH
#     (GROUP raw_ads BY ad_id PARALLEL $PARALLELISM)
#   GENERATE
#     $0 AS ad_id,
#     MAX($1.api_key) AS api_key,
#     MAX($1.name) AS name,
#     MAX($1.dimensions) AS dimensions,
#     MAX($1.destination) AS destination,
#     MAX($1.agent_version) AS agent_version
#   ;
ads_by_id = raw_ads.group(:ad_id)

# NOTE(review): as with raw_ads, ads is not assigned before "<<" -- confirm.
ads << ads_by_id.foreach do |grouped|
  [
    # $0 is the grouping key, $1 the grouped rows
    grouped[0].as(:ad_id),
    grouped[1].api_key.max.as(:api_key),
    grouped[1].name.max.as(:name),
    grouped[1].dimensions.max.as(:dimensions),
    grouped[1].destination.max.as(:destination),
    grouped[1].agent_version.max.as(:agent_version)
  ]
end

# Pig Latin original:
#   STORE ads INTO '$OUTPUT/ads' USING PigStorage;
ads.store('$OUTPUT/ads').using(:pig_storage)
@@ -0,0 +1,40 @@
1
# Pig Latin original:
#
# raw_ads =
#   LOAD '$INPUT/ads*'
#   USING PigStorage AS (
#     ad_id:chararray,
#     api_key:chararray,
#     name:chararray,
#     dimensions:chararray,
#     destination:chararray,
#     agent_version:chararray
#   );
#
# Option-hash variant of the load: the schema lists field names only.
ad_fields = %w(ad_id api_key name dimensions destination agent_version)
raw_ads = load('$INPUT/ads*', :using => :pig_storage, :schema => ad_fields)
16
+
17
# Pig Latin original:
#
# ads =
#   FOREACH
#     (GROUP raw_ads BY ad_id PARALLEL $PARALLELISM)
#   GENERATE
#     $0 AS ad_id,
#     MAX($1.api_key) AS api_key,
#     MAX($1.name) AS name,
#     MAX($1.dimensions) AS dimensions,
#     MAX($1.destination) AS destination,
#     MAX($1.agent_version) AS agent_version
#   ;
#
# The block returns one array with a column expression per generated field.
# Every non-key column is aggregated with MAX, as in the Pig original.
ads = raw_ads.group(:ad_id, :parallel => 2).foreach do |relation|
  [
    relation[0].as(:ad_id),
    relation[1].api_key.max.as(:api_key),
    relation[1].name.max.as(:name),
    relation[1].dimensions.max.as(:dimensions),
    relation[1].destination.max.as(:destination),
    relation[1].agent_version.max.as(:agent_version)
  ]
end

# Pig Latin original:
#   STORE ads INTO '$OUTPUT/ads' USING PigStorage;
store(ads, '$OUTPUT/ads', :using => :pig_storage)
data/examples/test1.rb ADDED
@@ -0,0 +1,3 @@
1
# Load three named columns, group the rows by country and dump the result.
raw_data = load('test1-data.txt', :schema => %w(name city country))
grouped_by_country = raw_data.group(:country)
dump(grouped_by_country)
data/examples/test2.rb ADDED
@@ -0,0 +1,5 @@
1
# Group the same input once per field and store each grouping under
# "out-<field>".
a = load('in', :schema => %w(x y z w))
%w(x y z w).each do |field|
  grouped = a.group(field)
  store(grouped, "out-#{field}")
end
data/examples/test3.rb ADDED
@@ -0,0 +1,4 @@
1
# Group by :c, then emit the group key together with the maximum of :a and :b.
a = load('input', :schema => [:a, :b, :c])
b = a.group(:c)
c = b.foreach do |grouped|
  [grouped[0], grouped[1].a.max, grouped[1].b.max]
end
store(c, 'output')
@@ -0,0 +1,13 @@
1
module Piglet
  # Renders "<alias> = <relation>" for a relation: the relation's alias on the
  # left-hand side and its string form on the right.
  class Assignment # :nodoc:
    # The relation being assigned.
    attr_reader :target

    # relation:: an object responding to +alias+ and +to_s+
    def initialize(relation)
      @target = relation
    end

    # Returns the assignment statement as a string.
    def to_s
      left_hand_side = target.alias
      "#{left_hand_side} = #{target}"
    end
  end
end
@@ -0,0 +1,31 @@
1
module Piglet
  # Renders a Pig Latin COGROUP statement from a description hash that maps
  # relations to the field(s) they are grouped by.
  class Cogroup # :nodoc:
    include Relation

    # relation::    accepted for interface compatibility; not used here
    # description:: a hash whose Relation keys map to a field, or to an array
    #               of fields optionally ending in :inner or :outer; the
    #               :parallel key, when present, becomes the PARALLEL clause
    def initialize(relation, description)
      # Only entries keyed by a Relation describe a source; everything else
      # (such as :parallel) is an option. Hash#reject is used, not #select,
      # because it returns a Hash on every Ruby version.
      @join_fields = description.reject { |key, _| !key.is_a?(Relation) }
      @sources = @join_fields.keys
      @parallel = description[:parallel]
    end

    # Returns the COGROUP statement as a string.
    def to_s
      joins = @sources.map { |source| join_clause(source, @join_fields[source]) }
      str = "COGROUP #{joins.join(', ')}"
      str << " PARALLEL #{@parallel}" if @parallel
      str
    end

    private

    # Renders "<alias> BY <fields> [INNER|OUTER]" for one source.
    def join_clause(source, fields)
      modifier = nil
      if fields.is_a?(Enumerable) && fields.size > 1 && (fields.last == :inner || fields.last == :outer)
        modifier = fields.last.to_s.upcase
        fields = fields[0..-2]
      end

      by = if fields.is_a?(Enumerable) && fields.size > 1
        "(#{fields.join(', ')})"
      elsif fields.is_a?(Array)
        # A one-element array (e.g. what is left of [:x, :inner]) must render
        # as the bare field name; interpolating the array itself yields its
        # inspect form ("[:x]") on Ruby 1.9 and later.
        fields.first.to_s
      else
        fields.to_s
      end

      clause = "#{source.alias} BY #{by}"
      clause << " #{modifier}" if modifier
      clause
    end
  end
end
@@ -0,0 +1,22 @@
1
module Piglet
  # Renders a Pig Latin CROSS statement over a list of relations.
  class Cross # :nodoc:
    include Relation

    # relations:: the relations to cross; each must respond to +alias+
    # options::   :parallel, when truthy, is emitted as a PARALLEL clause;
    #             nil is accepted and treated as no options
    def initialize(relations, options={})
      settings = options || {}
      @sources = relations
      @parallel = settings[:parallel]
    end

    # Returns the CROSS statement as a string.
    def to_s
      parts = ["CROSS #{source_aliases.join(', ')}"]
      parts << "PARALLEL #{@parallel}" if @parallel
      parts.join(' ')
    end

    private

    # The aliases of all source relations, in order.
    def source_aliases
      @sources.map do |source|
        source.alias
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Piglet
  # Describe has no behaviour of its own; everything it does comes from the
  # Storing mixin.
  class Describe # :nodoc:
    include Storing
  end
end
@@ -0,0 +1,16 @@
1
module Piglet
  # Renders a Pig Latin DISTINCT statement for a single relation.
  class Distinct # :nodoc:
    include Relation

    # relation:: the relation to de-duplicate; must respond to +alias+
    # options::  :parallel, when truthy, is emitted as a PARALLEL clause;
    #            nil is accepted and treated as no options
    def initialize(relation, options={})
      settings = options || {}
      @sources = [relation]
      @parallel = settings[:parallel]
    end

    # Returns the DISTINCT statement as a string.
    def to_s
      parts = ["DISTINCT #{@sources.first.alias}"]
      parts << "PARALLEL #{@parallel}" if @parallel
      parts.join(' ')
    end
  end
end
@@ -0,0 +1,5 @@
1
module Piglet
  # Dump has no behaviour of its own; everything it does comes from the
  # Storing mixin.
  class Dump # :nodoc:
    include Storing
  end
end
@@ -0,0 +1,13 @@
1
module Piglet
  # Storing statement that can also be rendered without a relation.
  class Explain # :nodoc:
    include Storing

    # Returns the bare keyword "EXPLAIN" when there is no relation; otherwise
    # defers to the rendering inherited from Storing.
    def to_s
      return "EXPLAIN" if relation.nil?
      super
    end
  end
end
@@ -0,0 +1,40 @@
1
module Piglet
  # A reference to a field, optionally qualified by the parent (a relation or
  # another field) it was reached through.
  class Field # :nodoc:
    include FieldExpressionFunctions

    # name::     the field name (anything responding to +to_s+)
    # relation:: the parent this field belongs to; may be nil
    # options::  :explicit_ancestry => true makes #to_s prefix the parent
    def initialize(name, relation=nil, options=nil)
      settings = options || {}
      @name = name
      @parent = relation
      @explicit_ancestry = settings[:explicit_ancestry] || false
    end

    # A plain field reference never needs parentheses.
    def simple?
      true
    end

    # Any argument-less call whose name consists of word characters yields a
    # nested field qualified by this one; everything else is passed up.
    #
    # No respond_to_missing? is defined alongside this: #to_s below asks its
    # parent respond_to?(:alias), and for a Field parent the answer must stay
    # false.
    def method_missing(name, *args)
      return super unless name.to_s =~ /^\w+$/ && args.empty?
      Field.new(name, self, :explicit_ancestry => true)
    end

    # Positional access: field[n] is the nested field "$n".
    def [](n)
      Field.new("\$#{n}", self, :explicit_ancestry => true)
    end

    # Returns the field name, prefixed with "<parent>." when the ancestry is
    # explicit. The parent's alias is used when it has one, otherwise its
    # string form.
    def to_s
      return @name.to_s unless @explicit_ancestry

      qualifier = @parent.respond_to?(:alias) ? @parent.alias : @parent
      "#{qualifier}.#{@name}"
    end
  end

end
@@ -0,0 +1,62 @@
1
module Piglet
  # Mixin with the operations available on fields and field expressions. Every
  # method wraps the receiver in a new expression object rather than computing
  # a value.
  module FieldExpressionFunctions # :nodoc:
    SYMBOLIC_OPERATORS = [:==, :>, :<, :>=, :<=, :%, :+, :-, :*, :/]
    FUNCTIONS = [:avg, :count, :diff, :max, :min, :size, :sum, :tokenize]

    # One method per entry in FUNCTIONS, each wrapping the receiver in a call
    # to the function with the upper-cased name (e.g. #sum => SUM).
    FUNCTIONS.each do |function_name|
      define_method(function_name) do
        FieldFunctionExpression.new(function_name.to_s.upcase, self)
      end
    end

    # Wraps the receiver in an IsEmpty call.
    def empty?
      FieldFunctionExpression.new('IsEmpty', self)
    end

    # Renames the receiver to +new_name+.
    def as(new_name)
      FieldRename.new(new_name, self)
    end

    # Prefixes the receiver with NOT.
    def not
      FieldPrefixExpression.new('NOT', self)
    end

    # Suffixes the receiver with "is null".
    def null?
      FieldSuffixExpression.new('is null', self)
    end

    # Suffixes the receiver with "is not null".
    def not_null?
      FieldSuffixExpression.new('is not null', self)
    end

    # Prefixes the receiver with a "(type)" cast.
    def cast(type)
      FieldPrefixExpression.new("(#{type})", self)
    end

    # Builds a "matches" expression against +pattern+, quoted with single
    # quotes. For a Regexp whose string form looks like "(?opts:source)" only
    # the inner source part is used.
    def matches(pattern)
      regex_options_pattern = /^\(\?.+?:(.*)\)$/
      if pattern.is_a?(Regexp) && pattern.to_s =~ regex_options_pattern
        pattern = pattern.to_s.sub(regex_options_pattern, '\1')
      end
      FieldInfixExpression.new('matches', self, "'#{pattern}'")
    end

    # Prefixes the receiver with a minus sign; the trailing +false+ is passed
    # on to FieldPrefixExpression unchanged.
    def neg
      FieldPrefixExpression.new('-', self, false)
    end

    # Builds a "!=" comparison with +other+.
    def ne(other)
      FieldInfixExpression.new('!=', self, other)
    end

    # One method per entry in SYMBOLIC_OPERATORS, each building an infix
    # expression with the receiver on the left and the argument on the right.
    SYMBOLIC_OPERATORS.each do |operator|
      define_method(operator) do |other|
        FieldInfixExpression.new(operator.to_s, self, other)
      end
    end

  protected

    # Returns +expr+ as a string, wrapped in parentheses when it reports
    # itself as not simple.
    def parenthesise(expr)
      return "(#{expr})" if expr.respond_to?(:simple?) && !expr.simple?
      expr.to_s
    end
  end
end
@@ -0,0 +1,19 @@
1
module Piglet
  # A function applied to an inner expression, rendered as "NAME(inner)".
  class FieldFunctionExpression # :nodoc:
    include FieldExpressionFunctions

    # name::             the function name, used verbatim in the output
    # inner_expression:: the expression the function is applied to
    # options::          :as is stored as the new name; it is not used by
    #                    #to_s
    def initialize(name, inner_expression, options=nil)
      settings = options || {}
      @name = name
      @inner_expression = inner_expression
      @new_name = settings[:as]
    end

    # A function call is never a simple expression.
    def simple?
      false
    end

    # Returns the call as a string.
    def to_s
      argument = @inner_expression
      "#{@name}(#{argument})"
    end
  end
end
@@ -0,0 +1,17 @@
1
module Piglet
  # Two expressions joined by an operator, rendered as "left operator right".
  class FieldInfixExpression # :nodoc:
    include FieldExpressionFunctions

    # operator::         the operator text placed between the operands
    # left_expression::  the left operand
    # right_expression:: the right operand
    def initialize(operator, left_expression, right_expression)
      @operator = operator
      @left_expression = left_expression
      @right_expression = right_expression
    end

    # An infix expression is never a simple expression.
    def simple?
      false
    end

    # Returns the expression as a string; each operand is parenthesised when
    # it is itself not simple.
    def to_s
      left = parenthesise(@left_expression)
      right = parenthesise(@right_expression)
      "#{left} #{@operator} #{right}"
    end
  end
end