piglet 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.document +5 -0
  2. data/.gitignore +22 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +293 -0
  5. data/Rakefile +50 -0
  6. data/bin/piglet +9 -0
  7. data/examples/analysis.rb +311 -0
  8. data/examples/scratch.rb +11 -0
  9. data/examples/spike1.rb +43 -0
  10. data/examples/spike2.rb +40 -0
  11. data/examples/test1.rb +3 -0
  12. data/examples/test2.rb +5 -0
  13. data/examples/test3.rb +4 -0
  14. data/lib/piglet/assignment.rb +13 -0
  15. data/lib/piglet/cogroup.rb +31 -0
  16. data/lib/piglet/cross.rb +22 -0
  17. data/lib/piglet/describe.rb +5 -0
  18. data/lib/piglet/distinct.rb +16 -0
  19. data/lib/piglet/dump.rb +5 -0
  20. data/lib/piglet/explain.rb +13 -0
  21. data/lib/piglet/field.rb +40 -0
  22. data/lib/piglet/field_expression_functions.rb +62 -0
  23. data/lib/piglet/field_function_expression.rb +19 -0
  24. data/lib/piglet/field_infix_expression.rb +17 -0
  25. data/lib/piglet/field_prefix_expression.rb +21 -0
  26. data/lib/piglet/field_rename.rb +11 -0
  27. data/lib/piglet/field_suffix_expression.rb +17 -0
  28. data/lib/piglet/filter.rb +13 -0
  29. data/lib/piglet/foreach.rb +19 -0
  30. data/lib/piglet/group.rb +21 -0
  31. data/lib/piglet/illustrate.rb +5 -0
  32. data/lib/piglet/interpreter.rb +108 -0
  33. data/lib/piglet/join.rb +20 -0
  34. data/lib/piglet/limit.rb +13 -0
  35. data/lib/piglet/load.rb +31 -0
  36. data/lib/piglet/load_and_store.rb +16 -0
  37. data/lib/piglet/order.rb +29 -0
  38. data/lib/piglet/relation.rb +177 -0
  39. data/lib/piglet/sample.rb +13 -0
  40. data/lib/piglet/split.rb +41 -0
  41. data/lib/piglet/store.rb +17 -0
  42. data/lib/piglet/storing.rb +13 -0
  43. data/lib/piglet/stream.rb +5 -0
  44. data/lib/piglet/union.rb +19 -0
  45. data/lib/piglet.rb +45 -0
  46. data/spec/piglet/field_spec.rb +130 -0
  47. data/spec/piglet/interpreter_spec.rb +413 -0
  48. data/spec/piglet/relation_spec.rb +79 -0
  49. data/spec/piglet/split_spec.rb +34 -0
  50. data/spec/piglet_spec.rb +7 -0
  51. data/spec/spec.opts +3 -0
  52. data/spec/spec_helper.rb +14 -0
  53. metadata +123 -0
@@ -0,0 +1,311 @@
1
+ # raw_sessions =
2
+ # LOAD '$INPUT/sessions*'
3
+ # USING PigStorage AS (
4
+ # date:chararray,
5
+ # api_key:chararray,
6
+ # ad_id:chararray,
7
+ # user_id:chararray,
8
+ # site:chararray,
9
+ # size:chararray,
10
+ # name:chararray,
11
+ # destination:chararray,
12
+ # indeterminate_visibility:int,
13
+ # impression:int,
14
+ # engagement:int,
15
+ # click_thru:int,
16
+ # extra:int,
17
+ # session_time:int,
18
+ # visible_time:int,
19
+ # engagement_time:int
20
+ # );
# Ruby counterpart of the LOAD statement for the session logs. Each schema
# entry is a [field_name, pig_type] pair; the pairs need a comma between the
# two symbols to be valid Ruby.
raw_sessions = load('$INPUT/sessions*', :schema => [
  [:date, :chararray],
  [:api_key, :chararray],
  [:ad_id, :chararray],
  [:user_id, :chararray],
  [:site, :chararray],
  [:size, :chararray],
  [:name, :chararray],
  [:destination, :chararray],
  [:indeterminate_visibility, :int],
  [:impression, :int],
  [:engagement, :int],
  [:click_thru, :int],
  [:extra, :int],
  [:session_time, :int],
  [:visible_time, :int],
  [:engagement_time, :int]
])
39
+
40
+ # raw_actions =
41
+ # LOAD '$INPUT/actions*'
42
+ # USING PigStorage AS (
43
+ # date:chararray,
44
+ # api_key:chararray,
45
+ # ad_id:chararray,
46
+ # user_id:chararray,
47
+ # action:chararray,
48
+ # site:chararray,
49
+ # size:chararray,
50
+ # name:chararray,
51
+ # destination:chararray,
52
+ # extra:int
53
+ # );
# Ruby counterpart of the LOAD statement for the action logs. The schema is
# passed as one array of [field_name, pig_type] pairs, the same shape used for
# raw_sessions in this file.
raw_actions = load('$INPUT/actions*', :schema => [
  [:date, :chararray],
  [:api_key, :chararray],
  [:ad_id, :chararray],
  [:user_id, :chararray],
  [:action, :chararray],
  [:site, :chararray],
  [:size, :chararray],
  [:name, :chararray],
  [:destination, :chararray],
  [:extra, :int]
])
66
+
67
+ #sessions = FILTER raw_sessions BY date is not null;
# Drop session rows whose date field is null.
sessions = raw_sessions.filter { |r| r.date.not_null? }

#actions = FILTER raw_actions BY date is not null;
# Drop action rows whose date field is null.
actions = raw_actions.filter { |r| r.date.not_null? }
72
+
73
+ # /*
74
+ # * Modify each session and action based on whether or not it's an extra session
75
+ # * (a session that was logged only because it was a click thru). Extra sessions
76
+ # * should affect only the total number of click thrus, not the number of
77
+ # * exposures, impressions, etc. nor the durations. By setting these values to
78
+ # * zero and introducing a field for whether or not the session was an exposure
79
+ # * (zero for extra sessions, one for all other), the calculations below can
80
+ # * filter out extra sessions without too much work.
81
+ # */
82
+ # sessions =
83
+ # FOREACH
84
+ # sessions
85
+ # GENERATE
86
+ # date,
87
+ # api_key,
88
+ # ad_id,
89
+ # user_id,
90
+ # site,
91
+ # size,
92
+ # name,
93
+ # destination,
94
+ # (extra == 1 ? 0 : indeterminate_visibility) AS indeterminate_visibility,
95
+ # (extra == 1 ? 0 : 1) AS exposure,
96
+ # (extra == 1 ? 0 : impression) AS impression,
97
+ # (extra == 1 ? 0 : engagement) AS engagement,
98
+ # click_thru,
99
+ # (extra == 1 ? 0 : session_time) AS session_time,
100
+ # (extra == 1 ? 0 : visible_time) AS visible_time,
101
+ # (extra == 1 ? 0 : engagement_time) AS engagement_time;
# Project every session row, zeroing the exposure-related counters and
# durations when the row is an "extra" session (extra == 1). click_thru is
# passed through untouched so extra sessions still count as click thrus.
# NOTE(review): r.test(cond, a, b) is presumably the DSL form of Pig's
# `cond ? a : b`; its definition is not visible in this chunk.
sessions = sessions.foreach do |r|
  [
    r.date,
    r.api_key,
    r.ad_id,
    r.user_id,
    r.site,
    r.size,
    r.name,
    r.destination,
    r.test(r.extra == 1, 0, r.indeterminate_visibility).as(:indeterminate_visibility),
    r.test(r.extra == 1, 0, 1).as(:exposure),
    r.test(r.extra == 1, 0, r.impression).as(:impression),
    r.test(r.extra == 1, 0, r.engagement).as(:engagement),
    r.click_thru,
    r.test(r.extra == 1, 0, r.session_time).as(:session_time),
    r.test(r.extra == 1, 0, r.visible_time).as(:visible_time),
    r.test(r.extra == 1, 0, r.engagement_time).as(:engagement_time)
  ]
end
122
+
123
+ # actions =
124
+ # FOREACH
125
+ # actions
126
+ # GENERATE
127
+ # date,
128
+ # api_key,
129
+ # ad_id,
130
+ # user_id,
131
+ # action,
132
+ # site,
133
+ # size,
134
+ # name,
135
+ # destination,
136
+ # (extra == 1 ? 0 : 1) AS exposure;
# Project every action row and add an :exposure column that is 0 for
# "extra" rows (extra == 1) and 1 for all others.
# NOTE(review): r.test(cond, a, b) is presumably the DSL form of Pig's
# `cond ? a : b`; its definition is not visible in this chunk.
actions = actions.foreach do |r|
  [
    r.date,
    r.api_key,
    r.ad_id,
    r.user_id,
    r.action,
    r.site,
    r.size,
    r.name,
    r.destination,
    r.test(r.extra == 1, 0, 1).as(:exposure)
  ]
end
151
+
152
+ %w(all site size name).each do |name|
153
+ # session_category_<%= name %> =
154
+ # FOREACH
155
+ # (GROUP sessions BY (date, ad_id, api_key, <%= name == 'all' ? "'all'" : name %>) PARALLEL $PARALLELISM)
156
+ # GENERATE
157
+ # $0.date AS date,
158
+ # $0.ad_id AS ad_id,
159
+ # $0.api_key AS api_key,
160
+ # '<%= name %>' AS category,
161
+ # <%= name == 'all' ? "'all'" : "$0.#{name}" %> AS segment,
162
+ # SUM($1.exposure) AS exposures,
163
+ # SUM($1.impression) AS impressions,
164
+ # SUM($1.engagement) AS engagements,
165
+ # SUM($1.click_thru) AS click_thrus,
166
+ # SUM($1.indeterminate_visibility) AS indeterminate_visibility,
167
+ # SUM($1.session_time) AS session_time,
168
+ # SUM($1.visible_time) AS visible_time,
169
+ # SUM($1.engagement_time) AS engagement_time;
# Totals per (date, ad_id, api_key, segment). Each aggregate sums one named
# column of the grouped bag (r[1]) and is aliased with the plural name used
# in the Pig reference above (SUM($1.exposure) AS exposures, ...).
# NOTE(review): `name` is a plain String here, so `name.as(:category)` and
# the "'all'" literal rely on a String#as that is not visible in this chunk;
# `r[0].name` also reads the field literally called "name" rather than the
# field named by the loop variable -- confirm the intended DSL calls.
session_category = sessions.group(:date, :ad_id, :api_key, (name == 'all' ? 'all' : name), :parallel => '$PARALLELISM')
session_category = session_category.foreach do |r|
  [
    r[0].date.as(:date),
    r[0].ad_id.as(:ad_id),
    r[0].api_key.as(:api_key),
    name.as(:category),
    (name == 'all' ? "'all'" : r[0].name).as(:segment),
    r[1].exposure.sum.as(:exposures),
    r[1].impression.sum.as(:impressions),
    r[1].engagement.sum.as(:engagements),
    r[1].click_thru.sum.as(:click_thrus),
    r[1].indeterminate_visibility.sum.as(:indeterminate_visibility),
    r[1].session_time.sum.as(:session_time),
    r[1].visible_time.sum.as(:visible_time),
    r[1].engagement_time.sum.as(:engagement_time)
  ]
end
188
+
189
+ # session_category_<%= name %>_by_user_id =
190
+ # FOREACH
191
+ # (GROUP sessions BY (date, ad_id, api_key, user_id, <%= name == 'all' ? "'all'" : name %>) PARALLEL $PARALLELISM)
192
+ # GENERATE
193
+ # $0.date AS date,
194
+ # $0.ad_id AS ad_id,
195
+ # $0.api_key AS api_key,
196
+ # '<%= name %>' AS category,
197
+ # <%= name == 'all' ? "'all'" : "$0.#{name}" %> AS segment,
198
+ # 1 AS exposures,
199
+ # MAX($1.impression) AS impressions,
200
+ # MAX($1.engagement) AS engagements,
201
+ # MAX($1.click_thru) AS click_thrus;
# Per-user rollup: one row per (date, ad_id, api_key, user_id, segment) with
# the maximum of each counter, so a user counts at most once per metric.
# user_id is part of the grouping key, as in the Pig reference above.
# The block must return a single array of columns.
# NOTE(review): `name.as(...)`, `"'all'".as(...)` and `1.as(...)` call #as on
# plain Ruby literals; no such method is visible in this chunk -- confirm how
# literals are meant to be expressed in the DSL.
session_category_by_user_id = sessions.group(:date, :ad_id, :api_key, :user_id, (name == 'all' ? 'all' : name), :parallel => '$PARALLELISM')
session_category_by_user_id = session_category_by_user_id.foreach do |r|
  [
    r[0].date.as(:date),
    r[0].ad_id.as(:ad_id),
    r[0].api_key.as(:api_key),
    name.as(:category),
    (name == 'all' ? "'all'" : r[0].name).as(:segment),
    1.as(:exposures),
    r[1].impression.max.as(:impressions),
    r[1].engagement.max.as(:engagements),
    r[1].click_thru.max.as(:click_thrus)
  ]
end
214
+
215
+ # unique_session_category_<%= name %> =
216
+ # FOREACH
217
+ # (GROUP session_category_<%= name %>_by_user_id BY (date, ad_id, api_key, category, segment) PARALLEL $PARALLELISM)
218
+ # GENERATE
219
+ # $0.date AS date,
220
+ # $0.ad_id AS ad_id,
221
+ # $0.api_key AS api_key,
222
+ # $0.category,
223
+ # $0.segment,
224
+ # COUNT($1.ad_id) AS unique_exposures,
225
+ # SUM($1.impressions) AS unique_impressions,
226
+ # SUM($1.engagements) AS unique_engagements,
227
+ # SUM($1.click_thrus) AS unique_click_thrus;
228
+ #
229
+ # action_category_<%= name %> =
230
+ # FOREACH
231
+ # (GROUP actions BY (date, ad_id, api_key, action, <%= name == 'all' ? "'all'" : name %>) PARALLEL $PARALLELISM)
232
+ # GENERATE
233
+ # $0.date AS date,
234
+ # $0.ad_id AS ad_id,
235
+ # $0.api_key AS api_key,
236
+ # $0.action AS action,
237
+ # '<%= name %>' AS category,
238
+ # <%= name == 'all' ? "'all'" : "$0.#{name}" %> AS segment,
239
+ # SUM($1.exposure) AS engagements;
240
+ #
241
+ # action_category_<%= name %>_by_user_id =
242
+ # FOREACH
243
+ # (GROUP actions BY (date, ad_id, api_key, action, user_id, <%= name == 'all' ? "'all'" : name %>) PARALLEL $PARALLELISM)
244
+ # GENERATE
245
+ # $0.date AS date,
246
+ # $0.ad_id AS ad_id,
247
+ # $0.api_key AS api_key,
248
+ # $0.action AS action,
249
+ # '<%= name %>' AS category,
250
+ # <%= name == 'all' ? "'all'" : "$0.#{name}" %> AS segment,
251
+ # 1 AS exposures,
252
+ # 1 AS engagements;
253
+ #
254
+ # unique_action_category_<%= name %> =
255
+ # FOREACH
256
+ # (GROUP action_category_<%= name %>_by_user_id BY (date, ad_id, api_key, action, category, segment) PARALLEL $PARALLELISM)
257
+ # GENERATE
258
+ # $0.date AS date,
259
+ # $0.ad_id AS ad_id,
260
+ # $0.api_key AS api_key,
261
+ # $0.action AS action,
262
+ # $0.category,
263
+ # $0.segment,
264
+ # SUM($1.engagements) AS unique_engagements;
265
+ end
266
+
# TODO: the remainder of the original Pig Latin / ERB template has not been
# translated to the Ruby DSL yet. It is kept verbatim, commented out, so that
# this file stays parseable as Ruby.
#
# -- unions ----------------------------------------------------------------------
# -- -----------------------------------------------------------------------------
#
# <% if @categories.size > 1 -%>
# report_metrics =
# UNION
# <%= @categories.map { |name| "session_category_#{name}" }.join(",\n ") %>;
# <% else -%>
# report_metrics = FILTER session_category_<%= @categories.first %> BY 1 == 1;
# <% end -%>
#
# <% if @categories.size > 1 -%>
# unique_report_metrics =
# UNION
# <%= @categories.map { |name| "unique_session_category_#{name}" }.join(",\n ") %>;
# <% else -%>
# unique_report_metrics = FILTER unique_session_category_<%= @categories.first %> BY 1 == 1;
# <% end -%>
# <% if @categories.size > 1 -%>
# report_action_metrics =
# UNION
# <%= @categories.map { |name| "action_category_#{name}" }.join(",\n ") %>;
# <% else -%>
# report_action_metrics = FILTER action_category_<%= @categories.first %> BY 1 == 1;
# <% end -%>
# <% if @categories.size > 1 -%>
# unique_report_action_metrics =
# UNION
# <%= @categories.map { |name| "unique_action_category_#{name}" }.join(",\n ") %>;
# <% else -%>
# unique_report_action_metrics = FILTER unique_action_category_<%= @categories.first %> BY 1 == 1;
# <% end %>
#
# -- complete output -------------------------------------------------------------
# -- -----------------------------------------------------------------------------
#
#
# <% %w(report_metrics unique_report_metrics report_action_metrics unique_report_action_metrics).each do |relation| -%>
# <%= relation %> = FILTER <%= relation %> BY date is not null AND date != '' AND api_key is not null AND api_key != '';
# <% end -%>
#
# STORE report_metrics INTO '$OUTPUT/report_metrics' USING PigStorage;
# STORE unique_report_metrics INTO '$OUTPUT/unique_report_metrics' USING PigStorage;
# STORE report_action_metrics INTO '$OUTPUT/report_action_metrics' USING PigStorage;
# STORE unique_report_action_metrics INTO '$OUTPUT/unique_report_action_metrics' USING PigStorage;
@@ -0,0 +1,11 @@
# Example of extending the DSL from a script: reopen Piglet::Relation and add
# a helper built on the existing #sample method.
module Piglet::Relation
  # Calls #sample once per given size and returns the results as an array,
  # in the same order as the arguments.
  def samples(*sizes)
    sizes.map { |s| sample(s) }
  end
end
6
+
# Load one input, take three samples of it with the helper defined above,
# and store each sample in its own output.
input = load('input', :schema => %w(country browser site visit_duration))
a, b, c = input.samples(0.1, 0.2, 0.3)
store(a, 'output1')
store(b, 'output2')
store(c, 'output3')
@@ -0,0 +1,43 @@
1
+ # raw_ads =
2
+ # LOAD '$INPUT/ads*'
3
+ # USING PigStorage AS (
4
+ # ad_id:chararray,
5
+ # api_key:chararray,
6
+ # name:chararray,
7
+ # dimensions:chararray,
8
+ # destination:chararray,
9
+ # agent_version:chararray
10
+ # );
# Spike of a chained load syntax: load(...).using(...).as(<name/type pairs>).
# NOTE(review): `raw_ads` is never assigned before `<<` is applied to it, so
# this only works if the DSL resolves unknown names itself; presumably an
# experimental assignment form -- the other examples use plain `=`.
raw_ads << load('$INPUT/ads*').using(:pig_storage).as(
  [:ad_id, :chararray],
  [:api_key, :chararray],
  [:name, :chararray],
  [:dimensions, :chararray],
  [:destination, :chararray],
  [:agent_version, :chararray]
)
19
+
20
+ # ads =
21
+ # FOREACH
22
+ # (GROUP raw_ads BY ad_id PARALLEL $PARALLELISM)
23
+ # GENERATE
24
+ # $0 AS ad_id,
25
+ # MAX($1.api_key) AS api_key,
26
+ # MAX($1.name) AS name,
27
+ # MAX($1.dimensions) AS dimensions,
28
+ # MAX($1.destination) AS destination,
29
+ # MAX($1.agent_version) AS agent_version
30
+ # ;
# Group the raw ads by ad_id and keep, per group, the group key plus the
# maximum value of every other column.
# NOTE(review): as with raw_ads, `ads` is never assigned before `<<` is
# applied; presumably an experimental assignment form.
ads << (raw_ads.group(:ad_id)).foreach do |relation|
  [
    relation[0].as(:ad_id),
    relation[1].api_key.max.as(:api_key),
    relation[1].name.max.as(:name),
    relation[1].dimensions.max.as(:dimensions),
    relation[1].destination.max.as(:destination),
    relation[1].agent_version.max.as(:agent_version)
  ]
end

# STORE ads INTO '$OUTPUT/ads' USING PigStorage;
ads.store('$OUTPUT/ads').using(:pig_storage)
@@ -0,0 +1,40 @@
1
+ # raw_ads =
2
+ # LOAD '$INPUT/ads*'
3
+ # USING PigStorage AS (
4
+ # ad_id:chararray,
5
+ # api_key:chararray,
6
+ # name:chararray,
7
+ # dimensions:chararray,
8
+ # destination:chararray,
9
+ # agent_version:chararray
10
+ # );
# Load the ads input, naming the storage function and the column names
# through the options hash.
raw_ads = load(
  '$INPUT/ads*',
  :using => :pig_storage,
  :schema => %w(ad_id api_key name dimensions destination agent_version)
)
16
+
17
+ # ads =
18
+ # FOREACH
19
+ # (GROUP raw_ads BY ad_id PARALLEL $PARALLELISM)
20
+ # GENERATE
21
+ # $0 AS ad_id,
22
+ # MAX($1.api_key) AS api_key,
23
+ # MAX($1.name) AS name,
24
+ # MAX($1.dimensions) AS dimensions,
25
+ # MAX($1.destination) AS destination,
26
+ # MAX($1.agent_version) AS agent_version
27
+ # ;
# Group the raw ads by ad_id and keep, per group, the group key plus the
# maximum value of every other column (MAX(...) in the Pig reference above,
# including api_key). Array elements must be comma separated.
ads = raw_ads.group(:ad_id, :parallel => 2).foreach do |relation|
  [
    relation[0].as(:ad_id),
    relation[1].api_key.max.as(:api_key),
    relation[1].name.max.as(:name),
    relation[1].dimensions.max.as(:dimensions),
    relation[1].destination.max.as(:destination),
    relation[1].agent_version.max.as(:agent_version)
  ]
end

# STORE ads INTO '$OUTPUT/ads' USING PigStorage;
store(ads, '$OUTPUT/ads', :using => :pig_storage)
data/examples/test1.rb ADDED
@@ -0,0 +1,3 @@
# Smallest example: load a three-column file, group it by country and dump
# the grouped relation.
raw_data = load 'test1-data.txt', :schema => %w(name city country)
grouped_by_country = raw_data.group :country
dump grouped_by_country
data/examples/test2.rb ADDED
@@ -0,0 +1,5 @@
# Group the same input once per column and store each grouping in its own
# output ('out-x', 'out-y', ...).
a = load('in', :schema => %w(x y z w))
%w(x y z w).each do |f|
  r = a.group(f)
  store(r, 'out-' + f)
end
data/examples/test3.rb ADDED
@@ -0,0 +1,4 @@
# Group by column c and emit, per group, the group key and the maximum of
# columns a and b.
a = load 'input', :schema => [:a, :b, :c]
b = a.group :c
c = b.foreach { |r| [r[0], r[1].a.max, r[1].b.max] }
store c, 'output'
@@ -0,0 +1,13 @@
module Piglet
  # Renders "<alias> = <relation>" for a target object.
  class Assignment # :nodoc:
    # The object being assigned; it must respond to #alias and #to_s.
    attr_reader :target

    # relation:: the object whose alias and string form are rendered.
    def initialize(relation)
      @target = relation
    end

    # Returns the target's alias, " = ", and the target's string form.
    def to_s
      "#{@target.alias} = #{@target.to_s}"
    end
  end
end
@@ -0,0 +1,31 @@
module Piglet
  # Builds a COGROUP statement from a description hash that maps relations to
  # the field(s) they are grouped by.
  class Cogroup # :nodoc:
    include Relation

    # relation::    accepted for symmetry with the other relation classes; the
    #               sources are taken from the description's keys.
    # description:: hash of relation => field, or relation => [field, ...],
    #               where the array may end with :inner or :outer. The
    #               :parallel key, if present, sets the PARALLEL clause.
    def initialize(relation, description)
      # Only entries keyed by a Relation describe a source; everything else
      # (e.g. :parallel) is an option.
      @join_fields = description.reject { |k, v| ! (k.is_a?(Relation)) }
      @sources = @join_fields.keys
      @parallel = description[:parallel]
    end

    # Renders "COGROUP a BY x, b BY (y, z) INNER [PARALLEL n]".
    def to_s
      joins = @sources.map do |s|
        fields = @join_fields[s]
        inout = nil
        if fields.is_a?(Enumerable) && fields.size > 1 && (fields.last == :inner || fields.last == :outer)
          inout = fields.last.to_s.upcase
          fields = fields[0..-2]
        end
        if fields.is_a?(Enumerable) && fields.size > 1
          str = "#{s.alias} BY (#{fields.join(', ')})"
        elsif fields.is_a?(Array)
          # A one-element list (e.g. [:x, :inner] after the marker is removed)
          # must render as the bare field name, not as Array#to_s, which is
          # "[:x]" on Ruby 1.9 and later.
          str = "#{s.alias} BY #{fields.join(', ')}"
        else
          str = "#{s.alias} BY #{fields}"
        end
        str << " #{inout}" if inout
        str
      end
      str = "COGROUP #{joins.join(', ')}"
      str << " PARALLEL #{@parallel}" if @parallel
      str
    end
  end
end
@@ -0,0 +1,22 @@
module Piglet
  # Builds a CROSS statement over a list of relations.
  class Cross # :nodoc:
    include Relation

    # relations:: the relations to cross; each must respond to #alias.
    # options::   may contain :parallel for the PARALLEL clause. An explicit
    #             nil is treated like an empty hash.
    def initialize(relations, options={})
      options ||= {}
      @sources, @parallel = relations, options[:parallel]
    end

    # Renders "CROSS a, b[, ...] [PARALLEL n]".
    def to_s
      str = "CROSS #{source_aliases.join(', ')}"
      str << " PARALLEL #{@parallel}" if @parallel
      str
    end

  private

    # Aliases of all source relations, in the order they were given.
    def source_aliases
      @sources.map { |s| s.alias }
    end
  end
end
@@ -0,0 +1,5 @@
module Piglet
  # Statement class with no behaviour of its own; everything comes from the
  # Storing mixin (not visible in this chunk). Presumably renders Pig's
  # DESCRIBE -- confirm against Storing.
  class Describe # :nodoc:
    include Storing
  end
end
@@ -0,0 +1,16 @@
module Piglet
  # Builds a DISTINCT statement for a single relation.
  class Distinct # :nodoc:
    include Relation

    # relation:: the relation to de-duplicate; must respond to #alias.
    # options::  may contain :parallel for the PARALLEL clause. An explicit
    #            nil is treated like an empty hash.
    def initialize(relation, options={})
      options ||= {}
      @sources, @parallel = [relation], options[:parallel]
    end

    # Renders "DISTINCT a [PARALLEL n]".
    def to_s
      str = "DISTINCT #{@sources.first.alias}"
      str << " PARALLEL #{@parallel}" if @parallel
      str
    end
  end
end
@@ -0,0 +1,5 @@
module Piglet
  # Statement class with no behaviour of its own; everything comes from the
  # Storing mixin (not visible in this chunk). Presumably renders Pig's
  # DUMP -- confirm against Storing.
  class Dump # :nodoc:
    include Storing
  end
end
@@ -0,0 +1,13 @@
module Piglet
  # EXPLAIN statement. Unlike the other Storing-based statements it can be
  # rendered without a relation.
  class Explain # :nodoc:
    include Storing

    # Returns the bare keyword "EXPLAIN" when there is no relation; otherwise
    # defers to the #to_s provided further up the ancestor chain.
    # NOTE(review): `relation` is presumably supplied by Storing, which is not
    # visible in this chunk.
    def to_s
      if relation.nil?
        "EXPLAIN"
      else
        super
      end
    end
  end
end
@@ -0,0 +1,40 @@
module Piglet
  # A reference to a field, optionally qualified by its parent (a relation or
  # another field).
  class Field # :nodoc:
    include FieldExpressionFunctions

    # name::     the field name (or a positional name such as "$0").
    # relation:: the parent this field belongs to, if any.
    # options::  :explicit_ancestry => true makes #to_s prefix the name with
    #            the parent.
    def initialize(name, relation=nil, options=nil)
      options ||= {}
      @name, @parent = name, relation
      @explicit_ancestry = options[:explicit_ancestry] || false
    end

    # A plain field never needs parentheses when embedded in an expression.
    def simple?
      true
    end

    # Any argument-less call with a plain word name (e.g. field.city) yields a
    # nested Field qualified by this one.
    #
    # respond_to_missing? is deliberately not defined: #to_s relies on
    # respond_to?(:alias) being false for a Field parent.
    def method_missing(name, *args)
      # \A and \z anchor the whole name; ^ and $ would only anchor one line.
      if name.to_s =~ /\A\w+\z/ && args.empty?
        Field.new(name, self, :explicit_ancestry => true)
      else
        super
      end
    end

    # Positional access: field[0] yields the nested field "$0".
    def [](n)
      Field.new("\$#{n}", self, :explicit_ancestry => true)
    end

    # Renders the bare name, or "<parent>.<name>" when ancestry is explicit.
    # A parent that responds to #alias is rendered by its alias, any other
    # parent by its own string form.
    def to_s
      if @explicit_ancestry
        if @parent.respond_to?(:alias)
          "#{@parent.alias}.#{@name.to_s}"
        else
          "#{@parent}.#{@name.to_s}"
        end
      else
        @name.to_s
      end
    end
  end

end
@@ -0,0 +1,62 @@
module Piglet
  # Mixin that lets a field or expression be combined into larger
  # expressions: function calls, prefix/suffix/infix operators and renames.
  # Every method returns a new expression object wrapping the receiver.
  module FieldExpressionFunctions # :nodoc:
    SYMBOLIC_OPERATORS = [:==, :>, :<, :>=, :<=, :%, :+, :-, :*, :/].freeze
    FUNCTIONS = [:avg, :count, :diff, :max, :min, :size, :sum, :tokenize].freeze

    # One method per function, e.g. #max => FieldFunctionExpression("MAX").
    FUNCTIONS.each do |fun|
      define_method(fun) { FieldFunctionExpression.new(fun.to_s.upcase, self) }
    end

    # Wraps the receiver in an IsEmpty(...) function call.
    def empty?
      FieldFunctionExpression.new('IsEmpty', self)
    end

    # Renames the receiver to +new_name+.
    def as(new_name)
      FieldRename.new(new_name, self)
    end

    # Prefixes the receiver with NOT.
    def not
      FieldPrefixExpression.new('NOT', self)
    end

    # Appends "is null" to the receiver.
    def null?
      FieldSuffixExpression.new('is null', self)
    end

    # Appends "is not null" to the receiver.
    def not_null?
      FieldSuffixExpression.new('is not null', self)
    end

    # Prefixes the receiver with a "(type)" cast.
    def cast(type)
      FieldPrefixExpression.new("(#{type.to_s})", self)
    end

    # Builds "<receiver> matches '<pattern>'". A Regexp contributes only its
    # source text (without the "(?-mix:...)" wrapper that Regexp#to_s adds);
    # any other pattern is used via #to_s. Regexp flags are not carried over.
    def matches(pattern)
      pattern = pattern.source if pattern.is_a?(Regexp)
      FieldInfixExpression.new('matches', self, "'#{pattern.to_s}'")
    end

    # Arithmetic negation of the receiver.
    def neg
      FieldPrefixExpression.new('-', self, false)
    end

    # Inequality, available as a named method alongside the symbolic
    # operators defined below.
    def ne(other)
      FieldInfixExpression.new('!=', self, other)
    end

    # One method per symbolic operator, e.g. a + b => FieldInfixExpression("+").
    SYMBOLIC_OPERATORS.each do |op|
      define_method(op) { |other| FieldInfixExpression.new(op.to_s, self, other) }
    end

  protected

    # Returns +expr+ as a string, wrapped in parentheses when it reports
    # itself as not simple; objects without #simple? are left bare.
    def parenthesise(expr)
      if expr.respond_to?(:simple?) && ! expr.simple?
        "(#{expr})"
      else
        expr.to_s
      end
    end
  end
end
@@ -0,0 +1,19 @@
module Piglet
  # A function applied to an inner expression, rendered as "NAME(inner)".
  class FieldFunctionExpression # :nodoc:
    include FieldExpressionFunctions

    # name::             the function name as it should appear in the output.
    # inner_expression:: the expression the function is applied to.
    # options::          :as is stored in @new_name; nothing in this class
    #                    reads it.
    def initialize(name, inner_expression, options=nil)
      options ||= {}
      @name, @inner_expression = name, inner_expression
      @new_name = options[:as]
    end

    # Reports itself as not simple, so the parenthesise helper wraps it when
    # it is embedded in another expression.
    def simple?
      false
    end

    # Renders "NAME(inner)".
    def to_s
      "#{@name}(#{@inner_expression})"
    end
  end
end
@@ -0,0 +1,17 @@
module Piglet
  # A binary expression, rendered as "left OP right".
  class FieldInfixExpression # :nodoc:
    include FieldExpressionFunctions

    # operator::         the operator text placed between the operands.
    # left_expression::  left operand.
    # right_expression:: right operand.
    def initialize(operator, left_expression, right_expression)
      @operator, @left_expression, @right_expression = operator, left_expression, right_expression
    end

    # Reports itself as not simple, so the parenthesise helper wraps it when
    # it is embedded in another expression.
    def simple?
      false
    end

    # Renders both operands through parenthesise, which adds parentheses
    # around operands that report themselves as not simple.
    def to_s
      "#{parenthesise(@left_expression)} #{@operator} #{parenthesise(@right_expression)}"
    end
  end
end