wonkavision 0.5.11 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/CHANGELOG.rdoc +3 -0
  2. data/lib/wonkavision.rb +28 -1
  3. data/lib/wonkavision/aggregation.rb +21 -0
  4. data/lib/wonkavision/event_coordinator.rb +19 -7
  5. data/lib/wonkavision/extensions/symbol.rb +55 -0
  6. data/lib/wonkavision/facts.rb +27 -0
  7. data/lib/wonkavision/local_job_queue.rb +28 -0
  8. data/lib/wonkavision/message_mapper.rb +2 -2
  9. data/lib/wonkavision/message_mapper/map.rb +60 -8
  10. data/lib/wonkavision/persistence/mongo.rb +95 -0
  11. data/lib/wonkavision/plugins.rb +2 -1
  12. data/lib/wonkavision/plugins/analytics/aggregation.rb +139 -0
  13. data/lib/wonkavision/plugins/analytics/aggregation/aggregation_spec.rb +53 -0
  14. data/lib/wonkavision/plugins/analytics/aggregation/attribute.rb +22 -0
  15. data/lib/wonkavision/plugins/analytics/aggregation/dimension.rb +64 -0
  16. data/lib/wonkavision/plugins/analytics/aggregation/measure.rb +240 -0
  17. data/lib/wonkavision/plugins/analytics/cellset.rb +171 -0
  18. data/lib/wonkavision/plugins/analytics/facts.rb +106 -0
  19. data/lib/wonkavision/plugins/analytics/handlers/apply_aggregation.rb +35 -0
  20. data/lib/wonkavision/plugins/analytics/handlers/split_by_aggregation.rb +60 -0
  21. data/lib/wonkavision/plugins/analytics/member_filter.rb +106 -0
  22. data/lib/wonkavision/plugins/analytics/mongo.rb +6 -0
  23. data/lib/wonkavision/plugins/analytics/persistence/hash_store.rb +59 -0
  24. data/lib/wonkavision/plugins/analytics/persistence/mongo_store.rb +85 -0
  25. data/lib/wonkavision/plugins/analytics/persistence/store.rb +105 -0
  26. data/lib/wonkavision/plugins/analytics/query.rb +76 -0
  27. data/lib/wonkavision/plugins/event_handling.rb +15 -3
  28. data/lib/wonkavision/version.rb +1 -1
  29. data/test/aggregation_spec_test.rb +99 -0
  30. data/test/aggregation_test.rb +170 -0
  31. data/test/analytics/test_aggregation.rb +78 -0
  32. data/test/apply_aggregation_test.rb +92 -0
  33. data/test/attribute_test.rb +26 -0
  34. data/test/cellset_test.rb +200 -0
  35. data/test/dimension_test.rb +186 -0
  36. data/test/facts_test.rb +146 -0
  37. data/test/hash_store_test.rb +112 -0
  38. data/test/log/test.log +96844 -0
  39. data/test/map_test.rb +48 -1
  40. data/test/measure_test.rb +146 -0
  41. data/test/member_filter_test.rb +143 -0
  42. data/test/mongo_store_test.rb +115 -0
  43. data/test/query_test.rb +106 -0
  44. data/test/split_by_aggregation_test.rb +114 -0
  45. data/test/store_test.rb +71 -0
  46. data/test/symbol_test.rb +62 -0
  47. data/test/test_activity_models.rb +1 -1
  48. data/test/test_aggregation.rb +42 -0
  49. data/test/test_data.tuples +100 -0
  50. data/test/test_helper.rb +7 -0
  51. metadata +57 -5
@@ -4,6 +4,7 @@
4
4
  #names to avoid conflicts.
5
5
  module Wonkavision
6
6
  module Plugins
7
+
7
8
  def wonkavision_plugins
8
9
  @wonkavision_plugins ||= []
9
10
  end
@@ -25,6 +26,6 @@ module Wonkavision
25
26
  wonkavision_plugins << mod
26
27
  end
27
28
  alias use plug
28
-
29
+
29
30
  end
30
31
  end
@@ -0,0 +1,139 @@
1
module Wonkavision
  module Plugins
    # Plugin that gives a class aggregation behaviour: a registry of
    # configured aggregation classes, class-level helpers for choosing a
    # store, looking up instances by dimensions and querying, and
    # instance-level helpers for pushing measure changes to the store.
    module Aggregation

      # Registry of every configured aggregation class, keyed by class name.
      def self.all
        @@all ||= {}
      end

      # Prepares +aggregation+ (a class) for use as an aggregation.
      #
      # Stores +options+ and a new AggregationSpec (named after the class)
      # as inheritable class attributes, exposes a reader for each, and
      # registers the class in Aggregation.all under its name.
      def self.configure(aggregation, options = {})
        aggregation.write_inheritable_attribute :aggregation_options, options
        aggregation.class_inheritable_reader :aggregation_options

        aggregation.write_inheritable_attribute(:aggregation_spec,
                                                AggregationSpec.new(aggregation.name))
        aggregation.class_inheritable_reader :aggregation_spec

        Aggregation.all[aggregation.name] = aggregation
      end

      module ClassMethods
        # Reads or assigns the persistence store for this aggregation.
        #
        # With no argument, returns the configured store (or nil).
        #
        # With an argument, +new_store+ may be either a
        # Wonkavision::Analytics::Persistence::Store instance, which is used
        # as given, or a key that is looked up through Store[]. If the
        # resolved value responds to +new+ it is instantiated with this
        # aggregation class. The result is saved in
        # aggregation_options[:store] and returned.
        #
        # Raises RuntimeError when the lookup yields nothing.
        def store(new_store = nil)
          return aggregation_options[:store] unless new_store

          store_type = Wonkavision::Analytics::Persistence::Store

          # The original ternary returned the local `store` here, which is
          # always nil inside its own assignment, so passing a Store
          # instance could never succeed. Use the argument itself.
          resolved = new_store.kind_of?(store_type) ? new_store : store_type[new_store]

          raise "Could not find a storage type of #{new_store}" unless resolved

          resolved = resolved.new(self) if resolved.respond_to?(:new)

          aggregation_options[:store] = resolved
        end

        # Returns the memoized aggregation instance for +dimensions+,
        # creating it on first use. The cache key combines the sorted
        # dimension names with the matching dimension key values.
        def [](dimensions)
          key = [dimension_names(dimensions), dimension_keys(dimensions)]
          @instances ||= HashWithIndifferentAccess.new
          @instances[key] ||= self.new(dimensions)
        end

        # Without an argument, returns the facts class recorded in
        # aggregation_options. With one, appends this aggregation to
        # facts_class.aggregations and records the facts class.
        def aggregates(facts_class = nil)
          return aggregation_options[:facts_class] unless facts_class

          facts_class.aggregations << self
          aggregation_options[:facts_class] = facts_class
        end
        alias facts aggregates

        # Sorted list of the keys of the +dimensions+ hash.
        def dimension_names(dimensions)
          dimensions.keys.sort
        end

        # For every dimension name (in sorted order) picks, out of that
        # dimension's data, the value stored under the dimension's key
        # attribute. `self.dimensions` is not defined in this module; it is
        # presumably forwarded to aggregation_spec via method_missing.
        def dimension_keys(dimensions)
          dimension_names(dimensions).map do |dim|
            dimensions[dim][self.dimensions[dim].key.to_s]
          end
        end

        # Builds a Wonkavision::Analytics::Query, evaluating the optional
        # block in the context of the query, and validates it.
        #
        # Returns the query itself when options[:defer] is set; otherwise
        # executes it against the store and wraps the resulting tuples in a
        # Wonkavision::Analytics::CellSet.
        #
        # Raises RuntimeError when no store has been configured.
        def query(options = {}, &block)
          raise "Aggregation#query is not valid unless a store has been configured" unless store
          query = Wonkavision::Analytics::Query.new
          query.instance_eval(&block) if block
          query.validate!

          return query if options[:defer]

          tuples = store.execute_query(query)

          Wonkavision::Analytics::CellSet.new(self, query, tuples)
        end

        # Forwards any message the aggregation spec understands to the spec;
        # everything else goes up the normal method_missing chain.
        def method_missing(m, *args, &block)
          aggregation_spec.respond_to?(m) ? aggregation_spec.send(m, *args, &block) : super
        end
      end

      module InstanceMethods
        attr_reader :dimensions, :measures

        def initialize(dimensions)
          @dimensions = dimensions
        end

        # Sends the given measures to the store as additions. Returns self.
        def add(measures)
          update(measures, :add)
        end

        # Sends the given measures to the store as removals. Returns self.
        def reject(measures)
          update(measures, :reject)
        end

        # Memoized sorted dimension names for this instance.
        def dimension_names
          @dimension_names ||= self.class.dimension_names(@dimensions)
        end

        # Memoized dimension key values for this instance.
        def dimension_keys
          @dimension_keys ||= self.class.dimension_keys(@dimensions)
        end

        protected

        # Builds one aggregation update (dimension names/keys, dimensions
        # and the per-measure deltas) and hands it to the class-level store.
        # Measures whose value is nil or false are skipped.
        def update(measures, method)
          aggregation = {
            :dimension_keys => dimension_keys,
            :dimension_names => dimension_names,
            :measures => {},
            :dimensions => @dimensions
          }

          measures.keys.each do |measure|
            value = measures[measure]
            next unless value

            aggregation[:measures].merge!(measure_changes_for(measure.to_s, value, method))
          end

          self.class.store.update_aggregation(aggregation)
          self
        end

        # Deltas for a single measure value: count, sum and sum of squares,
        # keyed "measures.<name>.<stat>". All three are negated when
        # +update_method+ is "reject".
        def measure_changes_for(measure_name, measure_value, update_method)
          sign = update_method.to_s == "reject" ? -1 : 1
          {
            "measures.#{measure_name}.count" => 1 * sign,
            "measures.#{measure_name}.sum" => measure_value * sign,
            "measures.#{measure_name}.sum2" => (measure_value * measure_value) * sign
          }
        end

      end

    end
  end
end
@@ -0,0 +1,53 @@
1
module Wonkavision
  module Plugins
    module Aggregation
      # Declarative description of an aggregation: its name, dimensions,
      # measures, the dimension combinations to aggregate by, and an
      # optional message filter.
      class AggregationSpec

        attr_reader :name, :dimensions, :measures, :aggregations

        def initialize(name)
          @name = name
          @measures = HashWithIndifferentAccess.new
          @aggregations = []
          @dimensions = HashWithIndifferentAccess.new
        end

        # Declares one Dimension per given name. A trailing options hash
        # and the block are passed to every Dimension created.
        def dimension(*dimension_names, &block)
          options = dimension_names.extract_options! || {}
          dimension_names.flatten.each do |dimension_name|
            @dimensions[dimension_name] = Dimension.new(dimension_name, options, &block)
          end
        end

        # Declares measures; each listed name is mapped to the trailing
        # options hash (or an empty hash).
        def measure(*measure_list)
          options = measure_list.extract_options! || {}
          measure_list.flatten.each do |measure_name|
            measures[measure_name] = options
          end
        end

        # Records one combination of dimension names to aggregate by.
        def aggregate_by(*aggregation_list)
          aggregations << aggregation_list.flatten
        end

        # Records every non-empty combination of the declared dimensions,
        # from single dimensions up to all of them together.
        def aggregate_all_combinations
          names = dimensions.keys
          (1..names.length).each do |combination_size|
            names.combination(combination_size) do |combo|
              aggregate_by(*combo)
            end
          end
        end
        alias aggregate_by_all aggregate_all_combinations

        # With a block, stores it as the filter and returns it. Without
        # one, returns the current filter (nil when none was set).
        def filter(&block)
          @filter = block if block
          @filter
        end

        # True when no filter is set; otherwise the filter's result. A
        # zero-arity filter is called without the message.
        def matches(message)
          predicate = filter
          return true unless predicate

          if predicate.arity == 0
            predicate.call
          else
            predicate.call(message)
          end
        end

      end
    end
  end

end
@@ -0,0 +1,22 @@
1
+ require "set"
2
+
3
module Wonkavision
  module Plugins
    module Aggregation
      # A named attribute with an options hash. Its value is read from a
      # message by looking up the attribute name as a string key.
      class Attribute
        attr_reader :name, :options

        def initialize(name, options = {})
          @name, @options = name, options
        end

        # Returns the value stored in +message+ under this attribute's
        # name converted to a string.
        def extract(message)
          lookup_key = name.to_s
          message[lookup_key]
        end

      end
    end
  end
end
22
+
@@ -0,0 +1,64 @@
1
+ require "set"
2
+
3
module Wonkavision
  module Plugins
    module Aggregation
      # A dimension of an aggregation: a named group of attributes, one of
      # which acts as the key, plus optional sort and caption attributes.
      class Dimension
        attr_reader :name, :attributes, :options, :from
        attr_writer :key, :sort, :caption

        # Options read here are :from, :key, :sort and :caption. The
        # optional block is evaluated in the context of the new dimension.
        # If no key has been chosen by then, the dimension name is the key.
        def initialize(name, options = {}, &block)
          @name = name
          @options = options
          @attributes = HashWithIndifferentAccess.new
          @from = options[:from]

          # Order matters: attributes are registered in this sequence.
          [:key, :sort, :caption].each do |setting|
            send(setting, options[setting]) if options[setting]
          end

          instance_eval(&block) if block
          key(name) unless key
        end

        # Registers an Attribute for each given name; a trailing hash is
        # used as the options for all of them.
        def attribute(*attribute_list)
          if attribute_list.blank?
            raise "No attribute names were specified when calling '#attribute'"
          end

          options = attribute_list.extract_options! || {}
          attribute_list.flatten.each do |attribute_name|
            @attributes[attribute_name] = Attribute.new(attribute_name, options)
          end
        end

        # Reader/writer for the sort attribute. With no argument returns
        # the sort attribute, falling back to the key. With one, registers
        # the attribute if needed and makes it the sort attribute.
        def sort(sort_key = nil, options = {})
          if sort_key
            attribute(sort_key, options) unless attributes[sort_key]
            @sort = sort_key
          else
            @sort || @key
          end
        end
        alias_method :sort_by, :sort

        # Reader/writer for the caption attribute; same contract as #sort.
        def caption(caption_key = nil, options = {})
          if caption_key
            attribute(caption_key, options) unless attributes[caption_key]
            @caption = caption_key
          else
            @caption || @key
          end
        end

        # Reader/writer for the key attribute. With no argument returns
        # the current key (nil if unset).
        def key(key = nil, options = {})
          if key
            attribute(key, options) unless attributes[key]
            @key = key
          else
            @key
          end
        end

        # Builds a hash of attribute name (string) => extracted value.
        # Values are read from data[from] when :from is set and present,
        # else from data[name] when that is a Hash, else from data itself.
        def extract(data)
          named = data[name.to_s]

          dimension_data = from ? data[from.to_s] : nil
          dimension_data ||= named if named.kind_of?(Hash)
          dimension_data ||= data

          attributes.values.each_with_object({}) do |attribute, message|
            message[attribute.name.to_s] = attribute.extract(dimension_data)
          end
        end

      end
    end
  end
end
64
+
@@ -0,0 +1,240 @@
1
+ # This class is based off of
2
+ # https://github.com/josephruscio/aggregate and
3
+ # https://github.com/afurmanov/aggregate
4
+ #
5
+ # Copyright (c) 2009 Joseph Ruscio
6
+ #
7
+ #Permission is hereby granted, free of charge, to any person
8
+ #obtaining a copy of this software and associated documentation
9
+ #files (the "Software"), to deal in the Software without
10
+ #restriction, including without limitation the rights to use,
11
+ #copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ #copies of the Software, and to permit persons to whom the
13
+ #Software is furnished to do so, subject to the following
14
+ #conditions:
15
+ #
16
+ #The above copyright notice and this permission notice shall be
17
+ #included in all copies or substantial portions of the Software.
18
+ #
19
+ #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ #EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21
+ #OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
+ #NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23
+ #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24
+ #WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25
+ #FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26
+ #OTHER DEALINGS IN THE SOFTWARE.
27
+
28
module Wonkavision
  module Plugins
    module Aggregation
      # Running statistical summary of a stream of numeric samples: count,
      # sum, min, max, mean, standard deviation and a histogram. The
      # histogram is binary-logarithmic by default, or linear when :low,
      # :high and :width are all supplied.
      class Measure
        # The current number of samples
        attr_reader :count

        # The maximum sample value (NaN once a sample has been rejected)
        attr_reader :max

        # The minimum sample value (NaN once a sample has been rejected)
        attr_reader :min

        # The sum of all samples
        attr_reader :sum

        # The number of samples falling below the lowest valued histogram bucket
        attr_reader :outliers_low

        # The number of samples falling above the highest valued histogram bucket
        attr_reader :outliers_high

        # The number of buckets in the binary logarithmic histogram
        attr_reader :log_buckets

        DEFAULT_LOG_BUCKETS = 8

        # Natural logarithm of 2, the divisor used to convert to base 2.
        LOG2_DIVISOR = Math.log(2)

        # Create a new Measure that maintains a binary logarithmic histogram
        # by default. Specifying values for low, high, and width configures
        # it to maintain a linear histogram with (high - low)/width buckets.
        #
        # Options: :low, :high, :width (linear histogram, all three needed)
        # and :log_buckets (defaults to DEFAULT_LOG_BUCKETS).
        #
        # Raises ArgumentError when the linear specification is invalid.
        def initialize(options = {})
          low = options[:low]
          high = options[:high]
          width = options[:width]
          @log_buckets = options[:log_buckets] || DEFAULT_LOG_BUCKETS
          @count = 0
          @sum = 0.0
          @sum2 = 0.0
          @outliers_low = 0
          @outliers_high = 0

          # If the user asks we maintain a linear histogram where
          # values in the range [low, high) are bucketed in multiples
          # of width
          if !low.nil? && !high.nil? && !width.nil?
            raise ArgumentError, "High bucket must be > Low bucket" if high <= low

            if high - low < width
              raise ArgumentError, "Histogram width must be <= histogram range"
            end

            unless (high - low).modulo(width) == 0
              raise ArgumentError, "Histogram range (high - low) must be a multiple of width"
            end

            @low = low
            @high = high
            @width = width
          else
            low ||= 1
            # to_index/to_bucket read @low, so seed it with 1 before
            # snapping the requested low value to a power-of-two boundary.
            @low = 1
            @low = to_bucket(to_index(low))
            @high = to_bucket(to_index(@low) + log_buckets - 1)
          end

          # Initialize all buckets to 0
          @buckets = Array.new(bucket_count, 0)
        end

        # Include a sample in the aggregate
        def add(data)
          if @count == 0
            @min = data
            @max = data
          elsif extremes_known?
            @max = [data, @max].max
            @min = [data, @min].min
          end
          # Otherwise min/max were invalidated by #reject and stay NaN:
          # comparing against NaN would raise ArgumentError, and the true
          # extremes cannot be recovered from the running totals.

          # Update the running info
          @count += 1
          @sum += data
          @sum2 += (data * data)

          # Update the bucket (outlier? counts the sample itself when it
          # falls outside the histogram range)
          @buckets[to_index(data)] += 1 unless outlier?(data)
        end
        alias << add

        # Remove a previously added sample from the aggregate. The running
        # totals and histogram are adjusted; min and max can no longer be
        # known, so both become Wonkavision::NaN.
        def reject(data)
          @min = Wonkavision::NaN
          @max = Wonkavision::NaN
          @count -= 1
          @sum -= data
          @sum2 -= (data * data)
          @buckets[to_index(data)] -= 1 unless outlier?(data, true)
        end
        alias >> reject

        # The current average of all samples (sum / count)
        def mean
          @sum / @count
        end

        # Calculate the sample standard deviation. Returns Wonkavision::NaN
        # when there are fewer than two samples.
        def std_dev
          return Wonkavision::NaN unless @count > 1

          samples = @count.to_f
          total = @sum.to_f
          variance = (@sum2.to_f - ((total * total) / samples)) / (samples - 1)

          # Rounding can push the variance of (near-)identical samples a
          # hair below zero, which would make Math.sqrt raise.
          variance = 0.0 if variance < 0
          Math.sqrt(variance)
        end

        # Iterate through each bucket in the histogram regardless of
        # its contents, yielding the bucket's lower bound and its count
        def each
          @buckets.each_with_index do |count, index|
            yield(to_bucket(index), count)
          end
        end

        # Iterate through only the buckets in the histogram that contain
        # samples
        def each_nonzero
          @buckets.each_with_index do |count, index|
            yield(to_bucket(index), count) if count != 0
          end
        end

        # Base-2 logarithm of x
        def self.log2(x)
          Math.log(x) / LOG2_DIVISOR
        end

        private

        def linear?
          !@width.nil?
        end

        # False when min/max have been invalidated (set to NaN) by #reject.
        def extremes_known?
          [@min, @max].none? { |value| value.respond_to?(:nan?) && value.nan? }
        end

        # Reports whether data lies outside [@low, @high). As a side effect
        # the matching outlier counter is incremented (or decremented when
        # remove is true).
        def outlier?(data, remove = false)
          delta = remove ? -1 : 1
          if data < @low
            @outliers_low += delta
            true
          elsif data >= @high
            @outliers_high += delta
            true
          else
            false
          end
        end

        def bucket_count
          linear? ? (@high - @low) / @width : log_buckets
        end

        # Lower bound of the bucket at index
        def to_bucket(index)
          if linear?
            @low + (index * @width)
          else
            2**(log2(@low) + index)
          end
        end

        # Linear histograms only: does data fall inside the bucket at index?
        def right_bucket?(index, data)
          # check invariant
          raise unless linear?

          bucket = to_bucket(index)

          # It's the right bucket if data falls between bucket and next bucket
          bucket <= data && data < bucket + @width
        end

        # A data point belongs to bucket[n] when it is greater than or
        # equal to the value represented by bucket[n] and less than the
        # value represented by bucket[n+1].
        def to_index(data)
          # basic case is simple
          return log2([1, data / @low].max).to_i unless linear?

          # Search for the right bucket in the linear case
          @buckets.each_index do |idx|
            return idx if right_bucket?(idx, data)
          end

          # Should not get here
          raise "#{data}"
        end

        def log2(x)
          self.class.log2(x)
        end

      end
    end
  end
end
240
+