wonkavision 0.5.11 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data/CHANGELOG.rdoc +3 -0
  2. data/lib/wonkavision.rb +28 -1
  3. data/lib/wonkavision/aggregation.rb +21 -0
  4. data/lib/wonkavision/event_coordinator.rb +19 -7
  5. data/lib/wonkavision/extensions/symbol.rb +55 -0
  6. data/lib/wonkavision/facts.rb +27 -0
  7. data/lib/wonkavision/local_job_queue.rb +28 -0
  8. data/lib/wonkavision/message_mapper.rb +2 -2
  9. data/lib/wonkavision/message_mapper/map.rb +60 -8
  10. data/lib/wonkavision/persistence/mongo.rb +95 -0
  11. data/lib/wonkavision/plugins.rb +2 -1
  12. data/lib/wonkavision/plugins/analytics/aggregation.rb +139 -0
  13. data/lib/wonkavision/plugins/analytics/aggregation/aggregation_spec.rb +53 -0
  14. data/lib/wonkavision/plugins/analytics/aggregation/attribute.rb +22 -0
  15. data/lib/wonkavision/plugins/analytics/aggregation/dimension.rb +64 -0
  16. data/lib/wonkavision/plugins/analytics/aggregation/measure.rb +240 -0
  17. data/lib/wonkavision/plugins/analytics/cellset.rb +171 -0
  18. data/lib/wonkavision/plugins/analytics/facts.rb +106 -0
  19. data/lib/wonkavision/plugins/analytics/handlers/apply_aggregation.rb +35 -0
  20. data/lib/wonkavision/plugins/analytics/handlers/split_by_aggregation.rb +60 -0
  21. data/lib/wonkavision/plugins/analytics/member_filter.rb +106 -0
  22. data/lib/wonkavision/plugins/analytics/mongo.rb +6 -0
  23. data/lib/wonkavision/plugins/analytics/persistence/hash_store.rb +59 -0
  24. data/lib/wonkavision/plugins/analytics/persistence/mongo_store.rb +85 -0
  25. data/lib/wonkavision/plugins/analytics/persistence/store.rb +105 -0
  26. data/lib/wonkavision/plugins/analytics/query.rb +76 -0
  27. data/lib/wonkavision/plugins/event_handling.rb +15 -3
  28. data/lib/wonkavision/version.rb +1 -1
  29. data/test/aggregation_spec_test.rb +99 -0
  30. data/test/aggregation_test.rb +170 -0
  31. data/test/analytics/test_aggregation.rb +78 -0
  32. data/test/apply_aggregation_test.rb +92 -0
  33. data/test/attribute_test.rb +26 -0
  34. data/test/cellset_test.rb +200 -0
  35. data/test/dimension_test.rb +186 -0
  36. data/test/facts_test.rb +146 -0
  37. data/test/hash_store_test.rb +112 -0
  38. data/test/log/test.log +96844 -0
  39. data/test/map_test.rb +48 -1
  40. data/test/measure_test.rb +146 -0
  41. data/test/member_filter_test.rb +143 -0
  42. data/test/mongo_store_test.rb +115 -0
  43. data/test/query_test.rb +106 -0
  44. data/test/split_by_aggregation_test.rb +114 -0
  45. data/test/store_test.rb +71 -0
  46. data/test/symbol_test.rb +62 -0
  47. data/test/test_activity_models.rb +1 -1
  48. data/test/test_aggregation.rb +42 -0
  49. data/test/test_data.tuples +100 -0
  50. data/test/test_helper.rb +7 -0
  51. metadata +57 -5
@@ -4,6 +4,7 @@
4
4
  #names to avoid conflicts.
5
5
  module Wonkavision
6
6
  module Plugins
7
+
7
8
  def wonkavision_plugins
8
9
  @wonkavision_plugins ||= []
9
10
  end
@@ -25,6 +26,6 @@ module Wonkavision
25
26
  wonkavision_plugins << mod
26
27
  end
27
28
  alias use plug
28
-
29
+
29
30
  end
30
31
  end
@@ -0,0 +1,139 @@
1
module Wonkavision
  module Plugins
    # Plugin that gives a class aggregation behaviour: a class-level DSL
    # (ClassMethods) for configuring a store, looking up instances and
    # running queries, plus instance-level helpers (InstanceMethods) that
    # push measure changes into the configured store.
    module Aggregation

      # Registry of every configured aggregation class, keyed by class name.
      def self.all
        @@all ||= {}
      end

      # Installs the inheritable :aggregation_options and :aggregation_spec
      # attributes on +aggregation+ and records the class in Aggregation.all.
      #
      # aggregation - the class being configured
      # options     - hash stored as the class' aggregation_options
      def self.configure(aggregation, options = {})
        aggregation.write_inheritable_attribute :aggregation_options, options
        aggregation.class_inheritable_reader :aggregation_options

        aggregation.write_inheritable_attribute(:aggregation_spec,
                                                AggregationSpec.new(aggregation.name))
        aggregation.class_inheritable_reader :aggregation_spec

        Aggregation.all[aggregation.name] = aggregation
      end

      module ClassMethods
        # Reads or assigns the persistence store for this aggregation.
        #
        # With no argument, returns the configured store (or nil).
        # With an argument, accepts either a Store instance (used as is) or
        # a key that is resolved through Persistence::Store[]. Anything that
        # responds to #new (i.e. a store class) is instantiated with this
        # aggregation class.
        #
        # Raises RuntimeError when the key cannot be resolved to a store.
        def store(new_store = nil)
          return aggregation_options[:store] unless new_store

          store_base = Wonkavision::Analytics::Persistence::Store
          # An already-built store must be used directly; the previous code
          # returned the (nil) local being assigned here, which made passing
          # a store instance always fail.
          store = new_store.kind_of?(store_base) ? new_store : store_base[new_store]

          raise "Could not find a storage type of #{new_store}" unless store

          store = store.new(self) if store.respond_to?(:new)

          aggregation_options[:store] = store
        end

        # Returns the memoized aggregation instance for the given dimension
        # data, creating it on first access. Instances are cached per
        # [dimension names, dimension keys] pair.
        def [](dimensions)
          key = [dimension_names(dimensions), dimension_keys(dimensions)]
          @instances ||= HashWithIndifferentAccess.new
          @instances[key] ||= self.new(dimensions)
        end

        # Reads or assigns the facts class this aggregation is built from.
        # When assigning, this aggregation is also appended to
        # facts_class.aggregations.
        def aggregates(facts_class = nil)
          return aggregation_options[:facts_class] unless facts_class

          facts_class.aggregations << self
          aggregation_options[:facts_class] = facts_class
        end
        alias facts aggregates

        # Sorted list of the dimension names present in +dimensions+.
        def dimension_names(dimensions)
          dimensions.keys.sort
        end

        # For each dimension (in sorted name order), the value stored under
        # that dimension's configured key attribute.
        def dimension_keys(dimensions)
          dimension_names(dimensions).map do |dim|
            dimensions[dim][self.dimensions[dim].key.to_s]
          end
        end

        # Builds a Query by evaluating +block+ against a new query object,
        # validates it, and executes it against the configured store.
        #
        # options - pass :defer => true to get the validated Query back
        #           without executing it
        #
        # Returns a CellSet wrapping the tuples produced by the store (or
        # the Query itself when deferred).
        # Raises RuntimeError when no store has been configured.
        def query(options = {}, &block)
          raise "Aggregation#query is not valid unless a store has been configured" unless store
          query = Wonkavision::Analytics::Query.new
          query.instance_eval(&block) if block
          query.validate!

          return query if options[:defer]

          tuples = store.execute_query(query)

          Wonkavision::Analytics::CellSet.new(self, query, tuples)
        end

        # Forwards unknown class-level calls to the aggregation spec when
        # the spec responds to them.
        def method_missing(m, *args, &block)
          aggregation_spec.respond_to?(m) ? aggregation_spec.send(m, *args, &block) : super
        end
      end

      module InstanceMethods
        attr_reader :dimensions, :measures

        # dimensions - the dimension data identifying this aggregation
        def initialize(dimensions)
          @dimensions = dimensions
        end

        # Adds the given measure values to the stored aggregation.
        def add(measures)
          update(measures, :add)
        end

        # Removes (subtracts) the given measure values from the stored
        # aggregation.
        def reject(measures)
          update(measures, :reject)
        end

        def dimension_names
          @dimension_names ||= self.class.dimension_names(@dimensions)
        end

        def dimension_keys
          @dimension_keys ||= self.class.dimension_keys(@dimensions)
        end

        protected

        # Builds the update document for every non-nil/non-false measure
        # and hands it to the class' store. Returns self.
        def update(measures, method)
          aggregation = {
            :dimension_keys  => dimension_keys,
            :dimension_names => dimension_names,
            :measures        => {},
            :dimensions      => @dimensions
          }

          measures.each do |measure, val|
            next unless val
            aggregation[:measures].merge!(measure_changes_for(measure.to_s, val, method))
          end

          self.class.store.update_aggregation(aggregation)
          self
        end

        # Increments for the count, sum and sum-of-squares of one measure;
        # negated when update_method is "reject".
        def measure_changes_for(measure_name, measure_value, update_method)
          sign = update_method.to_s == "reject" ? -1 : 1
          {
            "measures.#{measure_name}.count" => 1 * sign,
            "measures.#{measure_name}.sum"   => measure_value * sign,
            "measures.#{measure_name}.sum2"  => (measure_value * measure_value) * sign
          }
        end

      end

    end
  end
end
@@ -0,0 +1,53 @@
1
module Wonkavision
  module Plugins
    module Aggregation
      # Holds the declarative definition of an aggregation: its dimensions,
      # its measures, the dimension combinations to aggregate by, and an
      # optional message filter.
      class AggregationSpec

        attr_reader :name, :dimensions, :measures, :aggregations

        def initialize(name)
          @name = name
          @dimensions = HashWithIndifferentAccess.new
          @measures = HashWithIndifferentAccess.new
          @aggregations = []
        end

        # Declares one or more dimensions. A trailing hash is treated as
        # options and, together with the block, is passed to each Dimension.
        def dimension(*dimension_names, &block)
          options = dimension_names.extract_options! || {}
          dimension_names.flatten.each do |dim|
            @dimensions[dim] = Dimension.new(dim, options, &block)
          end
        end

        # Declares one or more measures; a trailing hash becomes the
        # options stored for each of them.
        def measure(*measure_list)
          options = measure_list.extract_options! || {}
          measure_list.flatten.each do |measure_name|
            measures[measure_name] = options
          end
        end

        # Registers one combination of dimensions to aggregate by.
        def aggregate_by(*aggregation_list)
          aggregations << aggregation_list.flatten
        end

        # Registers every non-empty combination of the declared dimensions.
        def aggregate_all_combinations
          names = dimensions.keys
          (1..names.length).each do |size|
            names.combination(size) { |combo| aggregate_by(*combo) }
          end
        end
        alias aggregate_by_all aggregate_all_combinations

        # With a block, stores it as the filter; always returns the
        # currently stored filter.
        def filter(&block)
          @filter = block if block
          @filter
        end

        # True when no filter is set; otherwise the filter's result. A
        # zero-arity filter is called without the message.
        def matches(message)
          predicate = filter
          return true unless predicate
          if predicate.arity == 0
            predicate.call
          else
            predicate.call(message)
          end
        end

      end
    end
  end

end
@@ -0,0 +1,22 @@
1
+ require "set"
2
+
3
module Wonkavision
  module Plugins
    module Aggregation
      # A named attribute together with its options; knows how to pull its
      # own value out of a message.
      class Attribute
        attr_reader :name
        attr_reader :options

        def initialize(name, options = {})
          @name, @options = name, options
        end

        # Looks the attribute up in +message+ under its stringified name.
        def extract(message)
          lookup_key = name.to_s
          message[lookup_key]
        end

      end
    end
  end
end
22
+
@@ -0,0 +1,64 @@
1
+ require "set"
2
+
3
module Wonkavision
  module Plugins
    module Aggregation
      # Describes one dimension of an aggregation: its attributes and which
      # of them act as the key, the sort value and the caption.
      class Dimension
        attr_reader :name, :attributes, :options, :from
        attr_writer :key, :sort, :caption

        # name    - the dimension name; also used as the key attribute when
        #           no key is configured
        # options - may contain :from, :key, :sort and :caption
        # block   - optional DSL block evaluated against the new dimension
        def initialize(name, options = {}, &block)
          @name = name
          @options = options
          @attributes = HashWithIndifferentAccess.new
          @from = options[:from]
          key options[:key] if options[:key]
          sort options[:sort] if options[:sort]
          caption options[:caption] if options[:caption]
          instance_eval(&block) if block
          # Fall back to the dimension name when nothing configured a key.
          key name unless key
        end

        # Registers one or more attributes; a trailing hash is passed to
        # each Attribute as its options.
        #
        # Raises RuntimeError when no attribute names are given. The options
        # hash is stripped before that check, so a call that supplies only
        # options is rejected instead of silently registering nothing.
        def attribute(*attribute_list)
          options = attribute_list.extract_options! || {}
          attribute_names = attribute_list.flatten
          raise "No attribute names were specified when calling '#attribute'" if
            attribute_names.empty?

          attribute_names.each do |attribute|
            @attributes[attribute] = Attribute.new(attribute, options)
          end
        end

        # Without arguments returns the sort attribute (defaulting to the
        # key); with a sort_key, registers it as an attribute if needed and
        # makes it the sort attribute.
        def sort(sort_key = nil, options = {})
          return @sort || @key unless sort_key
          attribute(sort_key, options) unless attributes[sort_key]
          @sort = sort_key
        end
        alias :sort_by :sort

        # Without arguments returns the caption attribute (defaulting to
        # the key); with a caption_key, registers it as an attribute if
        # needed and makes it the caption attribute.
        def caption(caption_key = nil, options = {})
          return @caption || @key unless caption_key
          attribute(caption_key, options) unless attributes[caption_key]
          @caption = caption_key
        end

        # Without arguments returns the key attribute; with a key,
        # registers it as an attribute if needed and makes it the key.
        def key(key = nil, options = {})
          return @key unless key
          attribute(key, options) unless attributes[key]
          @key = key
        end

        # Builds a hash of attribute name => value for this dimension.
        # Values are read from data[from] when :from is configured, else
        # from data[name] when that entry is a Hash, else from data itself.
        def extract(data)
          dimension_data = data[from.to_s] if from
          dimension_data ||= data[name.to_s] if data[name.to_s].kind_of?(Hash)
          dimension_data ||= data
          attributes.values.each_with_object({}) do |attribute, message|
            message[attribute.name.to_s] = attribute.extract(dimension_data)
          end
        end

      end
    end
  end
end
64
+
@@ -0,0 +1,240 @@
1
+ # This class is based off of
2
+ # https://github.com/josephruscio/aggregate and
3
+ # https://github.com/afurmanov/aggregate
4
+ #
5
+ # Copyright (c) 2009 Joseph Ruscio
6
+ #
7
+ #Permission is hereby granted, free of charge, to any person
8
+ #obtaining a copy of this software and associated documentation
9
+ #files (the "Software"), to deal in the Software without
10
+ #restriction, including without limitation the rights to use,
11
+ #copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ #copies of the Software, and to permit persons to whom the
13
+ #Software is furnished to do so, subject to the following
14
+ #conditions:
15
+ #
16
+ #The above copyright notice and this permission notice shall be
17
+ #included in all copies or substantial portions of the Software.
18
+ #
19
+ #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ #EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21
+ #OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
+ #NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23
+ #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24
+ #WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25
+ #FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26
+ #OTHER DEALINGS IN THE SOFTWARE.
27
+
28
module Wonkavision
  module Plugins
    module Aggregation
      # Running aggregate of numeric samples. Tracks count, sum, sum of
      # squares, min and max, and maintains a histogram that is either
      # binary-logarithmic (default) or linear (when :low, :high and :width
      # are all supplied).
      class Measure
        #The current number of samples
        attr_reader :count

        #The maximum sample value (nil until the first sample is added)
        attr_reader :max

        #The minimum sample value (nil until the first sample is added)
        attr_reader :min

        #The sum of all samples
        attr_reader :sum

        #The number of samples falling below the lowest valued histogram bucket
        attr_reader :outliers_low

        #The number of samples falling above the highest valued histogram bucket
        attr_reader :outliers_high

        #The number of buckets used by the binary logarithmic histogram
        attr_reader :log_buckets

        DEFAULT_LOG_BUCKETS = 8

        # Create a new Measure that maintains a binary logarithmic histogram
        # by default. Specifying values for :low, :high, and :width configures
        # the measure to maintain a linear histogram with (high - low)/width
        # buckets.
        #
        # options - :low, :high, :width (linear histogram bounds and bucket
        #           size) and :log_buckets (bucket count for the logarithmic
        #           histogram, DEFAULT_LOG_BUCKETS when omitted)
        #
        # Raises ArgumentError when the linear specification is inconsistent.
        def initialize(options = {})
          low = options[:low]
          high = options[:high]
          width = options[:width]
          @log_buckets = options[:log_buckets] || DEFAULT_LOG_BUCKETS
          @count = 0
          @sum = 0.0
          @sum2 = 0.0
          @outliers_low = 0
          @outliers_high = 0
          @width = nil

          # If the user asks we maintain a linear histogram where
          # values in the range [low, high) are bucketed in multiples
          # of width
          if !low.nil? && !high.nil? && !width.nil?
            #Validate linear specification
            raise ArgumentError, "High bucket must be > Low bucket" if high <= low
            raise ArgumentError, "Histogram width must be <= histogram range" if high - low < width
            if 0 != (high - low).modulo(width)
              raise ArgumentError, "Histogram range (high - low) must be a multiple of width"
            end

            @low = low
            @high = high
            @width = width
          else
            low ||= 1
            # @low must hold a value before to_index/to_bucket can be used
            # to snap the requested low bound onto a bucket boundary.
            @low = 1
            @low = to_bucket(to_index(low))
            @high = to_bucket(to_index(@low) + log_buckets - 1)
          end

          #Initialize all buckets to 0
          @buckets = Array.new(bucket_count, 0)
        end

        # Include a sample in the aggregate
        def add(data)
          # Update min/max
          if 0 == @count
            @min = data
            @max = data
          else
            @max = [data, @max].max
            @min = [data, @min].min
          end

          # Update the running info
          @count += 1
          @sum += data
          @sum2 += (data * data)

          # Update the bucket (outlier? records out-of-range samples itself)
          @buckets[to_index(data)] += 1 unless outlier?(data)
        end
        alias << add

        # Remove a previously added sample from the aggregate. min and max
        # are set to Wonkavision::NaN afterwards.
        def reject(data)
          @min = Wonkavision::NaN
          @max = Wonkavision::NaN
          @count -= 1
          @sum -= data
          @sum2 -= (data * data)
          @buckets[to_index(data)] -= 1 unless outlier?(data, true)
        end
        alias >> reject

        #The current average of all samples
        def mean
          @sum / @count
        end

        #Calculate the standard deviation; Wonkavision::NaN with fewer than
        #two samples
        def std_dev
          return Wonkavision::NaN unless @count > 1
          Math.sqrt((@sum2.to_f - ((@sum.to_f * @sum.to_f) / @count.to_f)) / (@count.to_f - 1))
        end

        #Iterate through each bucket in the histogram regardless of
        #its contents, yielding the bucket's lower bound and its count.
        #Returns an Enumerator when no block is given.
        def each
          return enum_for(:each) unless block_given?
          @buckets.each_with_index do |count, index|
            yield(to_bucket(index), count)
          end
        end

        #Iterate through only the buckets in the histogram that contain
        #samples. Returns an Enumerator when no block is given.
        def each_nonzero
          return enum_for(:each_nonzero) unless block_given?
          @buckets.each_with_index do |count, index|
            yield(to_bucket(index), count) if count != 0
          end
        end

        # Base-2 logarithm of x, computed as ln(x) / ln(2)
        @@LOG2_DIVEDEND = Math.log(2)
        def self.log2(x)
          Math.log(x) / @@LOG2_DIVEDEND
        end

        private

        def linear?
          !@width.nil?
        end

        # Returns false when data lies inside [@low, @high). Otherwise
        # adjusts the matching outlier counter (by -1 when remove is true,
        # +1 otherwise) and returns the new counter value, which is always
        # truthy in Ruby.
        def outlier?(data, remove = false)
          delta = remove ? -1 : 1
          if data < @low
            @outliers_low += delta
          elsif data >= @high
            @outliers_high += delta
          else
            false
          end
        end

        def bucket_count
          linear? ? (@high - @low) / @width : log_buckets
        end

        # Lower bound of the bucket at the given index
        def to_bucket(index)
          if linear?
            @low + (index * @width)
          else
            2**(log2(@low) + index)
          end
        end

        def right_bucket?(index, data)
          # check invariant
          raise "right_bucket? is only valid for linear histograms" unless linear?

          bucket = to_bucket(index)

          #It's the right bucket if data falls between bucket and next bucket
          bucket <= data && data < bucket + @width
        end

        # A data point belongs to bucket[n] when it is greater than or equal
        # to the value represented by bucket[n] and less than the value
        # represented by bucket[n+1]
        def to_index(data)
          # basic case is simple
          return log2([1, data / @low].max).to_i unless linear?

          # Search for the right bucket in the linear case
          @buckets.each_with_index do |_count, idx|
            return idx if right_bucket?(idx, data)
          end

          #Should not get here
          raise "#{data}"
        end

        def log2(x)
          self.class.log2(x)
        end

      end
    end
  end
end
240
+