wonkavision 0.5.11 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51):
  1. data/CHANGELOG.rdoc +3 -0
  2. data/lib/wonkavision.rb +28 -1
  3. data/lib/wonkavision/aggregation.rb +21 -0
  4. data/lib/wonkavision/event_coordinator.rb +19 -7
  5. data/lib/wonkavision/extensions/symbol.rb +55 -0
  6. data/lib/wonkavision/facts.rb +27 -0
  7. data/lib/wonkavision/local_job_queue.rb +28 -0
  8. data/lib/wonkavision/message_mapper.rb +2 -2
  9. data/lib/wonkavision/message_mapper/map.rb +60 -8
  10. data/lib/wonkavision/persistence/mongo.rb +95 -0
  11. data/lib/wonkavision/plugins.rb +2 -1
  12. data/lib/wonkavision/plugins/analytics/aggregation.rb +139 -0
  13. data/lib/wonkavision/plugins/analytics/aggregation/aggregation_spec.rb +53 -0
  14. data/lib/wonkavision/plugins/analytics/aggregation/attribute.rb +22 -0
  15. data/lib/wonkavision/plugins/analytics/aggregation/dimension.rb +64 -0
  16. data/lib/wonkavision/plugins/analytics/aggregation/measure.rb +240 -0
  17. data/lib/wonkavision/plugins/analytics/cellset.rb +171 -0
  18. data/lib/wonkavision/plugins/analytics/facts.rb +106 -0
  19. data/lib/wonkavision/plugins/analytics/handlers/apply_aggregation.rb +35 -0
  20. data/lib/wonkavision/plugins/analytics/handlers/split_by_aggregation.rb +60 -0
  21. data/lib/wonkavision/plugins/analytics/member_filter.rb +106 -0
  22. data/lib/wonkavision/plugins/analytics/mongo.rb +6 -0
  23. data/lib/wonkavision/plugins/analytics/persistence/hash_store.rb +59 -0
  24. data/lib/wonkavision/plugins/analytics/persistence/mongo_store.rb +85 -0
  25. data/lib/wonkavision/plugins/analytics/persistence/store.rb +105 -0
  26. data/lib/wonkavision/plugins/analytics/query.rb +76 -0
  27. data/lib/wonkavision/plugins/event_handling.rb +15 -3
  28. data/lib/wonkavision/version.rb +1 -1
  29. data/test/aggregation_spec_test.rb +99 -0
  30. data/test/aggregation_test.rb +170 -0
  31. data/test/analytics/test_aggregation.rb +78 -0
  32. data/test/apply_aggregation_test.rb +92 -0
  33. data/test/attribute_test.rb +26 -0
  34. data/test/cellset_test.rb +200 -0
  35. data/test/dimension_test.rb +186 -0
  36. data/test/facts_test.rb +146 -0
  37. data/test/hash_store_test.rb +112 -0
  38. data/test/log/test.log +96844 -0
  39. data/test/map_test.rb +48 -1
  40. data/test/measure_test.rb +146 -0
  41. data/test/member_filter_test.rb +143 -0
  42. data/test/mongo_store_test.rb +115 -0
  43. data/test/query_test.rb +106 -0
  44. data/test/split_by_aggregation_test.rb +114 -0
  45. data/test/store_test.rb +71 -0
  46. data/test/symbol_test.rb +62 -0
  47. data/test/test_activity_models.rb +1 -1
  48. data/test/test_aggregation.rb +42 -0
  49. data/test/test_data.tuples +100 -0
  50. data/test/test_helper.rb +7 -0
  51. metadata +57 -5
@@ -0,0 +1,171 @@
1
+ require "set"
2
+
3
module Wonkavision
  module Analytics
    # The result of running a query over a list of aggregation tuples: one
    # Axis per query axis, plus a table of Cells keyed by the member keys of
    # the query's selected dimensions.
    class CellSet
      attr_reader :axes, :query

      # aggregation - object whose #dimensions can be indexed by dimension name.
      # query       - object responding to #axes, #selected_dimensions, #slicer
      #               and #matches_filter?(aggregation, record).
      # tuples      - enumerable of Hash records carrying "dimension_names",
      #               "dimension_keys", "dimensions" and "measures".
      def initialize(aggregation, query, tuples)
        @axes = []
        @query = query
        dimension_members, @cells = process_tuples(aggregation, query, tuples)

        query.axes.each do |axis_dimensions|
          @axes << Axis.new(axis_dimensions, dimension_members, aggregation)
        end
      end

      # Positional accessors for the first five axes (nil when absent).
      def columns; axes[0]; end
      def rows; axes[1]; end
      # Previously referenced the misspelled `axex`, which raised NameError.
      def pages; axes[2]; end
      def chapters; axes[3]; end
      def sections; axes[4]; end

      def inspect
        "<Cellset #{object_id} select:#{@query.selected_dimensions} where:#{@query.slicer}>"
      end

      # Looks up a cell by its coordinates; each coordinate is converted with
      # to_s before the lookup. Returns nil when no cell exists for that key.
      def [](*coordinates)
        key = coordinates.map { |c| c.to_s }
        @cells[key]
      end

      # Number of cells in the set.
      def length
        @cells.length
      end

      private

      # Walks the tuples that pass the query filter, folding each one into its
      # cell and collecting the distinct members seen for every dimension.
      # Returns [members_by_dimension_name, cells_by_key].
      def process_tuples(aggregation, query, tuples)
        dims = {}
        cells = {}
        tuples.each do |record|
          next unless query.matches_filter?(aggregation, record)
          append_to_cell(cells, query, record)
          record["dimension_names"].each_with_index do |dim_name, idx|
            dim = dims[dim_name] ||= {}
            dim_key = record["dimension_keys"][idx]
            # First tuple seen for a member wins.
            dim[dim_key] ||= record["dimensions"][dim_name]
          end
        end
        [dims, cells]
      end

      # Builds the cell key for a record: the record's dimension keys, in the
      # order of the query's selected dimensions.
      def key_for(query, record)
        query.selected_dimensions.map do |dim_name|
          dim_ordinal = record["dimension_names"].index(dim_name.to_s)
          record["dimension_keys"][dim_ordinal]
        end
      end

      def append_to_cell(cells, query, record)
        #If a slicer is used for a dimension not on one of the main axes,
        #then we'll have cases where more than one tuple needs to be
        #stuck into a cell. In these cases, we need to aggregate
        #the measure data for that cell on the fly
        cell_key = key_for(query, record)
        measures = record["measures"]

        cell = cells[cell_key]
        cell ? cell.aggregate(measures) : cells[cell_key] = Cell.new(cell_key, measures)
      end

      # One query axis: an ordered list of Dimension objects.
      class Axis
        attr_reader :dimensions

        def initialize(dimensions, dimension_members, aggregation)
          @dimensions = []
          dimensions.each do |dim_name|
            definition = aggregation.dimensions[dim_name]
            # Members are collected under string names, see process_tuples.
            members = dimension_members[dim_name.to_s]
            @dimensions << Dimension.new(dim_name, definition, members)
          end
        end
      end

      # A dimension on an axis together with its sorted members.
      class Dimension
        attr_reader :definition, :members, :name

        # members - Hash of member key => member attribute data, or nil.
        def initialize(name, definition, members)
          @name = name.to_s
          @definition = definition
          @members = members ? members.values.map { |mem_data| Member.new(self, mem_data) }.sort : []
        end
      end

      # A single dimension member. The attribute names used for caption, key
      # and sort are taken from the owning dimension's definition.
      class Member
        attr_reader :dimension, :attributes

        def initialize(dimension, member_data)
          @dimension = dimension
          @attributes = member_data
        end

        def caption
          attributes[dimension.definition.caption.to_s]
        end

        def key
          attributes[dimension.definition.key.to_s]
        end

        def sort
          attributes[dimension.definition.sort.to_s]
        end

        # Members order by their sort attribute.
        def <=>(other)
          sort <=> other.sort
        end

        def to_s
          key.to_s
        end
      end

      # The measures found at one coordinate of the cell set.
      class Cell
        attr_reader :key
        attr_reader :measures

        # measure_data - Hash of measure name => measure data hash.
        def initialize(key, measure_data)
          @key = key
          @measures = HashWithIndifferentAccess.new
          measure_data.each_pair do |measure_name, data|
            @measures[measure_name] = Measure.new(measure_name, data)
          end
        end

        # Folds another tuple's measure data into this cell. Measures already
        # present are aggregated; unseen measures are added from the incoming
        # data (previously a nil local was passed instead of the data).
        def aggregate(measure_data)
          measure_data.each_pair do |measure_name, data|
            measure = @measures[measure_name]
            if measure
              measure.aggregate(data)
            else
              @measures[measure_name] = Measure.new(measure_name, data)
            end
          end
        end

        # Exposes each measure as a reader, e.g. cell.cost.
        def method_missing(method, *args)
          measures[method] || super
        end

        # Keeps respond_to? consistent with method_missing.
        def respond_to_missing?(method, include_private = false)
          measures.key?(method) || super
        end
      end

      # Summary statistics for one measure, backed by a hash with "sum",
      # "sum2" and "count" entries.
      class Measure
        attr_reader :name, :data

        def initialize(name, data)
          @name = name
          @data = data
        end

        def sum; @data["sum"]; end
        def sum2; @data["sum2"]; end
        def count; @data["count"]; end

        # Float division, so Integer sums and counts are not truncated.
        def mean; sum.to_f / count.to_f; end
        alias :average :mean

        # Sample standard deviation computed from sum, sum2 and count.
        # Returns Wonkavision::NaN (defined elsewhere in the project) unless
        # count is greater than one.
        def std_dev
          return Wonkavision::NaN unless count > 1
          Math.sqrt((sum2.to_f - ((sum.to_f * sum.to_f) / count.to_f)) / (count.to_f - 1))
        end

        # Adds new_data's sum, sum2 and count to this measure. Works on a
        # merged copy so the hash supplied by the caller (the source tuple)
        # is never modified. Returns the new count.
        def aggregate(new_data)
          @data = @data.merge(
            "sum"   => @data["sum"].to_f + new_data["sum"].to_f,
            "sum2"  => @data["sum2"].to_f + new_data["sum2"].to_f,
            "count" => @data["count"].to_i + new_data["count"].to_i
          )
          @data["count"]
        end

      end

    end
  end
end
@@ -0,0 +1,106 @@
1
+ require "set"
2
+
3
module Wonkavision
  module Plugins
    # Plugin that turns incoming events into add/reject/update "facts"
    # messages, one per aggregation registered on the host class.
    module Facts

      # Installs the inheritable :facts_options and :aggregations attributes
      # (and their readers) on the host class.
      def self.configure(facts, options = {})
        facts.write_inheritable_attribute :facts_options, options
        facts.class_inheritable_reader :facts_options

        facts.write_inheritable_attribute :aggregations, []
        facts.class_inheritable_reader :aggregations
      end

      module ClassMethods

        # Gets or sets the event path that fact messages are submitted to.
        # Defaults to the joined path wv / analytics / facts / updated.
        def output_event_path(new_path = nil)
          if new_path
            facts_options[:output_event_path] = new_path
          else
            facts_options[:output_event_path] ||=
              Wonkavision.join('wv', 'analytics', 'facts', 'updated')
          end
        end

        # Registers a handler for event_path that feeds the event data to
        # accept_event. An optional block is registered as the mapping for
        # the same path. options may carry :action (see accept_event).
        def accept(event_path, options = {}, &mapping_block)
          map(event_path, &mapping_block) if mapping_block
          handle event_path do
            accept_event(event_context.data, options)
          end
        end

        # Gets or sets the record id option; defaults to "id".
        def record_id(new_record_id = nil)
          if new_record_id
            facts_options[:record_id] = new_record_id
          else
            facts_options[:record_id] ||= "id"
          end
        end

        # With an argument, configures the persistent store; without one,
        # returns the configured store (nil when none was set).
        #
        # new_store may be a Store instance, which is used as is, or any key
        # understood by Persistence::Store[]. Raises RuntimeError when the
        # lookup finds nothing.
        def store(new_store = nil)
          if new_store
            # A ready-made Store instance must be kept. The original returned
            # the not-yet-assigned local here, so instances were always
            # rejected as unknown storage types.
            store = new_store.kind_of?(Wonkavision::Analytics::Persistence::Store) ? new_store :
              Wonkavision::Analytics::Persistence::Store[new_store]

            raise "Could not find a storage type of #{new_store}" unless store

            # Store classes are instantiated with the owning facts class.
            store = store.new(self) if store.respond_to?(:new)

            facts_options[:store] = store
          else
            facts_options[:store]
          end
        end

      end

      module InstanceMethods
        # Dispatches to add_facts, update_facts or reject_facts according to
        # options[:action] (default :add).
        def accept_event(event_data, options = {})
          action = options[:action] || :add
          send "#{action}_facts", event_data
        end

        # Replaces stored facts with data. When the stored value changed, the
        # previous facts are rejected and the current facts are added.
        # Raises RuntimeError when no store is configured.
        def update_facts(data)
          raise "A persistent store must be configured in order to update facts" unless store

          previous_facts, current_facts = store.update_facts(data)
          unless previous_facts == current_facts
            process_facts previous_facts, "reject" if previous_facts
            process_facts current_facts, "add" if current_facts
          end
        end

        # Adds facts, going through the store when one is configured.
        def add_facts(data)
          current_facts = store ? store.add_facts(data) : data
          process_facts current_facts, "add" if current_facts
        end

        # Rejects facts, going through the store when one is configured.
        def reject_facts(data)
          previous_facts = store ? store.remove_facts(data) : data
          process_facts previous_facts, "reject" if previous_facts
        end

        protected

        # The store configured on the class, if any.
        def store
          self.class.store
        end

        #It is unnecessary to accept multiple actions - this should be removed
        # Submits one message per action and per registered aggregation to
        # the class's output event path.
        def process_facts(event_data, *actions)
          actions.each do |action|
            self.class.aggregations.each do |aggregation|
              submit self.class.output_event_path, {
                "action" => action,
                "aggregation" => aggregation.name,
                "data" => event_data
              }
            end
          end
        end

      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module Wonkavision
  module Analytics
    # Event handler that applies an "aggregation updated" message to the
    # aggregation it names.
    class ApplyAggregation
      include Wonkavision::EventHandler

      event_namespace Wonkavision.join('wv', 'analytics')

      handle Wonkavision.join('aggregation', 'updated') do
        process_event(event_context.data)
      end

      # Applies one message. Returns false when the aggregation cannot be
      # found or when "action", "measures" or "dimensions" is missing.
      # Raises RuntimeError for any action other than add or reject.
      def process_event(event)
        aggregation = aggregation_for(event["aggregation"])
        return false unless aggregation

        action = event["action"]
        return false unless action

        measures = event["measures"]
        return false unless measures

        dimensions = event["dimensions"]
        return false unless dimensions

        action_name = action.to_s
        unless ["add", "reject"].include?(action_name)
          raise "The only valid values for 'action' on an aggregation.updated message are 'add' and 'reject', #{action} was encountered. Message: #{event.inspect}"
        end

        #Don't bother to continue if the measures are all nil
        return nil unless measures.values.any?

        target = aggregation[dimensions]
        if action_name == "add"
          target.add(measures)
        else
          target.reject(measures)
        end
      end

      # Looks the aggregation up by name in Wonkavision::Aggregation.all.
      def aggregation_for(aggregation_name)
        Wonkavision::Aggregation.all[aggregation_name]
      end

    end
  end
end
@@ -0,0 +1,60 @@
1
module Wonkavision
  module Analytics
    # Event handler that fans a "facts updated" message out into one
    # "aggregation updated" message per dimension combination of the
    # aggregation it names.
    class SplitByAggregation
      include Wonkavision::EventHandler

      event_namespace Wonkavision.join('wv', 'analytics')

      handle Wonkavision.join('facts', 'updated') do
        process_event(event_context.data)
      end

      # Returns false when the aggregation, "action" or "data" is missing,
      # [] when the aggregation does not match the entity, and otherwise the
      # list of messages that were submitted.
      def process_event(event)
        aggregation = aggregation_for(event["aggregation"])
        return false unless aggregation

        action = event["action"]
        return false unless action

        entity = event["data"]
        return false unless entity

        return [] unless aggregation.matches(entity)

        # Measure values are read from the entity under the string form of
        # each measure name.
        measures = {}
        aggregation.measures.keys.each do |measure|
          measures[measure] = entity[measure.to_s]
        end

        dimension_sets = split_dimensions_by_aggregation(aggregation, entity)
        messages = dimension_sets.map do |dimensions|
          {
            "action" => action,
            "aggregation" => aggregation.name,
            "dimensions" => dimensions,
            "measures" => measures
          }
        end

        process_aggregations messages
      end

      # Submits every message to the aggregation/updated event path and
      # returns the flattened message list.
      def process_aggregations(messages)
        messages = [messages].flatten
        event_path = self.class.event_path(Wonkavision.join('aggregation', 'updated'))
        messages.each do |message|
          submit(event_path, message)
        end
        messages
      end

      # Builds one hash of dimension name => extracted value for each entry
      # of aggregation.aggregations.
      def split_dimensions_by_aggregation(aggregation, entity)
        aggregation.aggregations.map do |aggregate_by|
          dimensions = {}
          aggregate_by.each do |dimension_name|
            dimension = aggregation.dimensions[dimension_name]
            dimensions[dimension_name.to_s] = dimension.extract(entity)
          end
          dimensions
        end
      end

      # Looks the aggregation up by name in Wonkavision::Aggregation.all.
      def aggregation_for(aggregation_name)
        Wonkavision::Aggregation.all[aggregation_name]
      end

    end
  end
end
@@ -0,0 +1,106 @@
1
module Wonkavision
  module Analytics
    # A filter on one attribute of a dimension or measure, e.g.
    # "dimension :color, attribute :key, operator :eq, value 'red'".
    class MemberFilter

      # Attribute names that are aliases: the real attribute name is whatever
      # the dimension returns for the method of the same name.
      ALIASED_ATTRIBUTES = %w(key caption sort).freeze

      attr_reader :name, :operator, :member_type
      attr_accessor :value

      # member_name - name of the dimension or measure being filtered.
      # options     - :attribute_name, :operator (or :op, default :eq),
      #               :member_type (default :dimension) and :value.
      def initialize(member_name, options = {})
        @name = member_name
        @attribute_name = options[:attribute_name]
        @operator = options[:operator] || options[:op] || :eq
        @member_type = options[:member_type] || :dimension
        @value = options[:value]
        @applied = false
      end

      # Defaults to :key for dimensions and :count for anything else.
      def attribute_name
        @attribute_name ||= dimension? ? :key : :count
      end

      def dimension?
        member_type == :dimension
      end

      def measure?
        member_type == :measure
      end

      # Marks the filter as already applied; matches then always returns true.
      def applied!
        @applied = true
      end

      def applied?
        @applied
      end

      # Chainable operator setters: filter.gt(5) sets operator and value,
      # filter.gt sets only the operator. Each returns the filter.
      [:gt, :lt, :gte, :lte, :ne, :in, :nin, :eq].each do |operator|
        define_method(operator) do |*args|
          @value = args[0] if args.length > 0
          @operator = operator; self
        end unless method_defined?(operator)
      end

      # Tests a tuple against the filter. Returns true without inspecting the
      # tuple when the filter was already applied or the tuple is blank.
      # Raises RuntimeError when the operator needs a value that is missing or
      # unsuitable, or when the operator is unknown.
      def matches(aggregation, tuple)
        #this check allows the database adapter to apply a filter at the db query level
        #Wonkavision will avoid the overhead of checking again if the store signals it has taken care of things
        return true if @applied || tuple.blank?

        assert_operator_matches_value

        data = extract_attribute_value_from_tuple(aggregation, tuple)

        case operator
        when :gt then data ? data > value : false
        when :lt then data ? data < value : false
        when :gte then data ? data >= value : false
        when :lte then data ? data <= value : false
        when :in then value.include?(data)
        when :nin then !value.include?(data)
        when :ne then data != value
        when :eq then value == data
        else raise "Unknown filter operator #{operator}"
        end
      end

      # Name of the tuple attribute this filter reads. Raises RuntimeError
      # when a dimension filter names a dimension the aggregation lacks.
      def attribute_key(aggregation)
        attribute_key = attribute_name.to_s
        #If the attribute name is key, caption or sort, we need to find the real name of the underlying
        # attribute
        if dimension?
          dimension = aggregation.dimensions[name]
          raise "Error applying a member filter: Dimension #{name} does not exist" unless dimension
          # Only the three aliases are resolved through the dimension. Sending
          # any name the dimension responds to would turn an attribute called
          # e.g. "name" into the dimension's own name.
          if ALIASED_ATTRIBUTES.include?(attribute_key) && dimension.respond_to?(attribute_name)
            attribute_key = dimension.send(attribute_name).to_s
          end
        end
        attribute_key
      end

      private

      # TODO: This is smelly - we should have a Tuple class that knows its aggregation
      #       and can return this kind of information on demand - it is dirty business
      #       that a filter class has to know the about the anatomy of a tuple to do its
      #       job
      # Returns the filtered attribute's value, or nil when the tuple has no
      # section for this member type or no entry for this member.
      def extract_attribute_value_from_tuple(aggregation, tuple)
        members = tuple["#{member_type}s"] #dimensions or measures
        member = members && members[name.to_s] #measure name or dimension name

        member[attribute_key(aggregation)] if member
      end

      # Raises RuntimeError when the current operator cannot work with the
      # current value.
      def assert_operator_matches_value
        case operator
        when :gt, :lt, :gte, :lte then
          raise "A filter value is required for #{operator}" unless value
        when :in, :nin then
          raise "A filter value is required for #{operator}" unless value
          raise "The filter value for #{operator} must respond to :include?" unless value.respond_to?(:include?)
        end
      end

    end
  end
end