cubicle 0.1.20 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +5 -0
- data/cubicle.gemspec +8 -2
- data/lib/cubicle.rb +2 -0
- data/lib/cubicle/aggregation/aggregation_manager.rb +38 -70
- data/lib/cubicle/aggregation/aggregation_metadata.rb +121 -0
- data/lib/cubicle/aggregation/cubicle_metadata.rb +30 -0
- data/lib/cubicle/aggregation/dsl.rb +1 -1
- data/lib/cubicle/version.rb +1 -1
- data/test/cubicle/aggregation/aggregation_metadata_test.rb +89 -0
- data/test/cubicle/aggregation/cubicle_metadata_test.rb +9 -0
- data/test/cubicle/cubicle_aggregation_test.rb +13 -11
- data/test/cubicle/cubicle_query_test.rb +1 -1
- data/test/cubicle/mongo_mapper/aggregate_plugin_test.rb +2 -6
- data/test/log/test.log +84495 -1228
- metadata +9 -3
data/CHANGELOG.rdoc
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
==0.1.21
|
|
2
|
+
*Added metadata tables in the database for cubicle to manage aggregation info. This was necessary because previously
|
|
3
|
+
I was trying to overload the collection name with metadata, which was making the names longer than MongoDB could support
|
|
4
|
+
and causing errors. This change will enable richer monitoring, profiling, and optimization in the near future.
|
|
5
|
+
|
|
1
6
|
==0.1.20
|
|
2
7
|
*Updated to work with mongo driver 1.0 (and therefore latest versions of MongoMapper)
|
|
3
8
|
|
data/cubicle.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{cubicle}
|
|
8
|
-
s.version = "0.1.20"
|
|
8
|
+
s.version = "0.1.21"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Nathan Stults"]
|
|
12
|
-
s.date = %q{2010-05-
|
|
12
|
+
s.date = %q{2010-05-05}
|
|
13
13
|
s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.}
|
|
14
14
|
s.email = %q{hereiam@sonic.net}
|
|
15
15
|
s.extra_rdoc_files = [
|
|
@@ -27,7 +27,9 @@ Gem::Specification.new do |s|
|
|
|
27
27
|
"lib/cubicle/aggregation.rb",
|
|
28
28
|
"lib/cubicle/aggregation/ad_hoc.rb",
|
|
29
29
|
"lib/cubicle/aggregation/aggregation_manager.rb",
|
|
30
|
+
"lib/cubicle/aggregation/aggregation_metadata.rb",
|
|
30
31
|
"lib/cubicle/aggregation/aggregation_view.rb",
|
|
32
|
+
"lib/cubicle/aggregation/cubicle_metadata.rb",
|
|
31
33
|
"lib/cubicle/aggregation/dsl.rb",
|
|
32
34
|
"lib/cubicle/aggregation/map_reduce_helper.rb",
|
|
33
35
|
"lib/cubicle/bucketized_dimension.rb",
|
|
@@ -55,6 +57,8 @@ Gem::Specification.new do |s|
|
|
|
55
57
|
"lib/cubicle/version.rb",
|
|
56
58
|
"test/config/database.yml",
|
|
57
59
|
"test/cubicle/aggregation/ad_hoc_test.rb",
|
|
60
|
+
"test/cubicle/aggregation/aggregation_metadata_test.rb",
|
|
61
|
+
"test/cubicle/aggregation/cubicle_metadata_test.rb",
|
|
58
62
|
"test/cubicle/bucketized_dimension_test.rb",
|
|
59
63
|
"test/cubicle/cubicle_aggregation_test.rb",
|
|
60
64
|
"test/cubicle/cubicle_query_test.rb",
|
|
@@ -77,6 +81,8 @@ Gem::Specification.new do |s|
|
|
|
77
81
|
s.summary = %q{Pseudo-Multi Dimensional analysis / simplified aggregation for MongoDB in Ruby (NOLAP ;))}
|
|
78
82
|
s.test_files = [
|
|
79
83
|
"test/cubicle/aggregation/ad_hoc_test.rb",
|
|
84
|
+
"test/cubicle/aggregation/aggregation_metadata_test.rb",
|
|
85
|
+
"test/cubicle/aggregation/cubicle_metadata_test.rb",
|
|
80
86
|
"test/cubicle/bucketized_dimension_test.rb",
|
|
81
87
|
"test/cubicle/cubicle_aggregation_test.rb",
|
|
82
88
|
"test/cubicle/cubicle_query_test.rb",
|
data/lib/cubicle.rb
CHANGED
|
@@ -25,6 +25,8 @@ dir = File.dirname(__FILE__)
|
|
|
25
25
|
"data/level",
|
|
26
26
|
"data/hierarchy",
|
|
27
27
|
"data/table",
|
|
28
|
+
"aggregation/aggregation_metadata",
|
|
29
|
+
"aggregation/cubicle_metadata",
|
|
28
30
|
"aggregation/aggregation_view",
|
|
29
31
|
"aggregation/aggregation_manager",
|
|
30
32
|
"aggregation/map_reduce_helper",
|
|
@@ -2,10 +2,11 @@ module Cubicle
|
|
|
2
2
|
module Aggregation
|
|
3
3
|
class AggregationManager
|
|
4
4
|
|
|
5
|
-
attr_reader :aggregation
|
|
5
|
+
attr_reader :aggregation, :metadata
|
|
6
6
|
|
|
7
7
|
def initialize(aggregation)
|
|
8
8
|
@aggregation = aggregation
|
|
9
|
+
@metadata = Cubicle::Aggregation::CubicleMetadata.new(aggregation)
|
|
9
10
|
end
|
|
10
11
|
|
|
11
12
|
def database
|
|
@@ -32,26 +33,28 @@ module Cubicle
|
|
|
32
33
|
|
|
33
34
|
find_options[:sort] = prepare_order_by(query)
|
|
34
35
|
filter = {}
|
|
36
|
+
|
|
35
37
|
if query == aggregation || query.transient?
|
|
36
|
-
|
|
38
|
+
reduction = aggregate(query,options)
|
|
37
39
|
else
|
|
38
40
|
process_if_required
|
|
39
|
-
|
|
41
|
+
agg_data = aggregation_for(query)
|
|
42
|
+
reduction = agg_data.collection
|
|
40
43
|
#if the query exactly matches the aggregation in terms of requested members, we can issue a simple find
|
|
41
44
|
#otherwise, a second map reduce is required to reduce the data set one last time
|
|
42
|
-
if query.all_dimensions? || (
|
|
45
|
+
if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank?
|
|
43
46
|
filter = prepare_filter(query,options[:where] || {})
|
|
44
47
|
else
|
|
45
|
-
|
|
48
|
+
reduction = aggregate(query,:source_collection=>agg_data.target_collection_name)
|
|
46
49
|
end
|
|
47
50
|
end
|
|
48
51
|
|
|
49
|
-
if
|
|
50
|
-
Cubicle::Data::Table.new(query,[],0)
|
|
52
|
+
if reduction.blank?
|
|
53
|
+
Cubicle::Data::Table.new(query,[],0)
|
|
51
54
|
else
|
|
52
|
-
count =
|
|
53
|
-
results =
|
|
54
|
-
|
|
55
|
+
count = reduction.count
|
|
56
|
+
results = reduction.find(filter,find_options).to_a
|
|
57
|
+
reduction.drop if reduction.name =~ /^tmp.mr.*/
|
|
55
58
|
Cubicle::Data::Table.new(query, results, count)
|
|
56
59
|
end
|
|
57
60
|
|
|
@@ -77,44 +80,42 @@ module Cubicle
|
|
|
77
80
|
|
|
78
81
|
def expire!
|
|
79
82
|
collection.drop
|
|
80
|
-
|
|
83
|
+
@metadata.expire!
|
|
81
84
|
end
|
|
82
85
|
|
|
83
|
-
|
|
86
|
+
def aggregate(query,options={})
|
|
87
|
+
view = AggregationView.new(aggregation,query)
|
|
84
88
|
|
|
85
|
-
|
|
86
|
-
database.collection_names.select {|col_name|col_name=~/#{aggregation.target_collection_name}_aggregation_(.*)/}
|
|
87
|
-
end
|
|
89
|
+
map, reduce = MapReduceHelper.generate_map_function(query), MapReduceHelper.generate_reduce_function
|
|
88
90
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
91
|
+
options[:finalize] = MapReduceHelper.generate_finalize_function(query)
|
|
92
|
+
options["query"] = expand_template(prepare_filter(query,options[:where] || {}),view)
|
|
93
|
+
|
|
94
|
+
query.source_collection_name = options.delete(:source_collection) || query.source_collection_name || aggregation.source_collection_name
|
|
92
95
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
96
|
+
target_collection = options.delete(:target_collection)
|
|
97
|
+
target_collection ||= query.target_collection_name if query.respond_to?(:target_collection_name)
|
|
98
|
+
|
|
99
|
+
options[:out] = target_collection unless target_collection.blank? || query.transient?
|
|
100
|
+
|
|
101
|
+
#This is defensive - some tests run without ever initializing any collections
|
|
102
|
+
unless database.collection_names.include?(query.source_collection_name)
|
|
103
|
+
Cubicle.logger.info "No collection was found in the database with a name of #{query.source_collection_name}"
|
|
104
|
+
return []
|
|
98
105
|
end
|
|
99
106
|
|
|
100
|
-
|
|
101
|
-
#we are sorting by length because the aggregation with the least number of members
|
|
102
|
-
#is likely to be the most efficient data source as it will likely contain the smallest number of rows.
|
|
103
|
-
#this will not always be true, and situations may exist where it is rarely true, however the alternative
|
|
104
|
-
#is to actually count rows of candidates, which seems a bit wasteful. Of course only the profiler knows,
|
|
105
|
-
#but until there is some reason to believe the aggregation caching process needs be highly performant,
|
|
106
|
-
#this should do for now.
|
|
107
|
-
candidates = existing.select {|candidate|(dimension_names - candidate).blank?}.sort {|a,b|a.length <=> b.length}
|
|
107
|
+
result = database[query.source_collection_name].map_reduce(expand_template(map, view),reduce,options)
|
|
108
108
|
|
|
109
|
-
|
|
110
|
-
#we'll just use the base cubes aggregation collection
|
|
111
|
-
return target_collection_name if candidates.blank?
|
|
112
|
-
"#{target_collection_name}_aggregation_#{candidates[0].join('.')}"
|
|
109
|
+
ensure_indexes(target_collection,query.dimension_names) if target_collection
|
|
113
110
|
|
|
111
|
+
result
|
|
114
112
|
end
|
|
115
113
|
|
|
114
|
+
protected
|
|
115
|
+
|
|
116
|
+
|
|
116
117
|
def aggregation_for(query)
|
|
117
|
-
return collection if query.all_dimensions?
|
|
118
|
+
#return collection if query.all_dimensions?
|
|
118
119
|
|
|
119
120
|
aggregation_query = query.clone
|
|
120
121
|
#If the query needs to filter on a field, it had better be in the aggregation...if it isn't a $where filter...
|
|
@@ -122,15 +123,7 @@ module Cubicle
|
|
|
122
123
|
filter.keys.each {|filter_key|aggregation_query.select(filter_key) unless filter_key=~/\$where/} unless filter.blank?
|
|
123
124
|
|
|
124
125
|
dimension_names = aggregation_query.dimension_names.sort
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
unless database.collection_names.include?(agg_col_name)
|
|
128
|
-
source_col_name = find_best_source_collection(dimension_names)
|
|
129
|
-
exec_query = aggregation.query(dimension_names + [:all_measures], :source_collection=>source_col_name, :defer=>true)
|
|
130
|
-
aggregate(exec_query, :target_collection=>agg_col_name)
|
|
131
|
-
end
|
|
132
|
-
|
|
133
|
-
database[agg_col_name]
|
|
126
|
+
@metadata.aggregation_for(dimension_names)
|
|
134
127
|
end
|
|
135
128
|
|
|
136
129
|
def ensure_indexes(collection_name,dimension_names)
|
|
@@ -146,31 +139,6 @@ module Cubicle
|
|
|
146
139
|
#col.create_index(dimension_names.map{|dim|[dim,1]})
|
|
147
140
|
end
|
|
148
141
|
|
|
149
|
-
def aggregate(query,options={})
|
|
150
|
-
view = AggregationView.new(aggregation,query)
|
|
151
|
-
|
|
152
|
-
map, reduce = MapReduceHelper.generate_map_function(query), MapReduceHelper.generate_reduce_function
|
|
153
|
-
|
|
154
|
-
options[:finalize] = MapReduceHelper.generate_finalize_function(query)
|
|
155
|
-
options["query"] = expand_template(prepare_filter(query,options[:where] || {}),view)
|
|
156
|
-
|
|
157
|
-
query.source_collection_name = options.delete(:source_collection) || query.source_collection_name || aggregation.source_collection_name
|
|
158
|
-
|
|
159
|
-
target_collection = options.delete(:target_collection)
|
|
160
|
-
target_collection ||= query.target_collection_name if query.respond_to?(:target_collection_name)
|
|
161
|
-
|
|
162
|
-
options[:out] = target_collection unless target_collection.blank? || query.transient?
|
|
163
|
-
|
|
164
|
-
#This is defensive - some tests run without ever initializing any collections
|
|
165
|
-
return [] unless database.collection_names.include?(query.source_collection_name)
|
|
166
|
-
|
|
167
|
-
result = database[query.source_collection_name].map_reduce(expand_template(map, view),reduce,options)
|
|
168
|
-
|
|
169
|
-
ensure_indexes(target_collection,query.dimension_names) if target_collection
|
|
170
|
-
|
|
171
|
-
result
|
|
172
|
-
end
|
|
173
|
-
|
|
174
142
|
def expand_template(template,view)
|
|
175
143
|
return "" unless template
|
|
176
144
|
return Mustache.render(template,view) if template.is_a?(String)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
module Cubicle
  module Aggregation
    # Metadata describing one aggregation of a cubicle over a particular set
    # of member names. Each record lives in the collection returned by
    # AggregationMetadata.collection, and the record's _id is used to build
    # the name of the collection that holds the aggregated data
    # (see #target_collection_name).
    class AggregationMetadata
      class << self

        # Returns the Mongo collection holding the aggregation metadata
        # records. Unless overridden through .collection=, its name is the
        # CubicleMetadata collection name suffixed with ".aggregations".
        def collection
          @aggregations_collection_name ||= "#{Cubicle::Aggregation::CubicleMetadata.collection.name}.aggregations"
          Cubicle.mongo.database[@aggregations_collection_name]
        end

        # Overrides the name of the metadata collection. Note that this takes
        # a collection *name*, while .collection returns a collection object.
        def collection=(collection_name)
          @aggregations_collection_name = collection_name
        end

        # An existing aggregation holding fewer documents than this (100 by
        # default) is reused as-is by #initialize, even when it contains more
        # members than were requested.
        def min_records_to_reduce
          @min_records_to_reduce ||= 100
        end

        def min_records_to_reduce=(min)
          @min_records_to_reduce = min
        end

        # Drops every materialized aggregation collection that belongs to the
        # given aggregation and removes its metadata records.
        #
        # aggregation - a String or Symbol name, a CubicleMetadata instance,
        #               or any object responding to #name.
        def expire(aggregation)
          aggregation_name = case aggregation
                             when String then aggregation
                             when Symbol then aggregation.to_s
                             when Cubicle::Aggregation::CubicleMetadata then aggregation.aggregation.name
                             else aggregation.name
                             end

          # Anchored and escaped so that expiring "Defect" cannot also drop the
          # collections of an aggregation named "DefectCubicle". This mirrors
          # the format produced by #target_collection_name.
          owned = /\Acubicle\.aggregation\.#{Regexp.escape(aggregation_name.to_s)}\._/i

          database = Cubicle.mongo.database
          database.collection_names.each do |col|
            database[col].drop if col =~ owned
          end

          # The metadata rows are removed exactly once, whether or not any
          # aggregation collection was found above.
          collection.remove(:aggregation=>aggregation_name)
        end
      end

      # cubicle_metadata               - the owning CubicleMetadata.
      # member_names_or_attribute_hash - either a Hash of already-known
      #   attributes (used verbatim, no database access), or a list of member
      #   names for which a suitable aggregation is looked up and, when none is
      #   usable, registered and materialized.
      def initialize(cubicle_metadata,member_names_or_attribute_hash)
        @cubicle_metadata = cubicle_metadata
        if member_names_or_attribute_hash.kind_of?(Hash)
          @attributes = member_names_or_attribute_hash
        else
          member_names = member_names_or_attribute_hash
          # Smallest already-counted aggregation that contains every requested member.
          @candidate_aggregation = self.class.collection.find(
                  :aggregation=>@cubicle_metadata.aggregation.name,
                  :member_names=>{"$all"=>member_names}, :document_count=>{"$gte"=>0}).sort([:document_count, :asc]).limit(1).next_document


          #since the operator used in the query was $all, having equal lengths in the original and returned
          #member array means that they are identical, which means that regardless of the number of documents
          #in the aggregation, it is the candidate we want. Otherwise, we'll check to see if we
          #boil down the data further, or just make our soup with what we've got.
          @attributes = @candidate_aggregation if @candidate_aggregation &&
                  (@candidate_aggregation["member_names"].length == member_names.length ||
                          @candidate_aggregation["document_count"] < self.class.min_records_to_reduce)

          unless @attributes
            # document_count of -1 marks the record as not yet materialized.
            @attributes = HashWithIndifferentAccess.new({:aggregation=>@cubicle_metadata.aggregation.name,
                                                         :member_names=>member_names,
                                                         :document_count=>-1})

            #materialize the aggregation, and, if the operation was successful,
            #register it as available for use by future queries
            @attributes[:_id] = self.class.collection.insert(@attributes)
            materialize!
          end

        end
      end

      # Name of the collection that holds (or will hold) this aggregation's data.
      def target_collection_name
        "cubicle.aggregation.#{@cubicle_metadata.aggregation.name}._#{@attributes["_id"].to_s}"
      end

      # Name of the collection this aggregation is reduced from: the candidate
      # aggregation found during initialization when there was one, otherwise
      # the cubicle's own target collection.
      def source_collection_name
        if @candidate_aggregation
          candidate = Cubicle::Aggregation::AggregationMetadata.new(@cubicle_metadata,@candidate_aggregation)
          return candidate.target_collection_name
        end
        @cubicle_metadata.aggregation.target_collection_name
      end

      def member_names; @attributes["member_names"] || []; end

      # True when a non-negative document count has been recorded and either a
      # collection has been assigned or the database lists the target collection.
      def materialized?
        document_count >= 0 &&
                (!@collection.blank? ||
                        Cubicle.mongo.database.collection_names.include?(target_collection_name))
      end

      # Returns the aggregated data collection, or nil when not materialized.
      def collection
        @collection ||= Cubicle.mongo.database[target_collection_name] if materialized?
      end

      def collection=(collection)
        @collection = collection
      end

      def document_count
        @attributes["document_count"]
      end

      protected
      # Persists and caches the document count. The record is addressed through
      # the "_id" string key, the same key #target_collection_name reads, so
      # that string-keyed attribute hashes do not yield a nil selector.
      def update_document_count!(new_doc_count)
        self.class.collection.update({:_id=>@attributes["_id"]}, "$set"=>{:document_count=>new_doc_count})
        @attributes["document_count"]=new_doc_count
      end

      # Runs the aggregation into #target_collection_name unless that has
      # already happened, then records the resulting document count.
      def materialize!
        unless materialized?
          exec_query = @cubicle_metadata.aggregation.query(member_names + [:all_measures],
                                                           :source_collection=>source_collection_name,
                                                           :defer=>true)
          self.collection = @cubicle_metadata.aggregation.aggregator.aggregate(exec_query,
                                                                               :target_collection=>target_collection_name)
        end
        update_document_count!(@collection.count) unless @collection.blank?
      end

    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
module Cubicle
  module Aggregation
    # Entry point to the metadata kept for a single cubicle (aggregation):
    # hands out AggregationMetadata records for requested member sets and
    # expires them.
    class CubicleMetadata

      class << self

        # Returns the Mongo collection used for cubicle metadata. Its name
        # defaults to "cubicle.metadata" unless overridden via .collection=.
        # The name is held in a class-level instance variable rather than a
        # class variable, so it is not shared across an inheritance tree.
        def collection
          @collection_name ||= "cubicle.metadata"
          Cubicle.mongo.database[@collection_name]
        end

        # Overrides the metadata collection name. Assigning nil restores the
        # default on the next call to .collection.
        def collection=(collection_name)
          @collection_name = collection_name
        end
      end

      attr_reader :aggregation

      # aggregation - the cubicle this metadata belongs to; stored as given.
      def initialize(aggregation)
        @aggregation = aggregation
      end

      # Builds the AggregationMetadata for the given member names on behalf
      # of this cubicle.
      def aggregation_for(member_names = [])
        AggregationMetadata.new(self,member_names)
      end

      # Delegates to AggregationMetadata.expire for this cubicle.
      def expire!
        AggregationMetadata.expire(self)
      end
    end
  end
end
|
|
@@ -11,7 +11,7 @@ module Cubicle
|
|
|
11
11
|
def target_collection_name(collection_name = nil)
|
|
12
12
|
return nil if transient?
|
|
13
13
|
return @target_name = collection_name if collection_name
|
|
14
|
-
@target_name ||= "
|
|
14
|
+
@target_name ||= "cubicle.fact.#{name.blank? ? source_collection_name : name.underscore}"
|
|
15
15
|
end
|
|
16
16
|
alias target_collection_name= target_collection_name
|
|
17
17
|
|
data/lib/cubicle/version.rb
CHANGED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
# Tests for Cubicle::Aggregation::AggregationMetadata: default collection
# naming, document-count bookkeeping, lookup/reuse of existing aggregations,
# materialization and expiry.
# NOTE(review): DefectCubicle and Defect are presumably fixtures loaded by
# test_helper, which is also assumed to reset the database between tests.
class AggregationMetadataTest < ActiveSupport::TestCase
  context "Class level collection names" do
    should "use appropriate default values for the aggregations collection" do
      assert_equal "cubicle.metadata.aggregations", Cubicle::Aggregation::AggregationMetadata.collection.name
    end
  end

  context "AggregationMetadata.update_document_count" do
    setup do
      @cm = Cubicle::Aggregation::CubicleMetadata.new(DefectCubicle)
    end
    should "update the document count for a given aggregation instance" do
      agg_info = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product])
      # update_document_count! is protected, hence the send.
      agg_info.send(:update_document_count!,1024)
      assert_equal 1024, agg_info.document_count
      assert_equal false,agg_info.materialized?
    end
  end

  context "AggregationMetadata#new" do
    setup do
      @cm = Cubicle::Aggregation::CubicleMetadata.new(DefectCubicle)
    end
    should "create initialize an instance of AggregationMetadata in the database" do
      agg_info = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product,:region])
      # Anchored, escaped pattern: the name must be the literal prefix
      # followed by "_" and a non-empty id.
      assert_match(/\Acubicle\.aggregation\.DefectCubicle\._.+\z/, agg_info.target_collection_name)
      assert_equal [:product,:region], agg_info.member_names
      assert_equal false, agg_info.materialized?
      assert_nil agg_info.collection
    end
    should "fetch an existing aggregation from the database" do
      ag = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product,:region])
      ag.send(:update_document_count!,1)
      col_name = ag.target_collection_name
      assert_equal col_name, Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product,:region]).target_collection_name
    end
    should "ignore an existing aggregation that does not satisfy all fields" do
      ag = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product])
      ag.send(:update_document_count!,1)
      col_name = ag.target_collection_name
      assert_not_equal col_name, Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product,:region]).target_collection_name
    end
    should "select an existing aggregation with rows below the minimum threshold instead of creating a new one" do
      agg_info = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product,:region,:operator])
      # 99 is just under the default min_records_to_reduce of 100.
      agg_info.send(:update_document_count!,99)
      assert_equal agg_info.target_collection_name, Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product]).target_collection_name
    end

    should "ignore an existing aggregation with too many rows, but store that aggregation as a candidate source for use when materializing the aggregation" do
      agg_info = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product,:region,:operator])
      # 101 is just over the default min_records_to_reduce of 100.
      agg_info.send(:update_document_count!,101)
      new_agg_info = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product])
      assert_not_equal agg_info.target_collection_name, new_agg_info.target_collection_name
      assert_equal agg_info.target_collection_name, new_agg_info.source_collection_name
    end
  end

  context "AggregationMetadata#materialize!" do
    should "run a map reduce and produce the resulting collection" do
      Defect.create_test_data
      DefectCubicle.process
      @cm = Cubicle::Aggregation::CubicleMetadata.new(DefectCubicle)
      agg_info = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product])
      aggregation = agg_info.collection
      assert_not_nil aggregation
      assert aggregation.count > 0
      assert_equal aggregation.count, agg_info.document_count
    end
  end

  context "AggregationMetadata.expire" do
    should "drop any aggregation columns and remove metadata rows from the database" do
      Defect.create_test_data
      DefectCubicle.process
      @cm = Cubicle::Aggregation::CubicleMetadata.new(DefectCubicle)
      agg_info = Cubicle::Aggregation::AggregationMetadata.new(@cm,[:product])

      assert Cubicle.mongo.database.collection_names.include?(agg_info.target_collection_name)
      assert Cubicle::Aggregation::AggregationMetadata.collection.find(:aggregation=>"DefectCubicle").count > 0

      Cubicle::Aggregation::AggregationMetadata.expire(@cm)

      assert !Cubicle.mongo.database.collection_names.include?(agg_info.target_collection_name)
      assert_equal 0, Cubicle::Aggregation::AggregationMetadata.collection.find(:aggregation=>"DefectCubicle").count
    end
  end
end
|