cubicle 0.1.24 → 0.1.25
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +6 -0
- data/cubicle.gemspec +2 -2
- data/lib/cubicle.rb +5 -1
- data/lib/cubicle/aggregation/aggregation_manager.rb +2 -2
- data/lib/cubicle/data.rb +28 -28
- data/lib/cubicle/data/hierarchy.rb +55 -55
- data/lib/cubicle/data/level.rb +61 -62
- data/lib/cubicle/data/table.rb +28 -7
- data/lib/cubicle/ordered_hash_with_indifferent_access.rb +1 -2
- data/lib/cubicle/query.rb +7 -3
- data/lib/cubicle/version.rb +1 -1
- data/test/cubicle/cubicle_query_test.rb +3 -1
- data/test/cubicle/data/level_test.rb +34 -41
- data/test/log/test.log +34422 -0
- metadata +3 -3
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
==0.1.25
|
2
|
+
*Modified data extraction to ensure that member and query level aliases are respected in the final result. Also
|
3
|
+
stopped automatically dropping temporary map reduce tables during query execution for performance reasons. Instead,
|
4
|
+
added a static method to the Cubicle module, clear_temp_tables, which can be called at application startup or teardown:
|
5
|
+
Cubicle.clear_temp_tables()
|
6
|
+
|
1
7
|
==0.1.24
|
2
8
|
*Added more detail to the profile for the 'find' action
|
3
9
|
|
data/cubicle.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{cubicle}
|
8
|
-
s.version = "0.1.24"
|
8
|
+
s.version = "0.1.25"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Nathan Stults"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-19}
|
13
13
|
s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.}
|
14
14
|
s.email = %q{hereiam@sonic.net}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/cubicle.rb
CHANGED
@@ -52,7 +52,11 @@ module Cubicle
|
|
52
52
|
|
53
53
|
def self.logger
|
54
54
|
@logger ||= (Cubicle.mongo.logger || Logger.new("cubicle.log"))
|
55
|
-
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def self.clear_temp_tables
|
58
|
+
self.mongo.database.collection_names.each{|cn|self.mongo.database[cn].drop if cn =~ /tmp.mr.mapreduce/i}
|
59
|
+
end
|
56
60
|
end
|
57
61
|
|
58
62
|
#Turn off HTML escaping in Mustache
|
@@ -46,7 +46,7 @@ module Cubicle
|
|
46
46
|
if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank?
|
47
47
|
filter = prepare_filter(query,options[:where] || {})
|
48
48
|
else
|
49
|
-
reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members")
|
49
|
+
reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members (#{agg_data.member_names.join(",").inspect})")
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
@@ -57,7 +57,7 @@ module Cubicle
|
|
57
57
|
@profiler.measure(:find, :source=>reduction.name, :reason=>"Fetch final query results", :query=>find_options) do
|
58
58
|
count = reduction.count
|
59
59
|
results = reduction.find(filter,find_options).to_a
|
60
|
-
reduction.drop if reduction.name =~ /^tmp.mr.*/
|
60
|
+
#reduction.drop if reduction.name =~ /^tmp.mr.*/
|
61
61
|
Cubicle::Data::Table.new(query, results, count)
|
62
62
|
end
|
63
63
|
|
data/lib/cubicle/data.rb
CHANGED
@@ -1,29 +1,29 @@
|
|
1
|
-
module Cubicle
|
2
|
-
module Data
|
3
|
-
|
4
|
-
def self.aggregate(data,measures)
|
5
|
-
aggregated = OrderedHashWithIndifferentAccess.new {|hash,key|hash[key]=[]}
|
6
|
-
#in step one, we will gather our values into columns to give to the measure
|
7
|
-
#definitions to aggregation.
|
8
|
-
data.each do |row|
|
9
|
-
measures.each do |measure|
|
10
|
-
if (row.include?(measure.name))
|
11
|
-
val = row[measure.name]
|
12
|
-
aggregated[measure.name] << val if val.kind_of?(Numeric)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
#in step two, we will let the measures reduce the columns of values to a single number, preferably using
|
17
|
-
#black magic or human sacrifice
|
18
|
-
measures.each do |measure|
|
19
|
-
aggregated[measure.name] = measure.aggregate(aggregated[measure.name])
|
20
|
-
end
|
21
|
-
|
22
|
-
#give each measure a final shot to operate on the results. This is useful for measures that
|
23
|
-
#act on the results of other aggregations, like Ratio does.
|
24
|
-
measures.each {|measure|measure.finalize_aggregation(aggregated)}
|
25
|
-
aggregated
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
1
|
+
module Cubicle
|
2
|
+
module Data
|
3
|
+
|
4
|
+
def self.aggregate(data,measures)
|
5
|
+
aggregated = OrderedHashWithIndifferentAccess.new {|hash,key|hash[key]=[]}
|
6
|
+
#in step one, we will gather our values into columns to give to the measure
|
7
|
+
#definitions to aggregation.
|
8
|
+
data.each do |row|
|
9
|
+
measures.each do |measure|
|
10
|
+
if (row.include?(measure.name))
|
11
|
+
val = row[measure.name]
|
12
|
+
(aggregated[measure.name] ||= []) << val if val.kind_of?(Numeric)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
#in step two, we will let the measures reduce the columns of values to a single number, preferably using
|
17
|
+
#black magic or human sacrifice
|
18
|
+
measures.each do |measure|
|
19
|
+
aggregated[measure.name] = measure.aggregate(aggregated[measure.name])
|
20
|
+
end
|
21
|
+
|
22
|
+
#give each measure a final shot to operate on the results. This is useful for measures that
|
23
|
+
#act on the results of other aggregations, like Ratio does.
|
24
|
+
measures.each {|measure|measure.finalize_aggregation(aggregated)}
|
25
|
+
aggregated
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
29
|
end
|
@@ -1,55 +1,55 @@
|
|
1
|
-
module Cubicle
|
2
|
-
module Data
|
3
|
-
class Hierarchy < Cubicle::Data::Level
|
4
|
-
include Member
|
5
|
-
|
6
|
-
attr_reader :measures
|
7
|
-
def initialize(root_dimension,measures)
|
8
|
-
super(root_dimension)
|
9
|
-
@measures = measures
|
10
|
-
@member_name = name
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.hierarchize_table(table, dimension_names=nil)
|
14
|
-
dimension_names = [table.time_dimension_name || table.dimension_names].flatten if dimension_names.blank?
|
15
|
-
Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,table,table.dup)
|
16
|
-
end
|
17
|
-
private
|
18
|
-
|
19
|
-
def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
|
20
|
-
data, dimension_names = data.dup, dimension_names.dup
|
21
|
-
|
22
|
-
return data if dimension_names.blank?
|
23
|
-
|
24
|
-
dim_name = dimension_names.shift
|
25
|
-
dim = table.dimensions.find{|d|d.name==dim_name}
|
26
|
-
level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
|
27
|
-
data.each do |tuple|
|
28
|
-
member_name = tuple.delete(dim_name.to_s) || "Unknown"
|
29
|
-
level[member_name] << tuple
|
30
|
-
end
|
31
|
-
|
32
|
-
level.each do |key,value|
|
33
|
-
level[key] = Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,value,table,level)
|
34
|
-
end
|
35
|
-
|
36
|
-
Cubicle::Data::Hierarchy.expand_time_dimension_if_required(level,table)
|
37
|
-
|
38
|
-
level
|
39
|
-
end
|
40
|
-
|
41
|
-
def self.expand_time_dimension_if_required(data_level,table)
|
42
|
-
return unless data_level.leaf_level? && table.time_dimension_name && table.time_dimension_name.to_s == data_level.name.to_s &&
|
43
|
-
table.time_range && table.time_period
|
44
|
-
|
45
|
-
table.time_range.by!(table.time_period)
|
46
|
-
|
47
|
-
table.time_range.each do |date|
|
48
|
-
formatted_date = date.to_cubicle(table.time_period)
|
49
|
-
data_level[formatted_date] = [OrderedHashWithIndifferentAccess.new] unless data_level.include?(formatted_date)
|
50
|
-
end
|
51
|
-
data_level.keys.sort!
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
1
|
+
module Cubicle
|
2
|
+
module Data
|
3
|
+
class Hierarchy < Cubicle::Data::Level
|
4
|
+
include Member
|
5
|
+
|
6
|
+
attr_reader :measures
|
7
|
+
def initialize(root_dimension,measures)
|
8
|
+
super(root_dimension)
|
9
|
+
@measures = measures
|
10
|
+
@member_name = name
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.hierarchize_table(table, dimension_names=nil)
|
14
|
+
dimension_names = [table.time_dimension_name || table.dimension_names].flatten if dimension_names.blank?
|
15
|
+
Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,table,table.dup)
|
16
|
+
end
|
17
|
+
private
|
18
|
+
|
19
|
+
def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
|
20
|
+
data, dimension_names = data.dup, dimension_names.dup
|
21
|
+
|
22
|
+
return data if dimension_names.blank?
|
23
|
+
|
24
|
+
dim_name = dimension_names.shift
|
25
|
+
dim = table.dimensions.find{|d|d.name==dim_name}
|
26
|
+
level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
|
27
|
+
data.each do |tuple|
|
28
|
+
member_name = tuple.delete(dim_name.to_s) || "Unknown"
|
29
|
+
(level[member_name] ||= []) << tuple
|
30
|
+
end
|
31
|
+
|
32
|
+
level.each do |key,value|
|
33
|
+
level[key] = Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,value,table,level)
|
34
|
+
end
|
35
|
+
|
36
|
+
Cubicle::Data::Hierarchy.expand_time_dimension_if_required(level,table)
|
37
|
+
|
38
|
+
level
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.expand_time_dimension_if_required(data_level,table)
|
42
|
+
return unless data_level.leaf_level? && table.time_dimension_name && table.time_dimension_name.to_s == data_level.name.to_s &&
|
43
|
+
table.time_range && table.time_period
|
44
|
+
|
45
|
+
table.time_range.by!(table.time_period)
|
46
|
+
|
47
|
+
table.time_range.each do |date|
|
48
|
+
formatted_date = date.to_cubicle(table.time_period)
|
49
|
+
data_level[formatted_date] = [OrderedHashWithIndifferentAccess.new] unless data_level.include?(formatted_date)
|
50
|
+
end
|
51
|
+
data_level.keys.sort!
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/cubicle/data/level.rb
CHANGED
@@ -1,62 +1,61 @@
|
|
1
|
-
module Cubicle
|
2
|
-
module Data
|
3
|
-
class Level < OrderedHashWithIndifferentAccess
|
4
|
-
|
5
|
-
def initialize(dimension,parent_level=nil)
|
6
|
-
@dimension = dimension
|
7
|
-
@parent_level = parent_level
|
8
|
-
super() {|hash,key|hash[key]=[]}#Always have an array freshly baked when strangers call
|
9
|
-
end
|
10
|
-
|
11
|
-
attr_reader :dimension, :parent_level
|
12
|
-
attr_accessor :missing_member_default
|
13
|
-
|
14
|
-
alias member_names keys
|
15
|
-
alias members values
|
16
|
-
|
17
|
-
def name
|
18
|
-
@dimension.name
|
19
|
-
end
|
20
|
-
|
21
|
-
def flatten(member_name = nil, opts={}, &block)
|
22
|
-
|
23
|
-
default_val = opts[:default] || @missing_member_default || 0
|
24
|
-
|
25
|
-
self.values.inject([]) do |output, data|
|
26
|
-
data.
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
member.
|
56
|
-
member.
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
end
|
1
|
+
module Cubicle
|
2
|
+
module Data
|
3
|
+
class Level < OrderedHashWithIndifferentAccess
|
4
|
+
|
5
|
+
def initialize(dimension,parent_level=nil)
|
6
|
+
@dimension = dimension
|
7
|
+
@parent_level = parent_level
|
8
|
+
super() {|hash,key|hash[key]=[]}#Always have an array freshly baked when strangers call
|
9
|
+
end
|
10
|
+
|
11
|
+
attr_reader :dimension, :parent_level
|
12
|
+
attr_accessor :missing_member_default
|
13
|
+
|
14
|
+
alias member_names keys
|
15
|
+
alias members values
|
16
|
+
|
17
|
+
def name
|
18
|
+
@dimension.name
|
19
|
+
end
|
20
|
+
|
21
|
+
def flatten(member_name = nil, opts={}, &block)
|
22
|
+
|
23
|
+
default_val = opts[:default] || @missing_member_default || 0
|
24
|
+
|
25
|
+
self.values.inject([]) do |output, data|
|
26
|
+
value = data.measure_values
|
27
|
+
value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
|
28
|
+
|
29
|
+
if block_given?
|
30
|
+
flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
|
31
|
+
end
|
32
|
+
flat_val ||= value[member_name] if member_name && value.include?(member_name)
|
33
|
+
flat_val ||= default_val
|
34
|
+
output << flat_val
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def leaf_level?
|
39
|
+
return self.length < 1 ||
|
40
|
+
!self[self.keys[0]].is_a?(Cubicle::Data::Level)
|
41
|
+
end
|
42
|
+
|
43
|
+
def []=(key,val)
|
44
|
+
prepare_level_member(val,key,self)
|
45
|
+
super(key.to_s,val)
|
46
|
+
end
|
47
|
+
|
48
|
+
def hierarchy
|
49
|
+
parent_level || self
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
def prepare_level_member(member,member_name,parent_level)
|
54
|
+
member.class_eval("include Cubicle::Data::Member")
|
55
|
+
member.member_name = member_name
|
56
|
+
member.parent_level = parent_level
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
data/lib/cubicle/data/table.rb
CHANGED
@@ -9,7 +9,7 @@ module Cubicle
|
|
9
9
|
@time_dimension_name = query.time_dimension.name if query.respond_to?(:time_dimension) && query.time_dimension
|
10
10
|
@time_period = query.time_period if query.respond_to?(:time_period)
|
11
11
|
@time_range = query.time_range if query.respond_to?(:time_range)
|
12
|
-
extract_data(query_results)
|
12
|
+
extract_data(query,query_results)
|
13
13
|
@total_count = total_count if total_count
|
14
14
|
end
|
15
15
|
|
@@ -45,16 +45,37 @@ module Cubicle
|
|
45
45
|
|
46
46
|
private
|
47
47
|
|
48
|
-
def extract_data(data)
|
48
|
+
def extract_data(query,data)
|
49
49
|
data.each do |result|
|
50
50
|
new = result.dup
|
51
51
|
self << OrderedHashWithIndifferentAccess.new(new.delete("_id").merge(new.delete("value")))
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
|
53
|
+
finalize_aggregations(self[-1])
|
54
|
+
|
55
|
+
apply_aliases(query,self[-1])
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def finalize_aggregations(row)
|
60
|
+
#these should be processed first, because they are often used as parts of the other calc measures
|
61
|
+
measures.select{|m|m.distinct_count?}.each do |m|
|
62
|
+
m.finalize_aggregation(row)
|
63
|
+
end
|
64
|
+
measures.select{|m|!m.distinct_count?}.each do |m|
|
65
|
+
m.finalize_aggregation(row)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def apply_aliases(query,row)
|
70
|
+
members = query.dimensions + query.measures
|
71
|
+
members.select{|m|m.alias_list}.each do |m|
|
72
|
+
m.alias_list.each do |m_alias|
|
73
|
+
row[m_alias.to_s] = row[m.name.to_s]
|
55
74
|
end
|
56
|
-
|
57
|
-
|
75
|
+
end
|
76
|
+
if (query.respond_to?(:query_aliases) && query.query_aliases)
|
77
|
+
query.query_aliases.each do |key,value|
|
78
|
+
row[key.to_s] = row[value.to_s]
|
58
79
|
end
|
59
80
|
end
|
60
81
|
end
|
data/lib/cubicle/query.rb
CHANGED
@@ -2,7 +2,7 @@ module Cubicle
|
|
2
2
|
class Query
|
3
3
|
include Dsl
|
4
4
|
|
5
|
-
attr_reader :time_period, :transient, :aggregation, :named_expressions
|
5
|
+
attr_reader :time_period, :transient, :aggregation, :named_expressions, :query_aliases
|
6
6
|
attr_accessor :source_collection_name
|
7
7
|
|
8
8
|
def initialize(aggregation)
|
@@ -104,7 +104,9 @@ module Cubicle
|
|
104
104
|
|
105
105
|
def convert_dimension(dimension)
|
106
106
|
return dimension if transient?
|
107
|
-
Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
|
107
|
+
d = Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
|
108
|
+
d.alias_list = dimension.alias_list
|
109
|
+
d
|
108
110
|
end
|
109
111
|
|
110
112
|
def convert_measure(measure)
|
@@ -133,7 +135,9 @@ module Cubicle
|
|
133
135
|
count_field = expression + "_count"
|
134
136
|
expression = "#{expression}*#{count_field}"
|
135
137
|
end
|
136
|
-
Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
|
138
|
+
m = Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
|
139
|
+
m.alias_list = measure.alias_list
|
140
|
+
m
|
137
141
|
end
|
138
142
|
|
139
143
|
def unalias(*name_or_names)
|
data/lib/cubicle/version.rb
CHANGED