cubicle 0.1.24 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +6 -0
- data/cubicle.gemspec +2 -2
- data/lib/cubicle.rb +5 -1
- data/lib/cubicle/aggregation/aggregation_manager.rb +2 -2
- data/lib/cubicle/data.rb +28 -28
- data/lib/cubicle/data/hierarchy.rb +55 -55
- data/lib/cubicle/data/level.rb +61 -62
- data/lib/cubicle/data/table.rb +28 -7
- data/lib/cubicle/ordered_hash_with_indifferent_access.rb +1 -2
- data/lib/cubicle/query.rb +7 -3
- data/lib/cubicle/version.rb +1 -1
- data/test/cubicle/cubicle_query_test.rb +3 -1
- data/test/cubicle/data/level_test.rb +34 -41
- data/test/log/test.log +34422 -0
- metadata +3 -3
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
==0.1.25
|
2
|
+
*Modified data extraction to ensure that member and query level aliases are respected in the final result. Also
|
3
|
+
stopped automatically dropping temporary map reduce tables during query execution for performance reasons. Instead,
|
4
|
+
added a static method to the Cubicle module, clear_temp_tables, which can be called at application startup or teardown:
|
5
|
+
Cubicle.clear_temp_tables()
|
6
|
+
|
1
7
|
==0.1.24
|
2
8
|
*Added more detail to the profile for the 'find' action
|
3
9
|
|
data/cubicle.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{cubicle}
|
8
|
-
s.version = "0.1.24"
|
8
|
+
s.version = "0.1.25"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Nathan Stults"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-19}
|
13
13
|
s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.}
|
14
14
|
s.email = %q{hereiam@sonic.net}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/cubicle.rb
CHANGED
@@ -52,7 +52,11 @@ module Cubicle
|
|
52
52
|
|
53
53
|
def self.logger
|
54
54
|
@logger ||= (Cubicle.mongo.logger || Logger.new("cubicle.log"))
|
55
|
-
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def self.clear_temp_tables
|
58
|
+
self.mongo.database.collection_names.each{|cn|self.mongo.database[cn].drop if cn =~ /tmp.mr.mapreduce/i}
|
59
|
+
end
|
56
60
|
end
|
57
61
|
|
58
62
|
#Turn off HTML escaping in Mustache
|
data/lib/cubicle/aggregation/aggregation_manager.rb
CHANGED
@@ -46,7 +46,7 @@ module Cubicle
|
|
46
46
|
if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank?
|
47
47
|
filter = prepare_filter(query,options[:where] || {})
|
48
48
|
else
|
49
|
-
reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members")
|
49
|
+
reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members (#{agg_data.member_names.join(",").inspect})")
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
@@ -57,7 +57,7 @@ module Cubicle
|
|
57
57
|
@profiler.measure(:find, :source=>reduction.name, :reason=>"Fetch final query results", :query=>find_options) do
|
58
58
|
count = reduction.count
|
59
59
|
results = reduction.find(filter,find_options).to_a
|
60
|
-
reduction.drop if reduction.name =~ /^tmp.mr.*/
|
60
|
+
#reduction.drop if reduction.name =~ /^tmp.mr.*/
|
61
61
|
Cubicle::Data::Table.new(query, results, count)
|
62
62
|
end
|
63
63
|
|
data/lib/cubicle/data.rb
CHANGED
@@ -1,29 +1,29 @@
|
|
1
|
-
module Cubicle
|
2
|
-
module Data
|
3
|
-
|
4
|
-
def self.aggregate(data,measures)
|
5
|
-
aggregated = OrderedHashWithIndifferentAccess.new {|hash,key|hash[key]=[]}
|
6
|
-
#in step one, we will gather our values into columns to give to the measure
|
7
|
-
#definitions to aggregation.
|
8
|
-
data.each do |row|
|
9
|
-
measures.each do |measure|
|
10
|
-
if (row.include?(measure.name))
|
11
|
-
val = row[measure.name]
|
12
|
-
aggregated[measure.name] << val if val.kind_of?(Numeric)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
#in step two, we will let the measures reduce the columns of values to a single number, preferably using
|
17
|
-
#black magic or human sacrifice
|
18
|
-
measures.each do |measure|
|
19
|
-
aggregated[measure.name] = measure.aggregate(aggregated[measure.name])
|
20
|
-
end
|
21
|
-
|
22
|
-
#give each measure a final shot to operate on the results. This is useful for measures that
|
23
|
-
#act on the results of other aggregations, like Ratio does.
|
24
|
-
measures.each {|measure|measure.finalize_aggregation(aggregated)}
|
25
|
-
aggregated
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
1
|
+
module Cubicle
|
2
|
+
module Data
|
3
|
+
|
4
|
+
def self.aggregate(data,measures)
|
5
|
+
aggregated = OrderedHashWithIndifferentAccess.new {|hash,key|hash[key]=[]}
|
6
|
+
#in step one, we will gather our values into columns to give to the measure
|
7
|
+
#definitions to aggregation.
|
8
|
+
data.each do |row|
|
9
|
+
measures.each do |measure|
|
10
|
+
if (row.include?(measure.name))
|
11
|
+
val = row[measure.name]
|
12
|
+
(aggregated[measure.name] ||= []) << val if val.kind_of?(Numeric)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
#in step two, we will let the measures reduce the columns of values to a single number, preferably using
|
17
|
+
#black magic or human sacrifice
|
18
|
+
measures.each do |measure|
|
19
|
+
aggregated[measure.name] = measure.aggregate(aggregated[measure.name])
|
20
|
+
end
|
21
|
+
|
22
|
+
#give each measure a final shot to operate on the results. This is useful for measures that
|
23
|
+
#act on the results of other aggregations, like Ratio does.
|
24
|
+
measures.each {|measure|measure.finalize_aggregation(aggregated)}
|
25
|
+
aggregated
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
29
|
end
|
data/lib/cubicle/data/hierarchy.rb
CHANGED
@@ -1,55 +1,55 @@
|
|
1
|
-
module Cubicle
|
2
|
-
module Data
|
3
|
-
class Hierarchy < Cubicle::Data::Level
|
4
|
-
include Member
|
5
|
-
|
6
|
-
attr_reader :measures
|
7
|
-
def initialize(root_dimension,measures)
|
8
|
-
super(root_dimension)
|
9
|
-
@measures = measures
|
10
|
-
@member_name = name
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.hierarchize_table(table, dimension_names=nil)
|
14
|
-
dimension_names = [table.time_dimension_name || table.dimension_names].flatten if dimension_names.blank?
|
15
|
-
Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,table,table.dup)
|
16
|
-
end
|
17
|
-
private
|
18
|
-
|
19
|
-
def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
|
20
|
-
data, dimension_names = data.dup, dimension_names.dup
|
21
|
-
|
22
|
-
return data if dimension_names.blank?
|
23
|
-
|
24
|
-
dim_name = dimension_names.shift
|
25
|
-
dim = table.dimensions.find{|d|d.name==dim_name}
|
26
|
-
level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
|
27
|
-
data.each do |tuple|
|
28
|
-
member_name = tuple.delete(dim_name.to_s) || "Unknown"
|
29
|
-
level[member_name] << tuple
|
30
|
-
end
|
31
|
-
|
32
|
-
level.each do |key,value|
|
33
|
-
level[key] = Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,value,table,level)
|
34
|
-
end
|
35
|
-
|
36
|
-
Cubicle::Data::Hierarchy.expand_time_dimension_if_required(level,table)
|
37
|
-
|
38
|
-
level
|
39
|
-
end
|
40
|
-
|
41
|
-
def self.expand_time_dimension_if_required(data_level,table)
|
42
|
-
return unless data_level.leaf_level? && table.time_dimension_name && table.time_dimension_name.to_s == data_level.name.to_s &&
|
43
|
-
table.time_range && table.time_period
|
44
|
-
|
45
|
-
table.time_range.by!(table.time_period)
|
46
|
-
|
47
|
-
table.time_range.each do |date|
|
48
|
-
formatted_date = date.to_cubicle(table.time_period)
|
49
|
-
data_level[formatted_date] = [OrderedHashWithIndifferentAccess.new] unless data_level.include?(formatted_date)
|
50
|
-
end
|
51
|
-
data_level.keys.sort!
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
1
|
+
module Cubicle
|
2
|
+
module Data
|
3
|
+
class Hierarchy < Cubicle::Data::Level
|
4
|
+
include Member
|
5
|
+
|
6
|
+
attr_reader :measures
|
7
|
+
def initialize(root_dimension,measures)
|
8
|
+
super(root_dimension)
|
9
|
+
@measures = measures
|
10
|
+
@member_name = name
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.hierarchize_table(table, dimension_names=nil)
|
14
|
+
dimension_names = [table.time_dimension_name || table.dimension_names].flatten if dimension_names.blank?
|
15
|
+
Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,table,table.dup)
|
16
|
+
end
|
17
|
+
private
|
18
|
+
|
19
|
+
def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
|
20
|
+
data, dimension_names = data.dup, dimension_names.dup
|
21
|
+
|
22
|
+
return data if dimension_names.blank?
|
23
|
+
|
24
|
+
dim_name = dimension_names.shift
|
25
|
+
dim = table.dimensions.find{|d|d.name==dim_name}
|
26
|
+
level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
|
27
|
+
data.each do |tuple|
|
28
|
+
member_name = tuple.delete(dim_name.to_s) || "Unknown"
|
29
|
+
(level[member_name] ||= []) << tuple
|
30
|
+
end
|
31
|
+
|
32
|
+
level.each do |key,value|
|
33
|
+
level[key] = Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,value,table,level)
|
34
|
+
end
|
35
|
+
|
36
|
+
Cubicle::Data::Hierarchy.expand_time_dimension_if_required(level,table)
|
37
|
+
|
38
|
+
level
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.expand_time_dimension_if_required(data_level,table)
|
42
|
+
return unless data_level.leaf_level? && table.time_dimension_name && table.time_dimension_name.to_s == data_level.name.to_s &&
|
43
|
+
table.time_range && table.time_period
|
44
|
+
|
45
|
+
table.time_range.by!(table.time_period)
|
46
|
+
|
47
|
+
table.time_range.each do |date|
|
48
|
+
formatted_date = date.to_cubicle(table.time_period)
|
49
|
+
data_level[formatted_date] = [OrderedHashWithIndifferentAccess.new] unless data_level.include?(formatted_date)
|
50
|
+
end
|
51
|
+
data_level.keys.sort!
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/cubicle/data/level.rb
CHANGED
@@ -1,62 +1,61 @@
|
|
1
|
-
module Cubicle
|
2
|
-
module Data
|
3
|
-
class Level < OrderedHashWithIndifferentAccess
|
4
|
-
|
5
|
-
def initialize(dimension,parent_level=nil)
|
6
|
-
@dimension = dimension
|
7
|
-
@parent_level = parent_level
|
8
|
-
super() {|hash,key|hash[key]=[]}#Always have an array freshly baked when strangers call
|
9
|
-
end
|
10
|
-
|
11
|
-
attr_reader :dimension, :parent_level
|
12
|
-
attr_accessor :missing_member_default
|
13
|
-
|
14
|
-
alias member_names keys
|
15
|
-
alias members values
|
16
|
-
|
17
|
-
def name
|
18
|
-
@dimension.name
|
19
|
-
end
|
20
|
-
|
21
|
-
def flatten(member_name = nil, opts={}, &block)
|
22
|
-
|
23
|
-
default_val = opts[:default] || @missing_member_default || 0
|
24
|
-
|
25
|
-
self.values.inject([]) do |output, data|
|
26
|
-
data.
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
member.
|
56
|
-
member.
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
end
|
1
|
+
module Cubicle
|
2
|
+
module Data
|
3
|
+
class Level < OrderedHashWithIndifferentAccess
|
4
|
+
|
5
|
+
def initialize(dimension,parent_level=nil)
|
6
|
+
@dimension = dimension
|
7
|
+
@parent_level = parent_level
|
8
|
+
super() {|hash,key|hash[key]=[]}#Always have an array freshly baked when strangers call
|
9
|
+
end
|
10
|
+
|
11
|
+
attr_reader :dimension, :parent_level
|
12
|
+
attr_accessor :missing_member_default
|
13
|
+
|
14
|
+
alias member_names keys
|
15
|
+
alias members values
|
16
|
+
|
17
|
+
def name
|
18
|
+
@dimension.name
|
19
|
+
end
|
20
|
+
|
21
|
+
def flatten(member_name = nil, opts={}, &block)
|
22
|
+
|
23
|
+
default_val = opts[:default] || @missing_member_default || 0
|
24
|
+
|
25
|
+
self.values.inject([]) do |output, data|
|
26
|
+
value = data.measure_values
|
27
|
+
value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
|
28
|
+
|
29
|
+
if block_given?
|
30
|
+
flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
|
31
|
+
end
|
32
|
+
flat_val ||= value[member_name] if member_name && value.include?(member_name)
|
33
|
+
flat_val ||= default_val
|
34
|
+
output << flat_val
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def leaf_level?
|
39
|
+
return self.length < 1 ||
|
40
|
+
!self[self.keys[0]].is_a?(Cubicle::Data::Level)
|
41
|
+
end
|
42
|
+
|
43
|
+
def []=(key,val)
|
44
|
+
prepare_level_member(val,key,self)
|
45
|
+
super(key.to_s,val)
|
46
|
+
end
|
47
|
+
|
48
|
+
def hierarchy
|
49
|
+
parent_level || self
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
def prepare_level_member(member,member_name,parent_level)
|
54
|
+
member.class_eval("include Cubicle::Data::Member")
|
55
|
+
member.member_name = member_name
|
56
|
+
member.parent_level = parent_level
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
data/lib/cubicle/data/table.rb
CHANGED
@@ -9,7 +9,7 @@ module Cubicle
|
|
9
9
|
@time_dimension_name = query.time_dimension.name if query.respond_to?(:time_dimension) && query.time_dimension
|
10
10
|
@time_period = query.time_period if query.respond_to?(:time_period)
|
11
11
|
@time_range = query.time_range if query.respond_to?(:time_range)
|
12
|
-
extract_data(query_results)
|
12
|
+
extract_data(query,query_results)
|
13
13
|
@total_count = total_count if total_count
|
14
14
|
end
|
15
15
|
|
@@ -45,16 +45,37 @@ module Cubicle
|
|
45
45
|
|
46
46
|
private
|
47
47
|
|
48
|
-
def extract_data(data)
|
48
|
+
def extract_data(query,data)
|
49
49
|
data.each do |result|
|
50
50
|
new = result.dup
|
51
51
|
self << OrderedHashWithIndifferentAccess.new(new.delete("_id").merge(new.delete("value")))
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
|
53
|
+
finalize_aggregations(self[-1])
|
54
|
+
|
55
|
+
apply_aliases(query,self[-1])
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def finalize_aggregations(row)
|
60
|
+
#these should be processed first, because they are often used as parts of the other calc measures
|
61
|
+
measures.select{|m|m.distinct_count?}.each do |m|
|
62
|
+
m.finalize_aggregation(row)
|
63
|
+
end
|
64
|
+
measures.select{|m|!m.distinct_count?}.each do |m|
|
65
|
+
m.finalize_aggregation(row)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def apply_aliases(query,row)
|
70
|
+
members = query.dimensions + query.measures
|
71
|
+
members.select{|m|m.alias_list}.each do |m|
|
72
|
+
m.alias_list.each do |m_alias|
|
73
|
+
row[m_alias.to_s] = row[m.name.to_s]
|
55
74
|
end
|
56
|
-
|
57
|
-
|
75
|
+
end
|
76
|
+
if (query.respond_to?(:query_aliases) && query.query_aliases)
|
77
|
+
query.query_aliases.each do |key,value|
|
78
|
+
row[key.to_s] = row[value.to_s]
|
58
79
|
end
|
59
80
|
end
|
60
81
|
end
|
data/lib/cubicle/query.rb
CHANGED
@@ -2,7 +2,7 @@ module Cubicle
|
|
2
2
|
class Query
|
3
3
|
include Dsl
|
4
4
|
|
5
|
-
attr_reader :time_period, :transient, :aggregation, :named_expressions
|
5
|
+
attr_reader :time_period, :transient, :aggregation, :named_expressions, :query_aliases
|
6
6
|
attr_accessor :source_collection_name
|
7
7
|
|
8
8
|
def initialize(aggregation)
|
@@ -104,7 +104,9 @@ module Cubicle
|
|
104
104
|
|
105
105
|
def convert_dimension(dimension)
|
106
106
|
return dimension if transient?
|
107
|
-
Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
|
107
|
+
d = Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
|
108
|
+
d.alias_list = dimension.alias_list
|
109
|
+
d
|
108
110
|
end
|
109
111
|
|
110
112
|
def convert_measure(measure)
|
@@ -133,7 +135,9 @@ module Cubicle
|
|
133
135
|
count_field = expression + "_count"
|
134
136
|
expression = "#{expression}*#{count_field}"
|
135
137
|
end
|
136
|
-
Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
|
138
|
+
m = Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
|
139
|
+
m.alias_list = measure.alias_list
|
140
|
+
m
|
137
141
|
end
|
138
142
|
|
139
143
|
def unalias(*name_or_names)
|
data/lib/cubicle/version.rb
CHANGED