cubicle 0.1.24 → 0.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,9 @@
1
+ ==0.1.25
2
+ *Modified data extraction to ensure that member-level and query-level aliases are respected in the final result. Also
3
+ stopped automatically dropping temporary map-reduce tables during query execution, for performance reasons. Instead,
4
+ added a module-level method to the Cubicle module, clear_temp_tables, which drops those temporary tables and can be called at application startup or teardown:
5
+ Cubicle.clear_temp_tables()
6
+
1
7
  ==0.1.24
2
8
  *Added more detail to the profile for the 'find' action
3
9
 
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{cubicle}
8
- s.version = "0.1.24"
8
+ s.version = "0.1.25"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Nathan Stults"]
12
- s.date = %q{2010-05-13}
12
+ s.date = %q{2010-05-19}
13
13
  s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.}
14
14
  s.email = %q{hereiam@sonic.net}
15
15
  s.extra_rdoc_files = [
@@ -52,7 +52,11 @@ module Cubicle
52
52
 
53
53
  def self.logger
54
54
  @logger ||= (Cubicle.mongo.logger || Logger.new("cubicle.log"))
55
- end
55
+ end
56
+
57
+ def self.clear_temp_tables
58
+ self.mongo.database.collection_names.each{|cn|self.mongo.database[cn].drop if cn =~ /tmp.mr.mapreduce/i}
59
+ end
56
60
  end
57
61
 
58
62
  #Turn off HTML escaping in Mustache
@@ -46,7 +46,7 @@ module Cubicle
46
46
  if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank?
47
47
  filter = prepare_filter(query,options[:where] || {})
48
48
  else
49
- reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members")
49
+ reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members (#{agg_data.member_names.join(",").inspect})")
50
50
  end
51
51
  end
52
52
 
@@ -57,7 +57,7 @@ module Cubicle
57
57
  @profiler.measure(:find, :source=>reduction.name, :reason=>"Fetch final query results", :query=>find_options) do
58
58
  count = reduction.count
59
59
  results = reduction.find(filter,find_options).to_a
60
- reduction.drop if reduction.name =~ /^tmp.mr.*/
60
+ #reduction.drop if reduction.name =~ /^tmp.mr.*/
61
61
  Cubicle::Data::Table.new(query, results, count)
62
62
  end
63
63
 
@@ -1,29 +1,29 @@
1
- module Cubicle
2
- module Data
3
-
4
- def self.aggregate(data,measures)
5
- aggregated = OrderedHashWithIndifferentAccess.new {|hash,key|hash[key]=[]}
6
- #in step one, we will gather our values into columns to give to the measure
7
- #definitions to aggregation.
8
- data.each do |row|
9
- measures.each do |measure|
10
- if (row.include?(measure.name))
11
- val = row[measure.name]
12
- aggregated[measure.name] << val if val.kind_of?(Numeric)
13
- end
14
- end
15
- end
16
- #in step two, we will let the measures reduce the columns of values to a single number, preferably using
17
- #black magic or human sacrifice
18
- measures.each do |measure|
19
- aggregated[measure.name] = measure.aggregate(aggregated[measure.name])
20
- end
21
-
22
- #give each measure a final shot to operate on the results. This is useful for measures that
23
- #act on the results of other aggregations, like Ratio does.
24
- measures.each {|measure|measure.finalize_aggregation(aggregated)}
25
- aggregated
26
- end
27
-
28
- end
1
+ module Cubicle
2
+ module Data
3
+
4
+ def self.aggregate(data,measures)
5
+ aggregated = OrderedHashWithIndifferentAccess.new {|hash,key|hash[key]=[]}
6
+ #in step one, we will gather our values into columns to give to the measure
7
+ #definitions to aggregation.
8
+ data.each do |row|
9
+ measures.each do |measure|
10
+ if (row.include?(measure.name))
11
+ val = row[measure.name]
12
+ (aggregated[measure.name] ||= []) << val if val.kind_of?(Numeric)
13
+ end
14
+ end
15
+ end
16
+ #in step two, we will let the measures reduce the columns of values to a single number, preferably using
17
+ #black magic or human sacrifice
18
+ measures.each do |measure|
19
+ aggregated[measure.name] = measure.aggregate(aggregated[measure.name])
20
+ end
21
+
22
+ #give each measure a final shot to operate on the results. This is useful for measures that
23
+ #act on the results of other aggregations, like Ratio does.
24
+ measures.each {|measure|measure.finalize_aggregation(aggregated)}
25
+ aggregated
26
+ end
27
+
28
+ end
29
29
  end
@@ -1,55 +1,55 @@
1
- module Cubicle
2
- module Data
3
- class Hierarchy < Cubicle::Data::Level
4
- include Member
5
-
6
- attr_reader :measures
7
- def initialize(root_dimension,measures)
8
- super(root_dimension)
9
- @measures = measures
10
- @member_name = name
11
- end
12
-
13
- def self.hierarchize_table(table, dimension_names=nil)
14
- dimension_names = [table.time_dimension_name || table.dimension_names].flatten if dimension_names.blank?
15
- Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,table,table.dup)
16
- end
17
- private
18
-
19
- def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
20
- data, dimension_names = data.dup, dimension_names.dup
21
-
22
- return data if dimension_names.blank?
23
-
24
- dim_name = dimension_names.shift
25
- dim = table.dimensions.find{|d|d.name==dim_name}
26
- level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
27
- data.each do |tuple|
28
- member_name = tuple.delete(dim_name.to_s) || "Unknown"
29
- level[member_name] << tuple
30
- end
31
-
32
- level.each do |key,value|
33
- level[key] = Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,value,table,level)
34
- end
35
-
36
- Cubicle::Data::Hierarchy.expand_time_dimension_if_required(level,table)
37
-
38
- level
39
- end
40
-
41
- def self.expand_time_dimension_if_required(data_level,table)
42
- return unless data_level.leaf_level? && table.time_dimension_name && table.time_dimension_name.to_s == data_level.name.to_s &&
43
- table.time_range && table.time_period
44
-
45
- table.time_range.by!(table.time_period)
46
-
47
- table.time_range.each do |date|
48
- formatted_date = date.to_cubicle(table.time_period)
49
- data_level[formatted_date] = [OrderedHashWithIndifferentAccess.new] unless data_level.include?(formatted_date)
50
- end
51
- data_level.keys.sort!
52
- end
53
- end
54
- end
55
- end
1
+ module Cubicle
2
+ module Data
3
+ class Hierarchy < Cubicle::Data::Level
4
+ include Member
5
+
6
+ attr_reader :measures
7
+ def initialize(root_dimension,measures)
8
+ super(root_dimension)
9
+ @measures = measures
10
+ @member_name = name
11
+ end
12
+
13
+ def self.hierarchize_table(table, dimension_names=nil)
14
+ dimension_names = [table.time_dimension_name || table.dimension_names].flatten if dimension_names.blank?
15
+ Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,table,table.dup)
16
+ end
17
+ private
18
+
19
+ def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
20
+ data, dimension_names = data.dup, dimension_names.dup
21
+
22
+ return data if dimension_names.blank?
23
+
24
+ dim_name = dimension_names.shift
25
+ dim = table.dimensions.find{|d|d.name==dim_name}
26
+ level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
27
+ data.each do |tuple|
28
+ member_name = tuple.delete(dim_name.to_s) || "Unknown"
29
+ (level[member_name] ||= []) << tuple
30
+ end
31
+
32
+ level.each do |key,value|
33
+ level[key] = Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,value,table,level)
34
+ end
35
+
36
+ Cubicle::Data::Hierarchy.expand_time_dimension_if_required(level,table)
37
+
38
+ level
39
+ end
40
+
41
+ def self.expand_time_dimension_if_required(data_level,table)
42
+ return unless data_level.leaf_level? && table.time_dimension_name && table.time_dimension_name.to_s == data_level.name.to_s &&
43
+ table.time_range && table.time_period
44
+
45
+ table.time_range.by!(table.time_period)
46
+
47
+ table.time_range.each do |date|
48
+ formatted_date = date.to_cubicle(table.time_period)
49
+ data_level[formatted_date] = [OrderedHashWithIndifferentAccess.new] unless data_level.include?(formatted_date)
50
+ end
51
+ data_level.keys.sort!
52
+ end
53
+ end
54
+ end
55
+ end
@@ -1,62 +1,61 @@
1
- module Cubicle
2
- module Data
3
- class Level < OrderedHashWithIndifferentAccess
4
-
5
- def initialize(dimension,parent_level=nil)
6
- @dimension = dimension
7
- @parent_level = parent_level
8
- super() {|hash,key|hash[key]=[]}#Always have an array freshly baked when strangers call
9
- end
10
-
11
- attr_reader :dimension, :parent_level
12
- attr_accessor :missing_member_default
13
-
14
- alias member_names keys
15
- alias members values
16
-
17
- def name
18
- @dimension.name
19
- end
20
-
21
- def flatten(member_name = nil, opts={}, &block)
22
-
23
- default_val = opts[:default] || @missing_member_default || 0
24
-
25
- self.values.inject([]) do |output, data|
26
- data.inject(output) do |flattened, value|
27
- value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
28
-
29
- if block_given?
30
- flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
31
- end
32
- flat_val ||= value[member_name] if member_name && value.include?(member_name)
33
- flat_val ||= default_val
34
- flattened << flat_val
35
- end
36
- end
37
- end
38
-
39
- def leaf_level?
40
- return self.length < 1 ||
41
- !self[self.keys[0]].is_a?(Cubicle::Data::Level)
42
- end
43
-
44
- def []=(key,val)
45
- prepare_level_member(val,key,self)
46
- super(key.to_s,val)
47
- end
48
-
49
- def hierarchy
50
- parent_level || self
51
- end
52
-
53
- private
54
- def prepare_level_member(member,member_name,parent_level)
55
- member.class_eval("include Cubicle::Data::Member")
56
- member.member_name = member_name
57
- member.parent_level = parent_level
58
- end
59
-
60
- end
61
- end
62
- end
1
+ module Cubicle
2
+ module Data
3
+ class Level < OrderedHashWithIndifferentAccess
4
+
5
+ def initialize(dimension,parent_level=nil)
6
+ @dimension = dimension
7
+ @parent_level = parent_level
8
+ super() {|hash,key|hash[key]=[]}#Always have an array freshly baked when strangers call
9
+ end
10
+
11
+ attr_reader :dimension, :parent_level
12
+ attr_accessor :missing_member_default
13
+
14
+ alias member_names keys
15
+ alias members values
16
+
17
+ def name
18
+ @dimension.name
19
+ end
20
+
21
+ def flatten(member_name = nil, opts={}, &block)
22
+
23
+ default_val = opts[:default] || @missing_member_default || 0
24
+
25
+ self.values.inject([]) do |output, data|
26
+ value = data.measure_values
27
+ value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
28
+
29
+ if block_given?
30
+ flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
31
+ end
32
+ flat_val ||= value[member_name] if member_name && value.include?(member_name)
33
+ flat_val ||= default_val
34
+ output << flat_val
35
+ end
36
+ end
37
+
38
+ def leaf_level?
39
+ return self.length < 1 ||
40
+ !self[self.keys[0]].is_a?(Cubicle::Data::Level)
41
+ end
42
+
43
+ def []=(key,val)
44
+ prepare_level_member(val,key,self)
45
+ super(key.to_s,val)
46
+ end
47
+
48
+ def hierarchy
49
+ parent_level || self
50
+ end
51
+
52
+ private
53
+ def prepare_level_member(member,member_name,parent_level)
54
+ member.class_eval("include Cubicle::Data::Member")
55
+ member.member_name = member_name
56
+ member.parent_level = parent_level
57
+ end
58
+
59
+ end
60
+ end
61
+ end
@@ -9,7 +9,7 @@ module Cubicle
9
9
  @time_dimension_name = query.time_dimension.name if query.respond_to?(:time_dimension) && query.time_dimension
10
10
  @time_period = query.time_period if query.respond_to?(:time_period)
11
11
  @time_range = query.time_range if query.respond_to?(:time_range)
12
- extract_data(query_results)
12
+ extract_data(query,query_results)
13
13
  @total_count = total_count if total_count
14
14
  end
15
15
 
@@ -45,16 +45,37 @@ module Cubicle
45
45
 
46
46
  private
47
47
 
48
- def extract_data(data)
48
+ def extract_data(query,data)
49
49
  data.each do |result|
50
50
  new = result.dup
51
51
  self << OrderedHashWithIndifferentAccess.new(new.delete("_id").merge(new.delete("value")))
52
- #these should be processed first, because they are often used as parts of the other calc measures
53
- measures.select{|m|m.distinct_count?}.each do |m|
54
- m.finalize_aggregation(self[-1])
52
+
53
+ finalize_aggregations(self[-1])
54
+
55
+ apply_aliases(query,self[-1])
56
+ end
57
+ end
58
+
59
+ def finalize_aggregations(row)
60
+ #these should be processed first, because they are often used as parts of the other calc measures
61
+ measures.select{|m|m.distinct_count?}.each do |m|
62
+ m.finalize_aggregation(row)
63
+ end
64
+ measures.select{|m|!m.distinct_count?}.each do |m|
65
+ m.finalize_aggregation(row)
66
+ end
67
+ end
68
+
69
+ def apply_aliases(query,row)
70
+ members = query.dimensions + query.measures
71
+ members.select{|m|m.alias_list}.each do |m|
72
+ m.alias_list.each do |m_alias|
73
+ row[m_alias.to_s] = row[m.name.to_s]
55
74
  end
56
- measures.select{|m|!m.distinct_count?}.each do |m|
57
- m.finalize_aggregation(self[-1])
75
+ end
76
+ if (query.respond_to?(:query_aliases) && query.query_aliases)
77
+ query.query_aliases.each do |key,value|
78
+ row[key.to_s] = row[value.to_s]
58
79
  end
59
80
  end
60
81
  end
@@ -1,7 +1,6 @@
1
1
  class OrderedHashWithIndifferentAccess < OrderedHash
2
- def initialize(initial_data={},&block)
2
+ def initialize(initial_data={})
3
3
  merge!(initial_data.stringify_keys)
4
- super(&block) if block
5
4
  end
6
5
 
7
6
 
@@ -2,7 +2,7 @@ module Cubicle
2
2
  class Query
3
3
  include Dsl
4
4
 
5
- attr_reader :time_period, :transient, :aggregation, :named_expressions
5
+ attr_reader :time_period, :transient, :aggregation, :named_expressions, :query_aliases
6
6
  attr_accessor :source_collection_name
7
7
 
8
8
  def initialize(aggregation)
@@ -104,7 +104,9 @@ module Cubicle
104
104
 
105
105
  def convert_dimension(dimension)
106
106
  return dimension if transient?
107
- Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
107
+ d = Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
108
+ d.alias_list = dimension.alias_list
109
+ d
108
110
  end
109
111
 
110
112
  def convert_measure(measure)
@@ -133,7 +135,9 @@ module Cubicle
133
135
  count_field = expression + "_count"
134
136
  expression = "#{expression}*#{count_field}"
135
137
  end
136
- Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
138
+ m = Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
139
+ m.alias_list = measure.alias_list
140
+ m
137
141
  end
138
142
 
139
143
  def unalias(*name_or_names)
@@ -1,3 +1,3 @@
1
1
  module Cubicle
2
- VERSION = '0.1.24'
2
+ VERSION = '0.1.25'
3
3
  end