cubicle 0.1.24 → 0.1.25

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,9 @@
1
+ ==0.1.25
2
+ *Modified data extraction to ensure that member- and query-level aliases are respected in the final result. Also
3
+ stopped automatically dropping temporary map-reduce tables during query execution, for performance reasons. Instead,
4
+ added a static method to the Cubicle module, clear_temp_tables, which can be called at application startup or teardown:
5
+ Cubicle.clear_temp_tables()
6
+
1
7
  ==0.1.24
2
8
  *Added more detail to the profile for the 'find' action
3
9
 
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{cubicle}
8
- s.version = "0.1.24"
8
+ s.version = "0.1.25"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Nathan Stults"]
12
- s.date = %q{2010-05-13}
12
+ s.date = %q{2010-05-19}
13
13
  s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.}
14
14
  s.email = %q{hereiam@sonic.net}
15
15
  s.extra_rdoc_files = [
@@ -52,7 +52,11 @@ module Cubicle
52
52
 
53
53
  def self.logger
54
54
  @logger ||= (Cubicle.mongo.logger || Logger.new("cubicle.log"))
55
- end
55
+ end
56
+
57
+ def self.clear_temp_tables
58
+ self.mongo.database.collection_names.each{|cn|self.mongo.database[cn].drop if cn =~ /tmp.mr.mapreduce/i}
59
+ end
56
60
  end
57
61
 
58
62
  #Turn off HTML escaping in Mustache
@@ -46,7 +46,7 @@ module Cubicle
46
46
  if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank?
47
47
  filter = prepare_filter(query,options[:where] || {})
48
48
  else
49
- reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members")
49
+ reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members (#{agg_data.member_names.join(",").inspect})")
50
50
  end
51
51
  end
52
52
 
@@ -57,7 +57,7 @@ module Cubicle
57
57
  @profiler.measure(:find, :source=>reduction.name, :reason=>"Fetch final query results", :query=>find_options) do
58
58
  count = reduction.count
59
59
  results = reduction.find(filter,find_options).to_a
60
- reduction.drop if reduction.name =~ /^tmp.mr.*/
60
+ #reduction.drop if reduction.name =~ /^tmp.mr.*/
61
61
  Cubicle::Data::Table.new(query, results, count)
62
62
  end
63
63
 
@@ -1,29 +1,29 @@
1
- module Cubicle
2
- module Data
3
-
4
- def self.aggregate(data,measures)
5
- aggregated = OrderedHashWithIndifferentAccess.new {|hash,key|hash[key]=[]}
6
- #in step one, we will gather our values into columns to give to the measure
7
- #definitions to aggregation.
8
- data.each do |row|
9
- measures.each do |measure|
10
- if (row.include?(measure.name))
11
- val = row[measure.name]
12
- aggregated[measure.name] << val if val.kind_of?(Numeric)
13
- end
14
- end
15
- end
16
- #in step two, we will let the measures reduce the columns of values to a single number, preferably using
17
- #black magic or human sacrifice
18
- measures.each do |measure|
19
- aggregated[measure.name] = measure.aggregate(aggregated[measure.name])
20
- end
21
-
22
- #give each measure a final shot to operate on the results. This is useful for measures that
23
- #act on the results of other aggregations, like Ratio does.
24
- measures.each {|measure|measure.finalize_aggregation(aggregated)}
25
- aggregated
26
- end
27
-
28
- end
1
+ module Cubicle
2
+ module Data
3
+
4
+ def self.aggregate(data,measures)
5
+ aggregated = OrderedHashWithIndifferentAccess.new {|hash,key|hash[key]=[]}
6
+ #in step one, we will gather our values into columns to give to the measure
7
+ #definitions to aggregation.
8
+ data.each do |row|
9
+ measures.each do |measure|
10
+ if (row.include?(measure.name))
11
+ val = row[measure.name]
12
+ (aggregated[measure.name] ||= []) << val if val.kind_of?(Numeric)
13
+ end
14
+ end
15
+ end
16
+ #in step two, we will let the measures reduce the columns of values to a single number, preferably using
17
+ #black magic or human sacrifice
18
+ measures.each do |measure|
19
+ aggregated[measure.name] = measure.aggregate(aggregated[measure.name])
20
+ end
21
+
22
+ #give each measure a final shot to operate on the results. This is useful for measures that
23
+ #act on the results of other aggregations, like Ratio does.
24
+ measures.each {|measure|measure.finalize_aggregation(aggregated)}
25
+ aggregated
26
+ end
27
+
28
+ end
29
29
  end
@@ -1,55 +1,55 @@
1
- module Cubicle
2
- module Data
3
- class Hierarchy < Cubicle::Data::Level
4
- include Member
5
-
6
- attr_reader :measures
7
- def initialize(root_dimension,measures)
8
- super(root_dimension)
9
- @measures = measures
10
- @member_name = name
11
- end
12
-
13
- def self.hierarchize_table(table, dimension_names=nil)
14
- dimension_names = [table.time_dimension_name || table.dimension_names].flatten if dimension_names.blank?
15
- Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,table,table.dup)
16
- end
17
- private
18
-
19
- def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
20
- data, dimension_names = data.dup, dimension_names.dup
21
-
22
- return data if dimension_names.blank?
23
-
24
- dim_name = dimension_names.shift
25
- dim = table.dimensions.find{|d|d.name==dim_name}
26
- level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
27
- data.each do |tuple|
28
- member_name = tuple.delete(dim_name.to_s) || "Unknown"
29
- level[member_name] << tuple
30
- end
31
-
32
- level.each do |key,value|
33
- level[key] = Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,value,table,level)
34
- end
35
-
36
- Cubicle::Data::Hierarchy.expand_time_dimension_if_required(level,table)
37
-
38
- level
39
- end
40
-
41
- def self.expand_time_dimension_if_required(data_level,table)
42
- return unless data_level.leaf_level? && table.time_dimension_name && table.time_dimension_name.to_s == data_level.name.to_s &&
43
- table.time_range && table.time_period
44
-
45
- table.time_range.by!(table.time_period)
46
-
47
- table.time_range.each do |date|
48
- formatted_date = date.to_cubicle(table.time_period)
49
- data_level[formatted_date] = [OrderedHashWithIndifferentAccess.new] unless data_level.include?(formatted_date)
50
- end
51
- data_level.keys.sort!
52
- end
53
- end
54
- end
55
- end
1
+ module Cubicle
2
+ module Data
3
+ class Hierarchy < Cubicle::Data::Level
4
+ include Member
5
+
6
+ attr_reader :measures
7
+ def initialize(root_dimension,measures)
8
+ super(root_dimension)
9
+ @measures = measures
10
+ @member_name = name
11
+ end
12
+
13
+ def self.hierarchize_table(table, dimension_names=nil)
14
+ dimension_names = [table.time_dimension_name || table.dimension_names].flatten if dimension_names.blank?
15
+ Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,table,table.dup)
16
+ end
17
+ private
18
+
19
+ def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
20
+ data, dimension_names = data.dup, dimension_names.dup
21
+
22
+ return data if dimension_names.blank?
23
+
24
+ dim_name = dimension_names.shift
25
+ dim = table.dimensions.find{|d|d.name==dim_name}
26
+ level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
27
+ data.each do |tuple|
28
+ member_name = tuple.delete(dim_name.to_s) || "Unknown"
29
+ (level[member_name] ||= []) << tuple
30
+ end
31
+
32
+ level.each do |key,value|
33
+ level[key] = Cubicle::Data::Hierarchy.extract_dimensions(dimension_names,value,table,level)
34
+ end
35
+
36
+ Cubicle::Data::Hierarchy.expand_time_dimension_if_required(level,table)
37
+
38
+ level
39
+ end
40
+
41
+ def self.expand_time_dimension_if_required(data_level,table)
42
+ return unless data_level.leaf_level? && table.time_dimension_name && table.time_dimension_name.to_s == data_level.name.to_s &&
43
+ table.time_range && table.time_period
44
+
45
+ table.time_range.by!(table.time_period)
46
+
47
+ table.time_range.each do |date|
48
+ formatted_date = date.to_cubicle(table.time_period)
49
+ data_level[formatted_date] = [OrderedHashWithIndifferentAccess.new] unless data_level.include?(formatted_date)
50
+ end
51
+ data_level.keys.sort!
52
+ end
53
+ end
54
+ end
55
+ end
@@ -1,62 +1,61 @@
1
- module Cubicle
2
- module Data
3
- class Level < OrderedHashWithIndifferentAccess
4
-
5
- def initialize(dimension,parent_level=nil)
6
- @dimension = dimension
7
- @parent_level = parent_level
8
- super() {|hash,key|hash[key]=[]}#Always have an array freshly baked when strangers call
9
- end
10
-
11
- attr_reader :dimension, :parent_level
12
- attr_accessor :missing_member_default
13
-
14
- alias member_names keys
15
- alias members values
16
-
17
- def name
18
- @dimension.name
19
- end
20
-
21
- def flatten(member_name = nil, opts={}, &block)
22
-
23
- default_val = opts[:default] || @missing_member_default || 0
24
-
25
- self.values.inject([]) do |output, data|
26
- data.inject(output) do |flattened, value|
27
- value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
28
-
29
- if block_given?
30
- flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
31
- end
32
- flat_val ||= value[member_name] if member_name && value.include?(member_name)
33
- flat_val ||= default_val
34
- flattened << flat_val
35
- end
36
- end
37
- end
38
-
39
- def leaf_level?
40
- return self.length < 1 ||
41
- !self[self.keys[0]].is_a?(Cubicle::Data::Level)
42
- end
43
-
44
- def []=(key,val)
45
- prepare_level_member(val,key,self)
46
- super(key.to_s,val)
47
- end
48
-
49
- def hierarchy
50
- parent_level || self
51
- end
52
-
53
- private
54
- def prepare_level_member(member,member_name,parent_level)
55
- member.class_eval("include Cubicle::Data::Member")
56
- member.member_name = member_name
57
- member.parent_level = parent_level
58
- end
59
-
60
- end
61
- end
62
- end
1
+ module Cubicle
2
+ module Data
3
+ class Level < OrderedHashWithIndifferentAccess
4
+
5
+ def initialize(dimension,parent_level=nil)
6
+ @dimension = dimension
7
+ @parent_level = parent_level
8
+ super() {|hash,key|hash[key]=[]}#Always have an array freshly baked when strangers call
9
+ end
10
+
11
+ attr_reader :dimension, :parent_level
12
+ attr_accessor :missing_member_default
13
+
14
+ alias member_names keys
15
+ alias members values
16
+
17
+ def name
18
+ @dimension.name
19
+ end
20
+
21
+ def flatten(member_name = nil, opts={}, &block)
22
+
23
+ default_val = opts[:default] || @missing_member_default || 0
24
+
25
+ self.values.inject([]) do |output, data|
26
+ value = data.measure_values
27
+ value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
28
+
29
+ if block_given?
30
+ flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
31
+ end
32
+ flat_val ||= value[member_name] if member_name && value.include?(member_name)
33
+ flat_val ||= default_val
34
+ output << flat_val
35
+ end
36
+ end
37
+
38
+ def leaf_level?
39
+ return self.length < 1 ||
40
+ !self[self.keys[0]].is_a?(Cubicle::Data::Level)
41
+ end
42
+
43
+ def []=(key,val)
44
+ prepare_level_member(val,key,self)
45
+ super(key.to_s,val)
46
+ end
47
+
48
+ def hierarchy
49
+ parent_level || self
50
+ end
51
+
52
+ private
53
+ def prepare_level_member(member,member_name,parent_level)
54
+ member.class_eval("include Cubicle::Data::Member")
55
+ member.member_name = member_name
56
+ member.parent_level = parent_level
57
+ end
58
+
59
+ end
60
+ end
61
+ end
@@ -9,7 +9,7 @@ module Cubicle
9
9
  @time_dimension_name = query.time_dimension.name if query.respond_to?(:time_dimension) && query.time_dimension
10
10
  @time_period = query.time_period if query.respond_to?(:time_period)
11
11
  @time_range = query.time_range if query.respond_to?(:time_range)
12
- extract_data(query_results)
12
+ extract_data(query,query_results)
13
13
  @total_count = total_count if total_count
14
14
  end
15
15
 
@@ -45,16 +45,37 @@ module Cubicle
45
45
 
46
46
  private
47
47
 
48
- def extract_data(data)
48
+ def extract_data(query,data)
49
49
  data.each do |result|
50
50
  new = result.dup
51
51
  self << OrderedHashWithIndifferentAccess.new(new.delete("_id").merge(new.delete("value")))
52
- #these should be processed first, because they are often used as parts of the other calc measures
53
- measures.select{|m|m.distinct_count?}.each do |m|
54
- m.finalize_aggregation(self[-1])
52
+
53
+ finalize_aggregations(self[-1])
54
+
55
+ apply_aliases(query,self[-1])
56
+ end
57
+ end
58
+
59
+ def finalize_aggregations(row)
60
+ #these should be processed first, because they are often used as parts of the other calc measures
61
+ measures.select{|m|m.distinct_count?}.each do |m|
62
+ m.finalize_aggregation(row)
63
+ end
64
+ measures.select{|m|!m.distinct_count?}.each do |m|
65
+ m.finalize_aggregation(row)
66
+ end
67
+ end
68
+
69
+ def apply_aliases(query,row)
70
+ members = query.dimensions + query.measures
71
+ members.select{|m|m.alias_list}.each do |m|
72
+ m.alias_list.each do |m_alias|
73
+ row[m_alias.to_s] = row[m.name.to_s]
55
74
  end
56
- measures.select{|m|!m.distinct_count?}.each do |m|
57
- m.finalize_aggregation(self[-1])
75
+ end
76
+ if (query.respond_to?(:query_aliases) && query.query_aliases)
77
+ query.query_aliases.each do |key,value|
78
+ row[key.to_s] = row[value.to_s]
58
79
  end
59
80
  end
60
81
  end
@@ -1,7 +1,6 @@
1
1
  class OrderedHashWithIndifferentAccess < OrderedHash
2
- def initialize(initial_data={},&block)
2
+ def initialize(initial_data={})
3
3
  merge!(initial_data.stringify_keys)
4
- super(&block) if block
5
4
  end
6
5
 
7
6
 
@@ -2,7 +2,7 @@ module Cubicle
2
2
  class Query
3
3
  include Dsl
4
4
 
5
- attr_reader :time_period, :transient, :aggregation, :named_expressions
5
+ attr_reader :time_period, :transient, :aggregation, :named_expressions, :query_aliases
6
6
  attr_accessor :source_collection_name
7
7
 
8
8
  def initialize(aggregation)
@@ -104,7 +104,9 @@ module Cubicle
104
104
 
105
105
  def convert_dimension(dimension)
106
106
  return dimension if transient?
107
- Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
107
+ d = Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
108
+ d.alias_list = dimension.alias_list
109
+ d
108
110
  end
109
111
 
110
112
  def convert_measure(measure)
@@ -133,7 +135,9 @@ module Cubicle
133
135
  count_field = expression + "_count"
134
136
  expression = "#{expression}*#{count_field}"
135
137
  end
136
- Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
138
+ m = Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
139
+ m.alias_list = measure.alias_list
140
+ m
137
141
  end
138
142
 
139
143
  def unalias(*name_or_names)
@@ -1,3 +1,3 @@
1
1
  module Cubicle
2
- VERSION = '0.1.24'
2
+ VERSION = '0.1.25'
3
3
  end