cubicle 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. data/CHANGELOG.rdoc +14 -0
  2. data/README.rdoc +188 -174
  3. data/cubicle.gemspec +26 -10
  4. data/lib/cubicle.rb +47 -422
  5. data/lib/cubicle/aggregation.rb +58 -7
  6. data/lib/cubicle/aggregation/ad_hoc.rb +12 -0
  7. data/lib/cubicle/aggregation/aggregation_manager.rb +212 -0
  8. data/lib/cubicle/aggregation/dsl.rb +108 -0
  9. data/lib/cubicle/aggregation/map_reduce_helper.rb +55 -0
  10. data/lib/cubicle/data.rb +29 -84
  11. data/lib/cubicle/data/hierarchy.rb +55 -0
  12. data/lib/cubicle/data/level.rb +62 -0
  13. data/lib/cubicle/data/member.rb +28 -0
  14. data/lib/cubicle/data/table.rb +56 -0
  15. data/lib/cubicle/measure.rb +30 -20
  16. data/lib/cubicle/mongo_mapper/aggregate_plugin.rb +1 -1
  17. data/lib/cubicle/ordered_hash_with_indifferent_access.rb +27 -0
  18. data/lib/cubicle/query.rb +21 -194
  19. data/lib/cubicle/query/dsl.rb +118 -0
  20. data/lib/cubicle/query/dsl/time_intelligence.rb +89 -0
  21. data/lib/cubicle/ratio.rb +28 -12
  22. data/lib/cubicle/version.rb +2 -2
  23. data/test/cubicle/aggregation/ad_hoc_test.rb +21 -0
  24. data/test/cubicle/cubicle_aggregation_test.rb +84 -20
  25. data/test/cubicle/cubicle_query_test.rb +36 -0
  26. data/test/cubicle/data/data_test.rb +30 -0
  27. data/test/cubicle/data/level_test.rb +42 -0
  28. data/test/cubicle/data/member_test.rb +40 -0
  29. data/test/cubicle/{cubicle_data_test.rb → data/table_test.rb} +50 -50
  30. data/test/cubicle/duration_test.rb +46 -48
  31. data/test/cubicle/ordered_hash_with_indifferent_access_test.rb +19 -0
  32. data/test/cubicles/defect_cubicle.rb +31 -31
  33. data/test/log/test.log +102066 -0
  34. metadata +26 -10
  35. data/lib/cubicle/data_level.rb +0 -60
  36. data/test/cubicle/cubicle_data_level_test.rb +0 -58
  37. data/test/cubicle/cubicle_test.rb +0 -85
@@ -0,0 +1,118 @@
1
module Cubicle
  class Query
    # Query-building DSL mixed into Cubicle::Query.
    #
    # Every method called with arguments configures the query and returns
    # +self+, so calls can be chained. +limit+, +offset+, +by+, +order_by+ and
    # +where+ act as readers when called without arguments.
    #
    # The module relies on helpers that are defined outside this file
    # (+unalias+, +selected?+, +all_dimensions?+, +all_measures?+,
    # +convert_dimension+, +convert_measure+, +detect_time_period+,
    # +prepare_filter+) and on the instance variables of the including query.
    module Dsl
      include TimeIntelligence

      # Selects all dimensions and all measures.
      #
      # Returns self.
      def select_all
        select :all_dimensions, :all_measures
      end

      # Flags the query as transient and clears its source collection name.
      #
      # Returns self.
      def transient!
        @transient = true
        @source_collection_name = nil
        self
      end

      # Adds members to the query's selection.
      #
      # args - member names (after query-level aliases are resolved), a single
      #        Array of names, a single Cubicle::Dimension / Cubicle::Measure,
      #        or the pseudo names :all, :all_measures and :all_dimensions.
      #        When :all is present the remaining arguments are ignored.
      #
      # Selecting a member that is already selected is a no-op, which makes it
      # safe to call +select+ just to ensure a member is part of the result.
      #
      # Returns self.
      # Raises RuntimeError when a name matches neither a dimension nor a
      # measure of the underlying aggregation.
      def select(*args)
        args = unalias(args[0].is_a?(Array) ? args[0] : args)
        return select_all if args.include?(:all)

        if args.include?(:all_measures)
          @all_measures = true
          @measures = Cubicle::MemberList.new
        end
        if args.include?(:all_dimensions)
          @all_dimensions = true
          @dimensions = Cubicle::MemberList.new
        end

        return self if args.length == 1 && selected?(args[0])

        # The pseudo names never exist in the aggregation, so treat them as found.
        found = [:all_measures, :all_dimensions]

        if args.length == 1 && !all_dimensions? && args[0].is_a?(Cubicle::Dimension)
          # args.pop empties the list, so the existence check below passes.
          @dimensions << convert_dimension(args.pop)
        elsif args.length == 1 && !all_measures? && args[0].is_a?(Cubicle::Measure)
          @measures << convert_measure(args.pop)
        else
          args.each do |member_name|
            if (member = @aggregation.dimensions[member_name])
              @dimensions << convert_dimension(member)
            elsif (member = @aggregation.measures[member_name])
              @measures << convert_measure(member)
            end
            # Members selected earlier count as found even if the lookup missed.
            found << member_name if member || selected?(member_name)
          end
        end

        missing = args - found
        raise "You selected one or more members that do not exist in the underlying data source:#{missing.inspect}" unless missing.blank?
        self
      end

      # Reads the limit (no argument) or sets it and returns self.
      def limit(in_limit = nil)
        return @limit unless in_limit
        @limit = in_limit
        self
      end

      # Reads the offset (no argument) or sets it and returns self.
      def offset(in_offset = nil)
        return @offset unless in_offset
        @offset = in_offset
        self
      end
      alias skip offset

      # Reads the "by" members (no argument) or sets them.
      #
      # The given members are selected, stored under their real member names,
      # and - unless a time dimension was already chosen explicitly - used to
      # pick a time dimension: the aggregation's own time dimension when it is
      # among the arguments, otherwise the first member for which
      # +detect_time_period+ returns a period.
      #
      # Returns the stored member names when called without arguments,
      # otherwise self.
      def by(*args)
        return @by unless args.length > 0

        # Resolve any query level aliases
        args = unalias(args)
        # We'll need these in the result set
        select(*args)
        # Replace any alias names with actual member names
        @by = args.map { |member_name| @aggregation.find_member(member_name).name }
        # If a time dimension has been explicitly specified, detection isn't helpful.
        return self if @time_dimension

        if @aggregation.time_dimension && time_dimension.included_in?(args)
          time_dimension(@aggregation.time_dimension)
        else
          args.each do |by_member|
            if (detected = detect_time_period(by_member))
              time_dimension by_member
              @time_period = detected
              break
            end
          end
        end
        self
      end

      # Reads the ordering (no argument) or appends to it and returns self.
      #
      # args - member names (ascending order is assumed) or
      #        [member_name, direction] pairs.
      def order_by(*args)
        return @order_by unless args.length > 0
        args.each do |order|
          @order_by << (order.is_a?(Array) ? [unalias(order[0]), order[1]] : [unalias(order), :asc])
        end
        self
      end

      # Without an argument returns the result of +prepare_filter+; with a
      # Hash, merges its entries (keys resolved through +unalias+) into the
      # query filter and returns self.
      def where(filter = nil)
        return prepare_filter unless filter
        filter.each do |key, value|
          (@where ||= {})[unalias(key)] = value
        end
        self
      end

      # Registers query-level aliases.
      #
      # alias_hash - Hash of member name => alias, e.g.
      #              alias_member :product => :my_crazy_product
      #
      # Returns self.
      def alias_member(alias_hash)
        alias_hash.each { |key, value| @query_aliases[value] = key }
        self
      end
      alias alias_members alias_member
    end
  end
end
@@ -0,0 +1,89 @@
1
module Cubicle
  class Query
    module Dsl
      # Date-range helpers for the query DSL.
      #
      # The methods maintain @from_date, @to_date, @to_date_filter and
      # @time_dimension on the including object. Range-setting helpers return
      # +self+ so they can be chained.
      #
      # NOTE(review): +last+, +last_complete+ and +next+ appear to expect
      # ActiveSupport::Duration-style objects (+parts+, +ago+, +from_now+) and
      # a +beginning_of+ extension on the time class - confirm against the
      # project's core extensions.
      module TimeIntelligence
        # Reads or sets the query's time range.
        #
        # date_range - optional Range; an end-exclusive range is remembered as
        #              a "$lt" upper bound, an inclusive one as "$lte".
        #
        # Without an argument returns nil when no bound has been set;
        # otherwise a Range built from the stored bounds (a missing bound
        # defaults to Time.now) that is inclusive only when the stored filter
        # is "$lte". With an argument returns the assigned [from, to] pair.
        def time_range(date_range = nil)
          return nil unless date_range || @from_date || @to_date

          unless date_range
            start, stop = @from_date || Time.now, @to_date || Time.now
            return @to_date_filter == "$lte" ? (start..stop) : (start...stop)
          end

          @to_date_filter = date_range.exclude_end? ? "$lt" : "$lte"
          @from_date, @to_date = date_range.first, date_range.last
        end

        # Reads or sets the dimension used for time filtering.
        #
        # dimension - a Cubicle::Dimension, or a (possibly aliased) dimension
        #             name looked up in the aggregation.
        #
        # Without an argument returns the current time dimension, defaulting
        # to (and remembering) the aggregation's time dimension.
        # Raises RuntimeError when no matching dimension is found.
        def time_dimension(dimension = nil)
          return (@time_dimension ||= @aggregation.time_dimension) unless dimension
          @time_dimension = dimension.is_a?(Cubicle::Dimension) ? dimension : @aggregation.dimensions[unalias(dimension)]
          raise "No dimension matching the name #{unalias(dimension)} could be found in the underlying data source" unless @time_dimension
          # The dimension is deliberately not auto-selected here.
        end
        alias date_dimension time_dimension

        # Restricts the query to the +duration+ ending at +as_of+.
        #
        # duration - :year, :month, :week or :day (meaning one such unit), or
        #            a duration object.
        #
        # The start is +duration+ before +as_of+, advanced by one unit of the
        # duration's first part. Returns self.
        def last(duration, as_of = Time.now)
          duration = to_duration(duration)
          period = duration.parts[0][0]
          @from_date = duration.ago(as_of).advance(period => 1)
          @to_date = as_of
          self
        end
        alias for_the_last last

        # Restricts the query to the +duration+ that ends (exclusively, "$lt")
        # at the beginning of the period containing +as_of+. Returns self.
        def last_complete(duration, as_of = Time.now)
          duration = to_duration(duration)
          period = duration.parts[0][0]
          @to_date = as_of.beginning_of(period)
          @from_date = duration.ago(@to_date)
          @to_date_filter = "$lt"
          self
        end
        alias for_the_last_complete last_complete

        # Restricts the query to the +duration+ starting at +as_of+.
        #
        # The end is +duration+ after +as_of+, moved back by one unit of the
        # duration's first part. Returns self.
        def next(duration, as_of = Time.now)
          duration = to_duration(duration)
          period = duration.parts[0][0]
          @to_date = duration.from_now(as_of).advance(period => -1)
          @from_date = as_of
          self
        end
        alias for_the_next next

        # Restricts the query to the span from the beginning of +period+
        # (as computed by +as_of.beginning_of+) up to +as_of+. Returns self.
        def this(period, as_of = Time.now)
          @from_date = as_of.beginning_of(period)
          @to_date = as_of
          self
        end

        # Reads the lower bound (no argument) or sets it and returns self.
        #
        # time - an object responding to +to_time+, or a Symbol naming a class
        #        method of Date or Time (for example :today or :now).
        def from(time = nil)
          return @from_date unless time
          @from_date = resolve_time(time)
          self
        end

        # Reads the upper bound (no argument) or sets it and returns self.
        # Accepts the same kinds of argument as +from+.
        def until(time = nil)
          return @to_date unless time
          @to_date = resolve_time(time)
          self
        end

        # Year to date: from the beginning of the year through +as_of+.
        def ytd(as_of = Time.now)
          this :year, as_of
        end
        alias year_to_date ytd

        # Month to date: from the beginning of the month through +as_of+.
        def mtd(as_of = Time.now)
          this :month, as_of
        end
        alias month_to_date mtd

        private

        # Turns :year/:month/:week/:day into a duration of one such unit;
        # any other value is returned unchanged.
        def to_duration(duration)
          [:year, :month, :week, :day].include?(duration) ? 1.send(duration) : duration
        end

        # Converts a +from+/+until+ argument into a time.
        #
        # Symbols are sent to Date first (the result is converted with
        # +to_time+) and fall back to Time, so names only Time understands,
        # such as :now, are no longer dropped. Returns nil for a Symbol that
        # neither class responds to.
        def resolve_time(time)
          return time.to_time unless time.is_a?(Symbol)

          if Date.respond_to?(time)
            Date.send(time).to_time
          elsif Time.respond_to?(time)
            Time.send(time)
          end
        end
      end
    end
  end
end
data/lib/cubicle/ratio.rb CHANGED
@@ -1,12 +1,28 @@
1
- module Cubicle
2
- class Ratio < CalculatedMeasure
3
-
4
- attr_reader :numerator, :denominator
5
- def initialize(member_name,numerator,denominator,opts={})
6
- @numerator, @denominator = numerator, denominator
7
- opts[:expression]="(value.#{denominator} > 0 && value.#{numerator} ? value.#{numerator}/value.#{denominator} : 0)"
8
- super(member_name,opts)
9
- end
10
-
11
- end
12
- end
1
module Cubicle
  # A calculated measure expressing +numerator+ divided by +denominator+.
  class Ratio < CalculatedMeasure

    attr_reader :numerator, :denominator

    # member_name - name of the ratio measure.
    # numerator   - name of the measure used as numerator.
    # denominator - name of the measure used as denominator.
    # opts        - options forwarded to the superclass; :expression is always
    #               replaced by the ratio expression. The caller's hash is
    #               left untouched.
    def initialize(member_name, numerator, denominator, opts = {})
      @numerator, @denominator = numerator, denominator
      expression = "(value.#{denominator} > 0 && value.#{numerator} ? value.#{numerator}/value.#{denominator} : 0)"
      super(member_name, opts.merge(:expression => expression))
    end

    # A ratio cannot be aggregated from individual values; the real value is
    # computed in +finalize_aggregation+. Always returns 0 as a placeholder.
    def aggregate(values)
      0
    end

    # Computes the ratio from the already aggregated numerator and
    # denominator and stores it in +aggregation+ under this measure's name.
    #
    # If the numerator is non-zero we do the division even if the denominator
    # is zero. That yields +/-Infinity, which signals a problem with the
    # data, and that is fine. However, if the numerator is zero then maybe
    # there just isn't any data, in which case NaN would be pretty
    # pessimistic - we return 0 instead. Negative numerators are divided like
    # any other non-zero value.
    def finalize_aggregation(aggregation)
      n = aggregation[numerator].to_f
      d = aggregation[denominator].to_f

      aggregation[name] = n.zero? ? 0 : n / d
    end

  end
end
@@ -1,3 +1,3 @@
1
- module Cubicle
2
- VERSION = '0.1.2'
1
module Cubicle
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = '0.1.3'.freeze
end
@@ -0,0 +1,21 @@
1
+ require "test_helper"
2
+
3
# Exercises an ad hoc aggregation defined inline against the "defects" collection.
class AdHocTest < ActiveSupport::TestCase
  context "Executing an ad hoc query via an aggregation" do
    setup do
      Defect.create_test_data
      ad_hoc = Cubicle::Aggregation::AdHoc.new("defects") do
        dimension :product, :field_name => "product.name"
        count :total, :field_name => "defect_id"
      end
      @results = ad_hoc.query
    end

    should "return appropriately aggregated data" do
      # [product, total] for each expected result row, in result order.
      expected_rows = [
        ["Brush Fire Bottle Rockets", 1],
        ["Evil's Pickling Spice", 1],
        ["Sad Day Moonshine", 3]
      ]
      expected_rows.each_with_index do |(product, total), row|
        assert_equal product, @results[row]["product"]
        assert_equal total, @results[row]["total"]
      end
    end
  end
end
@@ -1,21 +1,85 @@
1
- require "test_helper"
2
-
3
- class AggregationTest < ActiveSupport::TestCase
4
- context "Executing an ad hoc query via an aggregation" do
5
- setup do
6
- Defect.create_test_data
7
- @results = Cubicle::Aggregation.new("defects") do
8
- dimension :product, :field_name=>"product.name"
9
- count :total, :field_name=>"defect_id"
10
- end.query
11
- end
12
- should "return appropriately aggregated data" do
13
- assert_equal "Brush Fire Bottle Rockets", @results[0]["product"]
14
- assert_equal 1, @results[0]["total"]
15
- assert_equal "Evil's Pickling Spice", @results[1]["product"]
16
- assert_equal 1, @results[1]["total"]
17
- assert_equal "Sad Day Moonshine", @results[2]["product"]
18
- assert_equal 3, @results[2]["total"]
19
- end
20
- end
1
+ require "test_helper"
2
+
3
# Verifies the default DefectCubicle query and the aggregations created by
# processing the cube.
class CubicleAggregationTest < ActiveSupport::TestCase

  # Expected result rows of DefectCubicle.query, in result order. Float values
  # are compared with a tolerance; everything else is compared exactly.
  EXPECTED_ROWS = [
    {"manufacture_date" => "2009-12-09", "month" => "2009-12", "year" => "2009",
     "product" => "Brush Fire Bottle Rockets", "region" => "South",
     "operator" => "Buddy", "outcome" => "Repaired",
     "total_defects" => 1, "preventable_defects" => 0,
     "total_cost" => 0.43, "avg_cost" => 0.43, "preventable_pct" => 0.0},
    {"manufacture_date" => "2010-01-01", "month" => "2010-01", "year" => "2010",
     "product" => "Sad Day Moonshine", "region" => "West",
     "operator" => "Franny", "outcome" => "Repaired",
     "total_defects" => 2, "preventable_defects" => 1,
     "total_cost" => 12.97, "avg_cost" => 6.485, "preventable_pct" => 0.5},
    {"manufacture_date" => "2010-01-05", "month" => "2010-01", "year" => "2010",
     "product" => "Evil's Pickling Spice", "region" => "Midwest",
     "operator" => "Seymour", "outcome" => "Discarded",
     "total_defects" => 1, "preventable_defects" => 1,
     "total_cost" => 0.02, "avg_cost" => 0.02, "preventable_pct" => 1.0},
    {"manufacture_date" => "2010-02-01", "month" => "2010-02", "year" => "2010",
     "product" => "Sad Day Moonshine", "region" => "West",
     "operator" => "Zooey", "outcome" => "Consumed",
     "total_defects" => 1, "preventable_defects" => 1,
     "total_cost" => 2.94, "avg_cost" => 2.94, "preventable_pct" => 1.0}
  ]

  # Asserts that every expected member of a row matches the actual row.
  # Floats are checked with assert_in_delta because exact float equality is
  # brittle for aggregated costs and ratios.
  def assert_row(expected, actual, row_index)
    expected.each do |member, value|
      message = "row #{row_index}, member #{member}"
      if value.is_a?(Float)
        assert_in_delta value, actual[member], 0.0001, message
      else
        assert_equal value, actual[member], message
      end
    end
  end

  context "Given a query with several dimensions and measures" do
    context "Cubicle#select" do
      setup do
        Defect.create_test_data
      end

      context "without arguments" do
        setup do
          @results = DefectCubicle.query
        end

        should "return a collection of appropriate aggregated values based on the cubicle parameters" do
          assert_equal EXPECTED_ROWS.length, @results.length
          EXPECTED_ROWS.each_with_index do |expected, row_index|
            assert_row expected, @results[row_index], row_index
          end
        end
      end

      context "Processing a cube" do
        setup do
          DefectCubicle.expire!
          DefectCubicle.process
        end

        should "create the specified aggregations" do
          collection_names = Cubicle.mongo.database.collection_names
          assert collection_names.include?("defect_cubicles_cubicle_aggregation_month.product.year")
          assert collection_names.include?("defect_cubicles_cubicle_aggregation_month.region")
        end
      end
    end
  end

end
@@ -351,5 +351,41 @@ class CubicleQueryTest < ActiveSupport::TestCase
351
351
  Cubicle::DateTime.db_time_format = :iso8601
352
352
  end
353
353
  end
354
# Query-level aliases registered with alias_member must be honoured by the
# by, where and order_by clauses.
context "when a query level alias has been specified" do
  should "respect the alias in the by clause" do
    by_date = DefectCubicle.query do
      alias_member :date => :my_crazy_date
      select :all_measures
      by :my_crazy_date
    end

    assert_equal :manufacture_date, by_date.name
    assert_equal "2009-12-09", by_date.member_names[0]
  end

  should "respect the alias in the where clause" do
    rows = DefectCubicle.query do
      alias_member :product => :my_crazy_product
      select :product, :all_measures
      where :my_crazy_product => "Sad Day Moonshine"
    end

    assert_equal 1, rows.length
    moonshine = rows[0]
    assert_equal "Sad Day Moonshine", moonshine["product"]
    assert_equal 3, moonshine["total_defects"]
    assert_equal 2, moonshine["preventable_defects"]
    assert_in_delta 15.91, moonshine["total_cost"], 0.0001
    assert_in_delta 15.91/3, moonshine["avg_cost"], 0.0001
    assert_in_delta 2/3.0, moonshine["preventable_pct"], 0.0001
  end

  should "respect the alias in the order by clause" do
    rows = DefectCubicle.query do
      alias_member :product => :my_crazy_product
      select :product, :all_measures
      order_by [:my_crazy_product, :desc]
    end

    assert_equal 3, rows.length
    # Products are expected in descending name order.
    assert_equal "Sad Day Moonshine", rows[0]["product"]
    assert_equal "Evil's Pickling Spice", rows[1]["product"]
    assert_equal "Brush Fire Bottle Rockets", rows[2]["product"]
  end
end
354
390
  end
355
391
  end
@@ -0,0 +1,30 @@
1
+ require "test_helper"
2
+
3
# Covers Cubicle::Data.aggregate for plain measures and for ratios that are
# computed when the aggregation is finalized.
class DataTest < ActiveSupport::TestCase
  context "Data#aggregate" do
    should "Aggregate a given table of numbers according to the provided measures" do
      # Rows: [{"m1"=>1.0, "m2"=>1.0, "m3"=>1.0}, {"m1"=>2.0, "m2"=>3.0, "m3"=>4.0}, {"m1"=>3.0, "m2"=>5.0, "m3"=>7.0}]
      rows = (0...3).map do |index|
        HashWithIndifferentAccess.new(:m1 => 1.0 * index + 1,
                                      :m2 => 2.0 * index + 1,
                                      :m3 => 3.0 * index + 1)
      end
      sum_measure     = Cubicle::Measure.new(:m1, :aggregation_method => :sum)
      count_measure   = Cubicle::Measure.new(:m2, :aggregation_method => :count)
      average_measure = Cubicle::Measure.new(:m3, :aggregation_method => :average)

      totals = Cubicle::Data.aggregate(rows, [sum_measure, count_measure, average_measure])

      assert_equal 6.0, totals[:m1]
      assert_equal 9.0, totals[:m2]
      assert_equal 4.0, totals[:m3]
    end

    should "Finalize the aggregation" do
      # Rows: [{"m1"=>1.0, "m2"=>1.0}, {"m1"=>2.0, "m2"=>3.0}, {"m1"=>3.0, "m2"=>5.0}]
      rows = (0...3).map do |index|
        HashWithIndifferentAccess.new(:m1 => 1.0 * index + 1, :m2 => 2.0 * index + 1)
      end
      measures = [
        Cubicle::Measure.new(:m1, :aggregation_method => :sum),
        Cubicle::Measure.new(:m2, :aggregation_method => :count),
        Cubicle::Ratio.new(:m3, :m1, :m2)
      ]

      totals = Cubicle::Data.aggregate(rows, measures)

      # The ratio m3 = sum(m1) / count(m2) is only available after finalization.
      assert_equal 6.0/9.0, totals[:m3]
    end
  end
end