activewarehouse 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/README +27 -14
  2. data/Rakefile +16 -5
  3. data/doc/references.txt +4 -0
  4. data/generators/bridge/templates/migration.rb +9 -2
  5. data/generators/bridge/templates/unit_test.rb +8 -0
  6. data/generators/date_dimension/USAGE +1 -0
  7. data/generators/date_dimension/date_dimension_generator.rb +16 -0
  8. data/generators/date_dimension/templates/fixture.yml +5 -0
  9. data/generators/date_dimension/templates/migration.rb +31 -0
  10. data/generators/date_dimension/templates/model.rb +3 -0
  11. data/generators/date_dimension/templates/unit_test.rb +8 -0
  12. data/generators/dimension/templates/migration.rb +1 -10
  13. data/generators/dimension_view/dimension_view_generator.rb +2 -2
  14. data/generators/dimension_view/templates/migration.rb +8 -2
  15. data/generators/fact/templates/migration.rb +2 -0
  16. data/generators/time_dimension/USAGE +1 -0
  17. data/generators/time_dimension/templates/fixture.yml +5 -0
  18. data/generators/time_dimension/templates/migration.rb +12 -0
  19. data/generators/time_dimension/templates/model.rb +3 -0
  20. data/generators/time_dimension/templates/unit_test.rb +8 -0
  21. data/generators/time_dimension/time_dimension_generator.rb +14 -0
  22. data/lib/active_warehouse.rb +13 -2
  23. data/lib/active_warehouse/aggregate.rb +54 -253
  24. data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
  25. data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
  26. data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
  27. data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
  28. data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
  29. data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
  30. data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
  31. data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
  32. data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
  33. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
  34. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
  35. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
  36. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
  37. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
  38. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
  39. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
  40. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
  41. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
  42. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
  43. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
  44. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
  45. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
  46. data/lib/active_warehouse/aggregate_field.rb +49 -0
  47. data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
  48. data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
  49. data/lib/active_warehouse/builder.rb +2 -1
  50. data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
  51. data/lib/active_warehouse/builder/generator/generator.rb +13 -0
  52. data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
  53. data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
  54. data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
  55. data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
  56. data/lib/active_warehouse/calculated_field.rb +27 -0
  57. data/lib/active_warehouse/compat/compat.rb +4 -4
  58. data/lib/active_warehouse/cube.rb +126 -225
  59. data/lib/active_warehouse/cube_query_result.rb +69 -0
  60. data/lib/active_warehouse/dimension.rb +64 -29
  61. data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
  62. data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
  63. data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
  64. data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
  65. data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
  66. data/lib/active_warehouse/fact.rb +119 -40
  67. data/lib/active_warehouse/field.rb +74 -0
  68. data/lib/active_warehouse/ordered_hash.rb +34 -0
  69. data/lib/active_warehouse/prejoin_fact.rb +97 -0
  70. data/lib/active_warehouse/report/abstract_report.rb +40 -14
  71. data/lib/active_warehouse/report/chart_report.rb +3 -3
  72. data/lib/active_warehouse/report/table_report.rb +8 -3
  73. data/lib/active_warehouse/version.rb +1 -1
  74. data/lib/active_warehouse/view/report_helper.rb +144 -34
  75. data/tasks/active_warehouse_tasks.rake +28 -10
  76. metadata +107 -30
@@ -0,0 +1,49 @@
1
+ module ActiveWarehouse
2
+ # Encapsulates a fact column in a fact table. These fields
3
+ # represent columns that should be aggregated.
4
+ class AggregateField < Field
5
+
6
+ attr_reader :strategy_name
7
+
8
+ # +fact_class+ is the class of the fact table this field is found in.
9
+ # +column_definition+ is the ActiveRecord ColumnDefinition instance for this
10
+ # column.
11
+ # +strategy_name+ is the name of the aggregation strategy to be used, defaults to :sum
12
+ # +field_options+ is a hash of raw options from the original aggregate definition.
13
+ def initialize(fact_class, column_definition, strategy_name = :sum, field_options = {})
14
+ super(fact_class, column_definition.name, column_definition.type, field_options)
15
+ @column_definition = column_definition
16
+ @limit = column_definition.limit
17
+ @scale = column_definition.scale
18
+ @precision = column_definition.precision
19
+ @strategy_name = strategy_name
20
+ end
21
+
22
+ # delegates to owning_class, returns the Fact that has this field
23
+ def fact_class
24
+ owning_class
25
+ end
26
+
27
+ def is_semiadditive?
28
+ !field_options[:semiadditive].nil?
29
+ end
30
+
31
+ # returns the Dimension that this semiadditive fact is over
32
+ def semiadditive_over
33
+ Dimension.to_dimension(field_options[:semiadditive])
34
+ end
35
+
36
+ # overrides Field.label, prepending the aggregation strategy name to label
37
+ def label
38
+ @label ? @label : "#{super}_#{strategy_name}"
39
+ end
40
+
41
+ def levels_from_parent
42
+ field_options[:levels_from_parent].nil? ? [] : field_options[:levels_from_parent]
43
+ end
44
+
45
+ def type_cast(value)
46
+ @column_definition.type_cast(value)
47
+ end
48
+ end
49
+ end
@@ -2,9 +2,11 @@ module ActiveWarehouse #:nodoc
2
2
  # Implements a bridge table.
3
3
  class Bridge < ActiveRecord::Base
4
4
  class << self
5
- # Get the table name. By default the table name will be the name of the bridge in singular form.
5
+ # Get the table name. By default the table name will be the name of the
6
+ # bridge in singular form.
6
7
  #
7
- # Example: DepartmentHierarchyBridge will have a table called department_hierarchy_bridge
8
+ # Example: DepartmentHierarchyBridge will have a table called
9
+ # department_hierarchy_bridge
8
10
  def table_name
9
11
  name = self.name.demodulize.underscore
10
12
  set_table_name(name)
@@ -12,4 +14,6 @@ module ActiveWarehouse #:nodoc
12
14
  end
13
15
  end
14
16
  end
15
- end
17
+ end
18
+
19
+ require 'active_warehouse/bridge/hierarchy_bridge'
@@ -0,0 +1,46 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ # Bridge class that models ragged hierarchies.
3
+ class HierarchyBridge < Bridge
4
+ class << self
5
+ def set_levels_from_parent(name)
6
+ @levels_from_parent = name
7
+ end
8
+
9
+ def levels_from_parent
10
+ @levels_from_parent ||= "levels_from_parent"
11
+ end
12
+
13
+ def set_effective_date(name)
14
+ @effective_date = name
15
+ end
16
+
17
+ def effective_date
18
+ @effective_date ||= "effective_date"
19
+ end
20
+
21
+ def set_expiration_date(name)
22
+ @expiration_date = name
23
+ end
24
+
25
+ def expiration_date
26
+ @expiration_date ||= "expiration_date"
27
+ end
28
+
29
+ def set_top_flag(name)
30
+ @top_flag = name
31
+ end
32
+
33
+ def top_flag
34
+ @top_flag ||= "top_flag"
35
+ end
36
+
37
+ def set_top_flag_value(value)
38
+ @top_flag_value = value
39
+ end
40
+
41
+ def top_flag_value
42
+ @top_flag_value ||= 'Y'
43
+ end
44
+ end
45
+ end
46
+ end
@@ -1,2 +1,3 @@
1
1
  require 'active_warehouse/builder/date_dimension_builder'
2
- require 'active_warehouse/builder/random_data_builder'
2
+ require 'active_warehouse/builder/random_data_builder'
3
+ require 'active_warehouse/builder/test_data_builder'
@@ -30,8 +30,8 @@ module ActiveWarehouse #:nodoc:
30
30
  # accessed by name.
31
31
  def build(options={})
32
32
  records = []
33
- date = start_date
34
- while date <= end_date
33
+ date = start_date.to_time
34
+ while date <= end_date.to_time
35
35
  record = {}
36
36
  record[:date] = date.strftime("%m/%d/%Y")
37
37
  record[:full_date_description] = date.strftime("%B %d,%Y")
@@ -52,6 +52,7 @@ module ActiveWarehouse #:nodoc:
52
52
  record[:calendar_month_number_in_year] = date.month
53
53
  record[:calendar_year_month] = date.strftime("%Y-%m")
54
54
  record[:calendar_quarter] = "Q#{date.quarter}"
55
+ record[:calendar_quarter_number_in_year] = date.quarter
55
56
  record[:calendar_year_quarter] = "#{date.strftime('%Y')}-#{record[:calendar_quarter]}"
56
57
  #record[:calendar_half_year] =
57
58
  record[:calendar_year] = "#{date.year}"
@@ -62,8 +63,10 @@ module ActiveWarehouse #:nodoc:
62
63
  record[:fiscal_year_month] = "FY#{date.fiscal_year}-" + date.fiscal_year_month.to_s.rjust(2, '0')
63
64
  record[:fiscal_quarter] = "FY Q#{date.fiscal_year_quarter}"
64
65
  record[:fiscal_year_quarter] = "FY#{date.fiscal_year}-Q#{date.fiscal_year_quarter}"
66
+ record[:fiscal_year_quarter_number] = date.fiscal_year_quarter
65
67
  #record[:fiscal_half_year] =
66
68
  record[:fiscal_year] = "FY#{date.fiscal_year}"
69
+ record[:fiscal_year_number] = date.fiscal_year
67
70
  record[:holiday_indicator] = holiday_indicators.include?(date) ? 'Holiday' : 'Nonholiday'
68
71
  record[:weekday_indicator] = weekday_indicators[date.wday]
69
72
  record[:selling_season] = 'None'
@@ -0,0 +1,13 @@
1
+ module ActiveWarehouse
2
+ module Builder
3
+ module Generator
4
+ # Base class for generators
5
+ class Generator
6
+ # Get the next value from the generator.
7
+ def next(options={})
8
+ raise "Abstract method"
9
+ end
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,20 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ module Builder #:nodoc:
3
+ module Generator #:nodoc:
4
+ # Generate a name consisting of one or more words from word groups
5
+ class NameGenerator < ActiveWarehouse::Builder::Generator::Generator
6
+ def next(options={})
7
+ options[:separator] ||= ' '
8
+ parts = []
9
+ word_groups = options[:word_groups]
10
+ 0.upto(word_groups.first.length) do |i|
11
+ word_groups.each do |word_group|
12
+ parts << word_group[i]
13
+ end
14
+ end
15
+ parts.join(options[:separator])
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,11 @@
1
+ module ActiveWarehouse
2
+ module Builder
3
+ module Generator
4
+ class ParagraphGenerator < ActiveWarehouse::Builder::Generator::Generator
5
+ def next(options={})
6
+
7
+ end
8
+ end
9
+ end
10
+ end
11
+ end
@@ -8,6 +8,7 @@ module ActiveWarehouse #:nodoc:
8
8
  # Hash of names mapped to generators where the name is the column name
9
9
  attr_reader :column_generators
10
10
 
11
+ # Initialize the random data builder
11
12
  def initialize
12
13
  @generators = {
13
14
  Fixnum => FixnumGenerator.new,
@@ -20,6 +21,9 @@ module ActiveWarehouse #:nodoc:
20
21
  @column_generators = {}
21
22
  end
22
23
 
24
+ # Build the data for the specified class. Name may be a Class (which must descend from ActiveWarehouse::Dimension
25
+ # or ActiveWarehouse::Fact), a String or a Symbol. String or Symbol will be converted to a class name and then
26
+ # passed back to this method.
23
27
  def build(name, options={})
24
28
  case name
25
29
  when Class
@@ -44,6 +48,7 @@ module ActiveWarehouse #:nodoc:
44
48
  # Build test dimension data for the specified dimension name.
45
49
  #
46
50
  # Options:
51
+ #
47
52
  # * <tt>:rows</tt>: The number of rows to create (defaults to 100)
48
53
  # * <tt>:generators</tt>: A map of generators where each key is Fixnum, Float, Date, Time, String, or Object and the
49
54
  # value extends from AbstractGenerator.
@@ -57,6 +62,9 @@ module ActiveWarehouse #:nodoc:
57
62
  row = {}
58
63
  dimension_class.content_columns.each do |column|
59
64
  generator = (options[:generators][column.klass] || @column_generators[column.name] || @generators[column.klass])
65
+ if generator.nil?
66
+ raise ArgumentError, "No generator found, unknown column type?: #{column.klass}"
67
+ end
60
68
  row[column.name] = generator.generate(column, options)
61
69
  end
62
70
  rows << row
@@ -74,6 +82,7 @@ module ActiveWarehouse #:nodoc:
74
82
  # * <tt>:fk_limit</tt>: A Hash of foreign key limits, where each key is the name of column and the value is
75
83
  # a number. For example options[:fk_limit][:date_id] = 1000 would limit the foreign key values to something between
76
84
  # 1 and 1000, inclusive.
85
+ # * <tt>:dimensions</tt>: The number of available dimension FKs
77
86
  def build_fact(name, options={})
78
87
  options[:rows] ||= 100
79
88
  options[:generators] ||= {}
@@ -87,9 +96,10 @@ module ActiveWarehouse #:nodoc:
87
96
  generator = (options[:generators][column.klass] || @generators[column.klass])
88
97
  row[column.name] = generator.generate(column, options)
89
98
  end
90
- fact_class.foreign_key_columns.each do |column|
91
- fk_limit = (options[:fk_limit][column.name] || 100) - 1
92
- row[column.name] = rand(fk_limit) + 1
99
+ fact_class.dimension_relationships.each do |name, reflection|
100
+ # it would be better to get a count of rows from the dimension tables
101
+ fk_limit = (options[:fk_limit][reflection.primary_key_name] || options[:dimensions] || 100) - 1
102
+ row[reflection.primary_key_name] = rand(fk_limit) + 1
93
103
  end
94
104
  rows << row
95
105
  end
@@ -112,8 +122,8 @@ module ActiveWarehouse #:nodoc:
112
122
  # Generate a random date value
113
123
  #
114
124
  # Options:
115
- # *<tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
116
- # *<tt>:end_date</tt>: The end date as a Date or Time object (default now)
125
+ # * <tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
126
+ # * <tt>:end_date</tt>: The end date as a Date or Time object (default now)
117
127
  def generate(column, options={})
118
128
  end_date = (options[:end_date] || Time.now).to_date
119
129
  start_date = (options[:start_date] || 1.year.ago).to_date
@@ -125,8 +135,8 @@ module ActiveWarehouse #:nodoc:
125
135
  # Basic Time generator
126
136
  #
127
137
  # Options:
128
- # *<tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
129
- # *<tt>:end_date</tt>: The end date as a Date or Time object (default now)
138
+ # * <tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
139
+ # * <tt>:end_date</tt>: The end date as a Date or Time object (default now)
130
140
  class TimeGenerator < DateGenerator #:nodoc:
131
141
  # Generate a random Time value
132
142
  def generate(column, options={})
@@ -139,8 +149,8 @@ module ActiveWarehouse #:nodoc:
139
149
  # Generate an integer from 0 to options[:max] inclusive
140
150
  #
141
151
  # Options:
142
- # *<tt>:max</tt>: The maximum allowed value (default 1000)
143
- # *<tt>:min</tt>: The minimum allowed value (default 0)
152
+ # * <tt>:max</tt>: The maximum allowed value (default 1000)
153
+ # * <tt>:min</tt>: The minimum allowed value (default 0)
144
154
  def generate(column, options={})
145
155
  options[:max] ||= 1000
146
156
  options[:min] ||= 0
@@ -153,7 +163,7 @@ module ActiveWarehouse #:nodoc:
153
163
  # Generate a float from 0 to options[:max] inclusive (default 1000)
154
164
  #
155
165
  # Options:
156
- # *<tt>:max</tt>: The maximum allowed value (default 1000)
166
+ # * <tt>:max</tt>: The maximum allowed value (default 1000)
157
167
  def generate(column, options={})
158
168
  options[:max] ||= 1000
159
169
  rand * options[:max].to_f
@@ -165,7 +175,7 @@ module ActiveWarehouse #:nodoc:
165
175
  # Generate a big decimal from 0 to options[:max] inclusive (default 1000)
166
176
  #
167
177
  # Options:
168
- # *<tt>:max</tt>: The maximum allowed value (default 1000)
178
+ # * <tt>:max</tt>: The maximum allowed value (default 1000)
169
179
  def generate(column, options={})
170
180
  options[:max] ||= 1000
171
181
  BigDecimal.new((rand * options[:max].to_f).to_s) # TODO: need BigDecimal type?
@@ -0,0 +1,54 @@
1
+ Dir[File.dirname(__FILE__) + "/generator/*.rb"].each { |file| require(file) }
2
+
3
+ module ActiveWarehouse #:nodoc:
4
+ module Builder #:nodoc:
5
+ # Unlike the RandomDataBuilder, which puts truly random data in the warehouse, this
6
+ # generator uses collections of possible values to construct semi-understandable data
7
+ class TestDataBuilder
8
+ def initialize
9
+
10
+ end
11
+
12
+ # Usage:
13
+ #
14
+ # fields = [:id,:product_name,:product_description,:suggested_retail_price]
15
+ # field_definitions = {
16
+ # :id => :sequence, # symbol or string
17
+ # :product_name => [['Foo','Bar'],['Baz','Bing']], # array
18
+ # :product_description => IpsumLorumGenerator, # class
19
+ # :suggested_retail_price => RandomNumberGenerator.new(0.00, 100.00) # generator instance
20
+ # }
21
+ def build(fields, field_definitions, options={})
22
+ options[:number] ||= 100
23
+ rows = []
24
+ generators = {}
25
+ # set up all of the generators first
26
+ field_definitions.each do |name, fd|
27
+ case fd
28
+ when Class
29
+ generators[name] = fd.new
30
+ when String, Symbol
31
+ generators[name] = "#{fd}Generator".classify.constantize.new
32
+ when Array
33
+ generators[name] = NameGenerator.new(fd)
34
+ when Generator
35
+ generators[name] = fd
36
+ else
37
+ raise "Invalid generator specified: #{fd}"
38
+ end
39
+ end
40
+
41
+ # generate all of the rows
42
+ 0.upto(options[:number]) do
43
+ row = {}
44
+ fields.each do |field|
45
+ row[field] = generators[field].next(options)
46
+ end
47
+ rows << row
48
+ end
49
+
50
+ rows
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,27 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ # A field that uses a Proc to calculate the value
3
+ class CalculatedField < Field
4
+ attr_reader :block
5
+ # Initialize the calculated field
6
+ #
7
+ # +fact_class+ is the fact class that the field is calculated in
8
+ # +name+ is the name of the calculated field
9
+ # +type+ is the type of the calculated field (defaults to :integer)
10
+ # +field_options+ is a Hash of options for the field
11
+ #
12
+ # This method accepts a block which should take a single argument that is the record
13
+ # itself.
14
+ def initialize(fact_class, name, type = :integer, field_options = {}, &block)
15
+ unless block_given?
16
+ raise ArgumentError, "A block is required for the calculated field #{name} in #{fact_class}"
17
+ end
18
+ super(fact_class, name.to_s, type, field_options)
19
+ @block = block
20
+ end
21
+
22
+ # Calculate the field value using the Hash of type-casted values
23
+ def calculate(values)
24
+ @block.call(values)
25
+ end
26
+ end
27
+ end
@@ -1,7 +1,7 @@
1
1
  # Provides 1.1.6 compatibility
2
- module ActiveRecord
3
- module Calculations
4
- module ClassMethods
2
+ module ActiveRecord #:nodoc:
3
+ module Calculations #:nodoc:
4
+ module ClassMethods #:nodoc:
5
5
  protected
6
6
  def construct_count_options_from_legacy_args(*args)
7
7
  options = {}
@@ -37,7 +37,7 @@ module ActiveRecord
37
37
  end
38
38
  end
39
39
 
40
- class Module
40
+ class Module #:nodoc:
41
41
  def alias_method_chain(target, feature)
42
42
  # Strip out punctuation on predicates or bang methods since
43
43
  # e.g. target?_without_feature is not a valid method name.
@@ -1,8 +1,12 @@
1
1
  module ActiveWarehouse
2
- # A Cube represents a collection of dimensions operating on a fact. The Cube provides a front-end for getting at the
3
- # underlying data. The Cube manages the creation and population of all underlying aggregates.
2
+ # A Cube represents a collection of dimensions operating on a fact. The Cube
3
+ # provides a front-end for getting at the
4
+ # underlying data. Cubes support pluggable aggregation. The default aggregation
5
+ # is the NoAggregate which goes directly
6
+ # to the fact and dimensions to answer queries.
4
7
  class Cube
5
8
  class << self
9
+
6
10
  # Callback which is invoked when subclasses are created
7
11
  def inherited(subclass)
8
12
  subclasses << subclass
@@ -13,54 +17,82 @@ module ActiveWarehouse
13
17
  @subclasses ||= []
14
18
  end
15
19
 
16
- # Defines the dimensions that this cube pivots on.
20
+ # Defines the dimensions that this cube pivots on. If the fact name and
21
+ # cube name are different (for example, if a PurchaseCube does not report
22
+ # on a PurchaseFact) then you *must* declare the <code>reports_on</code>
23
+ # first.
17
24
  def pivots_on(*dimension_list)
18
- # TODO: Validate if the fact is set
25
+ @dimensions_hierarchies = OrderedHash.new
26
+ @dimensions = []
19
27
  dimension_list.each do |dimension|
20
- dimensions << dimension
28
+ case dimension
29
+ when Symbol, String
30
+ dimensions << dimension.to_sym
31
+ dimensions_hierarchies[dimension.to_sym] = fact_class.dimension_class(dimension).hierarchies
32
+ when Hash
33
+ dimension_name = dimension.keys.first.to_sym
34
+ dimensions << dimension_name
35
+ dimensions_hierarchies[dimension_name] = [dimension[dimension_name]].flatten
36
+ else
37
+ raise ArgumentError, "Each argument to pivot_on must be a symbol, string or Hash"
38
+ end
21
39
  end
22
40
  end
23
41
  alias :pivot_on :pivots_on
24
42
 
25
- # Defines the fact that this cube reports on
26
- def reports_on(fact)
27
- # TODO: Validate if one or more dimension is set
28
- @fact = fact
43
+ # Defines the fact name, without the 'Fact' suffix, that this cube
44
+ # reports on. For instance, if you have PurchaseFact, you could then
45
+ # call <code>reports_on :purchase</code>.
46
+ #
47
+ # The default value for reports_on is to take the name of the cube,
48
+ # i.e. PurchaseCube, and remove the Cube suffix. The assumption is that
49
+ # your Cube name matches your Fact name.
50
+ def reports_on(fact_name)
51
+ @fact_name = fact_name
29
52
  end
30
53
  alias :report_on :reports_on
31
54
 
32
- # Rebuild all aggregate classes. Set :force => true to force the rebuild of aggregate classes.
55
+ # Rebuild the data warehouse.
33
56
  def rebuild(options={})
34
- logger.debug "Rebuilding aggregates for cube #{name}"
35
- options[:force] ||= false
36
- build_aggregate_classes(options)
57
+ populate(options)
37
58
  end
38
59
 
39
- # Populate all aggregates. Set :force => true to force the population of the aggregate class.
60
+ # Populate the data warehouse. Delegate to aggregate.populate
40
61
  def populate(options={})
41
- options[:force] ||= false
42
- aggregates.each do |agg_id, agg_clazz|
43
- if agg_clazz.needs_rebuild? || options[:force]
44
- logger.debug "Populating aggregate class #{agg_clazz.name}"
45
- agg_clazz.populate
46
- end
47
- end
48
- end
49
-
50
- # Get the fact that this cube reports on
51
- def fact
52
- @fact
62
+ aggregate.populate
53
63
  end
54
64
 
55
65
  # Get the dimensions that this cube pivots on
56
66
  def dimensions
57
- @dimensions ||= []
67
+ @dimensions ||= fact_class.dimension_relationships.collect{|k,v| k}
58
68
  end
59
69
 
60
- # Get the aggregate classes for this dimension
61
- def aggregates
62
- rebuild if @aggregates.nil?
63
- @aggregates
70
+ # Get an OrderedHash of each dimension mapped to its hierarchies which
71
+ # will be included in the cube
72
+ def dimensions_hierarchies
73
+ if @dimensions_hierarchies.nil?
74
+ @dimensions_hierarchies = OrderedHash.new
75
+ dimensions.each do |dimension|
76
+ @dimensions_hierarchies[dimension] = fact_class.dimension_class(dimension).hierarchies
77
+ end
78
+ end
79
+ @dimensions_hierarchies
80
+ end
81
+
82
+ # returns true if this cube pivots on a hierarchical dimension.
83
+ def pivot_on_hierarchical_dimension?
84
+ dimension_classes.each do |dimension|
85
+ return true if dimension.hierarchical_dimension?
86
+ end
87
+ return false
88
+ end
89
+
90
+ # returns the aggregate fields for this cube
91
+ # removing the aggregate fields that are defined in fact class that are
92
+ # related to hierarchical dimension, but the cube doesn't pivot on any
93
+ # hierarchical dimensions
94
+ def aggregate_fields
95
+ fact_class.aggregate_fields.reject {|field| !pivot_on_hierarchical_dimension? and !field.levels_from_parent.empty? }
64
96
  end
65
97
 
66
98
  # Get the class name for the specified cube name
@@ -73,7 +105,7 @@ module ActiveWarehouse
73
105
 
74
106
  # Get the aggregated fact class name
75
107
  def fact_class_name
76
- Fact.class_name(fact)
108
+ ActiveWarehouse::Fact.class_name(@fact_name || name.sub(/Cube$/,'').underscore.to_sym)
77
109
  end
78
110
 
79
111
  # Get the aggregated fact class instance
@@ -83,221 +115,90 @@ module ActiveWarehouse
83
115
 
84
116
  # Get a list of dimension class instances
85
117
  def dimension_classes
86
- dimensions.collect {|dimension| Dimension.class_name(dimension).constantize}
118
+ dimensions.collect do |dimension_name|
119
+ dimension_class(dimension_name)
120
+ end
121
+ end
122
+
123
+ # Get the dimension class for the specified dimension name
124
+ def dimension_class(dimension_name)
125
+ fact_class.dimension_relationships[dimension_name.to_sym].class_name.constantize
87
126
  end
88
127
 
128
+ # Get the cube logger
89
129
  def logger
90
130
  @logger ||= Logger.new('cube.log')
91
131
  end
92
132
 
133
+ # Get the time when the fact or any dimension referenced in this cube
134
+ # was last modified
93
135
  def last_modified
94
- lm = Fact.class_for_name(fact).last_modified
136
+ lm = fact_class.last_modified
95
137
  dimensions.each do |dimension|
96
- dim = Dimension.class_for_name(dimension)
138
+ dim = ActiveWarehouse::Dimension.class_for_name(dimension)
97
139
  lm = dim.last_modified if dim.last_modified > lm
98
140
  end
99
141
  lm
100
142
  end
101
143
 
102
- protected
103
- def build_aggregate_classes(options={})
104
- @aggregates = {}
105
- existing_dimension_pairs = []
106
- logger.debug "Building aggregate classes"
107
- dimensions.each do |column_dimension|
108
- dimensions.each do |row_dimension|
109
- next if column_dimension == row_dimension
110
- next if existing_dimension_pairs.include? [column_dimension,row_dimension]
111
- next if existing_dimension_pairs.include? [row_dimension,column_dimension]
112
-
113
- existing_dimension_pairs << [column_dimension,row_dimension]
114
- col_dim_class = Dimension.class_for_name(column_dimension)
115
- col_dim_class.hierarchy_levels.each_key do |column_hierarchy_name|
116
- row_dim_class = Dimension.class_for_name(row_dimension)
117
- row_dim_class.hierarchy_levels.each_key do |row_hierarchy_name|
118
- # Construct the aggregate meta data instance
119
- meta_data_attributes = {
120
- :cube_name => self.name,
121
- :dimension1 => column_dimension.to_s,
122
- :dimension1_hierarchy => column_hierarchy_name.to_s,
123
- :dimension2 => row_dimension.to_s,
124
- :dimension2_hierarchy => row_hierarchy_name.to_s
125
- }
126
- conditions = []
127
- condition_args = []
128
- meta_data_attributes.each do |key, value|
129
- conditions << "#{key} = ?"
130
- condition_args << value
131
- end
132
- conditions = [conditions.join(' and ')] + condition_args
133
- meta_data = AggregateMetaData.find(:first, :conditions => conditions)
134
- unless meta_data
135
- meta_data = AggregateMetaData.create(meta_data_attributes)
136
- end
137
-
138
- # Construct the aggregate class instance
139
- aggregate_class = Class.new(ActiveWarehouse::Aggregate)
140
- aggregate_class.name = "Agg#{meta_data.id}"
141
- logger.debug "Constructed aggregate #{aggregate_class.name}"
142
- aggregate_class.cube = self
143
- aggregate_class.dimension1 = column_dimension
144
- aggregate_class.dimension1_hierarchy_name = column_hierarchy_name
145
- aggregate_class.dimension2 = row_dimension
146
- aggregate_class.dimension2_hierarchy_name = row_hierarchy_name
147
-
148
- # Create the underlying aggregate storage table
149
- # TODO: fix the bug of data not being found when a storage table rebuild occurs
150
- force_storage_table_rebuild = options[:force] || aggregate_class.needs_rebuild?(last_modified)
151
- logger.debug "Force storage table rebuild? #{force_storage_table_rebuild}"
152
- aggregate_class.create_storage_table(force_storage_table_rebuild)
153
-
154
- # Keep a reference to the aggregate class instance
155
- @aggregates[meta_data.id] = aggregate_class
156
- end
157
- end
158
- end
159
- end
144
+ # The temp directory for storing files during warehouse rebuilds
145
+ attr_accessor :temp_dir
146
+ def temp_dir
147
+ @temp_dir ||= '/tmp'
148
+ end
149
+
150
+ # Specify the ActiveRecord class to connect through
151
+ # Note: this is a potential directive in a Cube subclass
152
+ attr_accessor :connect_through
153
+ def connect_through
154
+ @connect_through ||= ActiveRecord::Base
155
+ end
156
+
157
+ # Get an adapter connection
158
+ def connection
159
+ connect_through.connection
160
+ end
161
+
162
+ # Defaults to NoAggregate strategy.
163
+ def aggregate
164
+ @aggregate ||= ActiveWarehouse::Aggregate::NoAggregate.new(self)
165
+ end
166
+
167
+ def aggregate_class(agg_class)
168
+ @aggregate = agg_class.new(self)
160
169
  end
161
170
 
162
171
  end
163
172
 
164
173
  public
165
- def aggregate_map(column_dimension, column_hierarchy, row_dimension, row_hierarchy, cstage=0, rstage=0)
166
- # Fill known cells
167
- agg_map = AggregateMap.new
168
- agg_records = nil
169
- # s = Benchmark.realtime do
170
- agg_records = aggregate_records(column_dimension, column_hierarchy, cstage, row_dimension, row_hierarchy, rstage)
171
- #end
172
- # cs = 0
173
- # as = 0
174
- # calc = 0
175
- # x = 0
176
- calculated_fields = self.class.fact_class.calculated_fields
177
- calculated_field_options = self.class.fact_class.calculated_field_options
178
- #puts "loading aggregate_records took #{s}s"
179
- #s = Benchmark.realtime do
180
- #puts "there are #{agg_records.length} agg_records"
181
- #puts "there are #{self.class.fact_class.calculated_fields.length} calculated fields in class #{self.class.fact_class}"
182
- agg_records.each do |agg_record|
183
- # agg_record is an instance of Aggregate
184
- # collect the aggregate record data fields into an array
185
- data_array = nil
186
- #cs += Benchmark.realtime do
187
- data_array = agg_record.data_fields.collect{ |data_field_name| agg_record.send(data_field_name.to_sym) }
188
- #end
189
-
190
- # convert to an average where necessary
191
- # TODO: implement
192
-
193
- # add calculated fields to the data array
194
- #calc += Benchmark.realtime do
195
- calculated_fields.each do |calculated_field|
196
- options = calculated_field_options[calculated_field]
197
- data_array << options[:block].call(agg_record)
198
- end
199
- #end
200
-
201
- # add the data array to the aggregate map
202
- #as += Benchmark.realtime do
203
- agg_map.add_data(agg_record.dimension2_path, agg_record.dimension1_path, data_array)
204
- #end
205
- end
206
-
207
- #end
208
- #puts "creating the agg_map took #{s}s"
209
- #puts "total time spent collecting the data: #{cs}s, avg:#{cs/agg_records.length}s (#{(cs/s) * 100}%)"
210
- #puts "total time spent adding the data: #{as}s, avg:#{as/agg_records.length}s (#{(as/s) * 100}%)"
211
- #puts "total time spent calculating fields: #{calc}s, avg:#{calc/agg_records.length}s (#{(calc/s) * 100}%)"
212
- agg_map
174
+ # Query the cube. The column dimension, column hierarchy, row dimension and
175
+ # row hierarchy are all required.
176
+ #
177
+ # The conditions value is a String that represents a SQL condition appended
178
+ # to the where clause. TODO: this may eventually be converted to another
179
+ # query language.
180
+ #
181
+ # The cstage value represents the current column drill down stage and
182
+ # defaults to 0.
183
+ #
184
+ # The rstage value represents the current row drill down stage and defaults
185
+ # to 0. Filters contains key/value pairs where the key is a string of
186
+ # 'dimension.column' and the value is the value to filter by. For example:
187
+ #
188
+ # filters = {'date.calendar_year' => 2007, 'product.category' => 'Food'}
189
+ # query(:date, :cy, :store, :region, 1, 0, filters)
190
+ #
191
+ # Note that product.category refers to a dimension which is not actually
192
+ # visible but which is both part of the cube and is used for filtering.
193
+ def query(*args)
194
+ self.class.aggregate.query(*args)
213
195
  end
214
196
 
215
- protected
216
- # Return all of the Aggregate records for the specified dimensions and hierarchies
217
- def aggregate_records(column_dimension, column_hierarchy, cstage, row_dimension, row_hierarchy, rstage)
218
- k = Aggregate.key(column_dimension, column_hierarchy, row_dimension, row_hierarchy)
219
- if aggregates[k].nil?
220
- self.class.logger.debug("Aggregate #{k} not found in cache")
221
- conditions = ['cube_name = ?', self.class.name]
222
- conditions[0] << ' and dimension1 = ? and dimension1_hierarchy = ? and dimension2 = ? and dimension2_hierarchy = ?'
223
- conditions << column_dimension.to_s
224
- conditions << column_hierarchy.to_s
225
- conditions << row_dimension.to_s
226
- conditions << row_hierarchy.to_s
227
-
228
- conditions_reversed = ['cube_name = ?', self.class.name]
229
- conditions_reversed[0] << ' and dimension1 = ? and dimension1_hierarchy = ? and dimension2 = ? and dimension2_hierarchy = ?'
230
- conditions_reversed << row_dimension.to_s
231
- conditions_reversed << row_hierarchy.to_s
232
- conditions_reversed << column_dimension.to_s
233
- conditions_reversed << column_hierarchy.to_s
234
-
235
- aggregate_meta_data = AggregateMetaData.find(:first, :conditions => conditions)
236
- aggregate_meta_data ||= AggregateMetaData.find(:first, :conditions => conditions_reversed)
237
- if aggregate_meta_data.nil?
238
- self.class.rebuild
239
- aggregate_meta_data = AggregateMetaData.find(:first, :conditions => conditions)
240
- raise "Cannot find aggregate meta data for key #{k}" if aggregate_meta_data.nil?
241
- end
242
- aggregate_class = self.class.aggregates[aggregate_meta_data.id]
243
- if aggregate_class.nil?
244
- self.class.rebuild
245
- aggregate_class = self.class.aggregates[aggregate_meta_data.id]
246
- raise "Cannot find aggregate for id #{aggregate_meta_data.id}" if aggregate_class.nil?
247
- end
248
-
249
- aggregates[k] = aggregate_class.find(:all,
250
- :conditions => ['(dimension1_stage = ? and dimension2_stage = ?) or (dimension1_stage = ? and dimension2_stage = ?)',
251
- cstage, rstage, rstage, cstage])
252
- end
253
- aggregates[k]
197
+ # Get the database connection (delegates to Cube.connection class method)
198
+ def connection
199
+ self.class.connection
254
200
  end
255
201
 
256
- # Get a hash of all aggregate data
257
- def aggregates
258
- @aggregates ||= {}
259
- end
260
202
  end
261
203
 
262
- # In-memory map of aggregate values
263
- class AggregateMap
264
- attr_reader :length
265
-
266
- # Initialize the aggregate map
267
- def initialize
268
- @m = {}
269
- end
270
-
271
- # Return true if the aggregate map includes the specified row path
272
- def has_row_path?(row_path)
273
- @m.has_key?(row_path)
274
- end
275
-
276
- # Get the value for the specified row path, column path and field index
277
- def value(row_path, col_path, field_index)
278
- #puts "Getting value for #{row_path}, #{col_path} [field=#{field_index}]"
279
- row = @m[row_path]
280
- return 0 if row.nil?
281
- col = row[col_path]
282
- return 0 if col.nil?
283
- return col[field_index] || 0
284
- end
285
-
286
- # Get an array of the values for the specified row path and column path
287
- def values(row_path, col_path)
288
- row = @m[row_path]
289
- return Array.new(length, 0) if row.nil?
290
- col = row[col_path]
291
- return Array.new(length, 0) if col.nil?
292
- col
293
- end
294
-
295
- # Add an array of data for the given row and column path
296
- def add_data(row_path, col_path, data_array)
297
- @length ||= data_array.length
298
- #puts "Adding data for #{row_path}, #{col_path} [data=[#{data_array.join(',')}]]"
299
- @m[row_path] ||= {}
300
- @m[row_path][col_path] = data_array
301
- end
302
- end
303
204
  end