activewarehouse 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/README +27 -14
  2. data/Rakefile +16 -5
  3. data/doc/references.txt +4 -0
  4. data/generators/bridge/templates/migration.rb +9 -2
  5. data/generators/bridge/templates/unit_test.rb +8 -0
  6. data/generators/date_dimension/USAGE +1 -0
  7. data/generators/date_dimension/date_dimension_generator.rb +16 -0
  8. data/generators/date_dimension/templates/fixture.yml +5 -0
  9. data/generators/date_dimension/templates/migration.rb +31 -0
  10. data/generators/date_dimension/templates/model.rb +3 -0
  11. data/generators/date_dimension/templates/unit_test.rb +8 -0
  12. data/generators/dimension/templates/migration.rb +1 -10
  13. data/generators/dimension_view/dimension_view_generator.rb +2 -2
  14. data/generators/dimension_view/templates/migration.rb +8 -2
  15. data/generators/fact/templates/migration.rb +2 -0
  16. data/generators/time_dimension/USAGE +1 -0
  17. data/generators/time_dimension/templates/fixture.yml +5 -0
  18. data/generators/time_dimension/templates/migration.rb +12 -0
  19. data/generators/time_dimension/templates/model.rb +3 -0
  20. data/generators/time_dimension/templates/unit_test.rb +8 -0
  21. data/generators/time_dimension/time_dimension_generator.rb +14 -0
  22. data/lib/active_warehouse.rb +13 -2
  23. data/lib/active_warehouse/aggregate.rb +54 -253
  24. data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
  25. data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
  26. data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
  27. data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
  28. data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
  29. data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
  30. data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
  31. data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
  32. data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
  33. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
  34. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
  35. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
  36. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
  37. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
  38. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
  39. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
  40. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
  41. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
  42. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
  43. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
  44. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
  45. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
  46. data/lib/active_warehouse/aggregate_field.rb +49 -0
  47. data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
  48. data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
  49. data/lib/active_warehouse/builder.rb +2 -1
  50. data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
  51. data/lib/active_warehouse/builder/generator/generator.rb +13 -0
  52. data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
  53. data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
  54. data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
  55. data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
  56. data/lib/active_warehouse/calculated_field.rb +27 -0
  57. data/lib/active_warehouse/compat/compat.rb +4 -4
  58. data/lib/active_warehouse/cube.rb +126 -225
  59. data/lib/active_warehouse/cube_query_result.rb +69 -0
  60. data/lib/active_warehouse/dimension.rb +64 -29
  61. data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
  62. data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
  63. data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
  64. data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
  65. data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
  66. data/lib/active_warehouse/fact.rb +119 -40
  67. data/lib/active_warehouse/field.rb +74 -0
  68. data/lib/active_warehouse/ordered_hash.rb +34 -0
  69. data/lib/active_warehouse/prejoin_fact.rb +97 -0
  70. data/lib/active_warehouse/report/abstract_report.rb +40 -14
  71. data/lib/active_warehouse/report/chart_report.rb +3 -3
  72. data/lib/active_warehouse/report/table_report.rb +8 -3
  73. data/lib/active_warehouse/version.rb +1 -1
  74. data/lib/active_warehouse/view/report_helper.rb +144 -34
  75. data/tasks/active_warehouse_tasks.rake +28 -10
  76. metadata +107 -30
@@ -0,0 +1,49 @@
1
+ module ActiveWarehouse
2
+ # Encapsulates a fact column in a fact table. These fields
3
+ # represent columns that should be aggregated.
4
+ class AggregateField < Field
5
+
6
+ attr_reader :strategy_name
7
+
8
+ # +fact_class+ is the class of the fact table this field is found in.
9
+ # +column_definition+ is the ActiveRecord ColumnDefinition instance for this
10
+ # column.
11
+ # +strategy_name+ is the name of the aggregation strategy to be used, defaults to :sum
12
+ # +field_options+ is a hash of raw options from the original aggregate definition.
13
+ def initialize(fact_class, column_definition, strategy_name = :sum, field_options = {})
14
+ super(fact_class, column_definition.name, column_definition.type, field_options)
15
+ @column_definition = column_definition
16
+ @limit = column_definition.limit
17
+ @scale = column_definition.scale
18
+ @precision = column_definition.precision
19
+ @strategy_name = strategy_name
20
+ end
21
+
22
+ # delegates to owning_class, returns the Fact that has this field
23
+ def fact_class
24
+ owning_class
25
+ end
26
+
27
+ def is_semiadditive?
28
+ !field_options[:semiadditive].nil?
29
+ end
30
+
31
+ # returns the Dimension that this semiadditive fact is over
32
+ def semiadditive_over
33
+ Dimension.to_dimension(field_options[:semiadditive])
34
+ end
35
+
36
+ # overrides Field.label, prepending the aggregation strategy name to label
37
+ def label
38
+ @label ? @label : "#{super}_#{strategy_name}"
39
+ end
40
+
41
+ def levels_from_parent
42
+ field_options[:levels_from_parent].nil? ? [] : field_options[:levels_from_parent]
43
+ end
44
+
45
+ def type_cast(value)
46
+ @column_definition.type_cast(value)
47
+ end
48
+ end
49
+ end
@@ -2,9 +2,11 @@ module ActiveWarehouse #:nodoc
2
2
  # Implements a bridge table.
3
3
  class Bridge < ActiveRecord::Base
4
4
  class << self
5
- # Get the table name. By default the table name will be the name of the bridge in singular form.
5
+ # Get the table name. By default the table name will be the name of the
6
+ # bridge in singular form.
6
7
  #
7
- # Example: DepartmentHierarchyBridge will have a table called department_hierarchy_bridge
8
+ # Example: DepartmentHierarchyBridge will have a table called
9
+ # department_hierarchy_bridge
8
10
  def table_name
9
11
  name = self.name.demodulize.underscore
10
12
  set_table_name(name)
@@ -12,4 +14,6 @@ module ActiveWarehouse #:nodoc
12
14
  end
13
15
  end
14
16
  end
15
- end
17
+ end
18
+
19
+ require 'active_warehouse/bridge/hierarchy_bridge'
@@ -0,0 +1,46 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ # Bridge class that models ragged hierarchies.
3
+ class HierarchyBridge < Bridge
4
+ class << self
5
+ def set_levels_from_parent(name)
6
+ @levels_from_parent = name
7
+ end
8
+
9
+ def levels_from_parent
10
+ @levels_from_parent ||= "levels_from_parent"
11
+ end
12
+
13
+ def set_effective_date(name)
14
+ @effective_date = name
15
+ end
16
+
17
+ def effective_date
18
+ @effective_date ||= "effective_date"
19
+ end
20
+
21
+ def set_expiration_date(name)
22
+ @expiration_date = name
23
+ end
24
+
25
+ def expiration_date
26
+ @expiration_date ||= "expiration_date"
27
+ end
28
+
29
+ def set_top_flag(name)
30
+ @top_flag = name
31
+ end
32
+
33
+ def top_flag
34
+ @top_flag ||= "top_flag"
35
+ end
36
+
37
+ def set_top_flag_value(value)
38
+ @top_flag_value = value
39
+ end
40
+
41
+ def top_flag_value
42
+ @top_flag_value ||= 'Y'
43
+ end
44
+ end
45
+ end
46
+ end
@@ -1,2 +1,3 @@
1
1
  require 'active_warehouse/builder/date_dimension_builder'
2
- require 'active_warehouse/builder/random_data_builder'
2
+ require 'active_warehouse/builder/random_data_builder'
3
+ require 'active_warehouse/builder/test_data_builder'
@@ -30,8 +30,8 @@ module ActiveWarehouse #:nodoc:
30
30
  # accessed by name.
31
31
  def build(options={})
32
32
  records = []
33
- date = start_date
34
- while date <= end_date
33
+ date = start_date.to_time
34
+ while date <= end_date.to_time
35
35
  record = {}
36
36
  record[:date] = date.strftime("%m/%d/%Y")
37
37
  record[:full_date_description] = date.strftime("%B %d,%Y")
@@ -52,6 +52,7 @@ module ActiveWarehouse #:nodoc:
52
52
  record[:calendar_month_number_in_year] = date.month
53
53
  record[:calendar_year_month] = date.strftime("%Y-%m")
54
54
  record[:calendar_quarter] = "Q#{date.quarter}"
55
+ record[:calendar_quarter_number_in_year] = date.quarter
55
56
  record[:calendar_year_quarter] = "#{date.strftime('%Y')}-#{record[:calendar_quarter]}"
56
57
  #record[:calendar_half_year] =
57
58
  record[:calendar_year] = "#{date.year}"
@@ -62,8 +63,10 @@ module ActiveWarehouse #:nodoc:
62
63
  record[:fiscal_year_month] = "FY#{date.fiscal_year}-" + date.fiscal_year_month.to_s.rjust(2, '0')
63
64
  record[:fiscal_quarter] = "FY Q#{date.fiscal_year_quarter}"
64
65
  record[:fiscal_year_quarter] = "FY#{date.fiscal_year}-Q#{date.fiscal_year_quarter}"
66
+ record[:fiscal_year_quarter_number] = date.fiscal_year_quarter
65
67
  #record[:fiscal_half_year] =
66
68
  record[:fiscal_year] = "FY#{date.fiscal_year}"
69
+ record[:fiscal_year_number] = date.fiscal_year
67
70
  record[:holiday_indicator] = holiday_indicators.include?(date) ? 'Holiday' : 'Nonholiday'
68
71
  record[:weekday_indicator] = weekday_indicators[date.wday]
69
72
  record[:selling_season] = 'None'
@@ -0,0 +1,13 @@
1
+ module ActiveWarehouse
2
+ module Builder
3
+ module Generator
4
+ # Base class for generators
5
+ class Generator
6
+ # Get the next value from the generator.
7
+ def next(options={})
8
+ raise "Abstract method"
9
+ end
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,20 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ module Builder #:nodoc:
3
+ module Generator #:nodoc:
4
+ # Generate a name consisting of one or more words from word groups
5
+ class NameGenerator < ActiveWarehouse::Builder::Generator::Generator
6
+ def next(options={})
7
+ options[:separator] ||= ' '
8
+ parts = []
9
+ word_groups = options[:word_groups]
10
+ 0.upto(word_groups.first.length) do |i|
11
+ word_groups.each do |word_group|
12
+ parts << word_group[i]
13
+ end
14
+ end
15
+ parts.join(options[:separator])
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,11 @@
1
+ module ActiveWarehouse
2
+ module Builder
3
+ module Generator
4
+ class ParagraphGenerator < ActiveWarehouse::Builder::Generator::Generator
5
+ def next(options={})
6
+
7
+ end
8
+ end
9
+ end
10
+ end
11
+ end
@@ -8,6 +8,7 @@ module ActiveWarehouse #:nodoc:
8
8
  # Hash of names mapped to generators where the name is the column name
9
9
  attr_reader :column_generators
10
10
 
11
+ # Initialize the random data builder
11
12
  def initialize
12
13
  @generators = {
13
14
  Fixnum => FixnumGenerator.new,
@@ -20,6 +21,9 @@ module ActiveWarehouse #:nodoc:
20
21
  @column_generators = {}
21
22
  end
22
23
 
24
+ # Build the data for the specified class. Name may be a Class (which must descend from ActiveWarehouse::Dimension
25
+ # or ActiveWarehouse::Fact), a String or a Symbol. String or Symbol will be converted to a class name and then
26
+ # passed back to this method.
23
27
  def build(name, options={})
24
28
  case name
25
29
  when Class
@@ -44,6 +48,7 @@ module ActiveWarehouse #:nodoc:
44
48
  # Build test dimension data for the specified dimension name.
45
49
  #
46
50
  # Options:
51
+ #
47
52
  # * <tt>:rows</tt>: The number of rows to create (defaults to 100)
48
53
  # * <tt>:generators</tt>: A map of generators where each key is Fixnum, Float, Date, Time, String, or Object and the
49
54
  # value extends AbstractGenerator.
@@ -57,6 +62,9 @@ module ActiveWarehouse #:nodoc:
57
62
  row = {}
58
63
  dimension_class.content_columns.each do |column|
59
64
  generator = (options[:generators][column.klass] || @column_generators[column.name] || @generators[column.klass])
65
+ if generator.nil?
66
+ raise ArgumentError, "No generator found, unknown column type?: #{column.klass}"
67
+ end
60
68
  row[column.name] = generator.generate(column, options)
61
69
  end
62
70
  rows << row
@@ -74,6 +82,7 @@ module ActiveWarehouse #:nodoc:
74
82
  # * <tt>:fk_limit</tt>: A Hash of foreign key limits, where each key is the name of column and the value is
75
83
  # a number. For example options[:fk_limit][:date_id] = 1000 would limit the foreign key values to something between
76
84
  # 1 and 1000, inclusive.
85
+ # * <tt>:dimensions</tt>: The number of available dimension FKs
77
86
  def build_fact(name, options={})
78
87
  options[:rows] ||= 100
79
88
  options[:generators] ||= {}
@@ -87,9 +96,10 @@ module ActiveWarehouse #:nodoc:
87
96
  generator = (options[:generators][column.klass] || @generators[column.klass])
88
97
  row[column.name] = generator.generate(column, options)
89
98
  end
90
- fact_class.foreign_key_columns.each do |column|
91
- fk_limit = (options[:fk_limit][column.name] || 100) - 1
92
- row[column.name] = rand(fk_limit) + 1
99
+ fact_class.dimension_relationships.each do |name, reflection|
100
+ # it would be better to get a count of rows from the dimension tables
101
+ fk_limit = (options[:fk_limit][reflection.primary_key_name] || options[:dimensions] || 100) - 1
102
+ row[reflection.primary_key_name] = rand(fk_limit) + 1
93
103
  end
94
104
  rows << row
95
105
  end
@@ -112,8 +122,8 @@ module ActiveWarehouse #:nodoc:
112
122
  # Generate a random date value
113
123
  #
114
124
  # Options:
115
- # *<tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
116
- # *<tt>:end_date</tt>: The end date as a Date or Time object (default now)
125
+ # * <tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
126
+ # * <tt>:end_date</tt>: The end date as a Date or Time object (default now)
117
127
  def generate(column, options={})
118
128
  end_date = (options[:end_date] || Time.now).to_date
119
129
  start_date = (options[:start_date] || 1.year.ago).to_date
@@ -125,8 +135,8 @@ module ActiveWarehouse #:nodoc:
125
135
  # Basic Time generator
126
136
  #
127
137
  # Options:
128
- # *<tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
129
- # *<tt>:end_date</tt>: The end date as a Date or Time object (default now)
138
+ # * <tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
139
+ # * <tt>:end_date</tt>: The end date as a Date or Time object (default now)
130
140
  class TimeGenerator < DateGenerator #:nodoc:
131
141
  # Generate a random Time value
132
142
  def generate(column, options={})
@@ -139,8 +149,8 @@ module ActiveWarehouse #:nodoc:
139
149
  # Generate an integer from 0 to options[:max] inclusive
140
150
  #
141
151
  # Options:
142
- # *<tt>:max</tt>: The maximum allowed value (default 1000)
143
- # *<tt>:min</tt>: The minimum allowed value (default 0)
152
+ # * <tt>:max</tt>: The maximum allowed value (default 1000)
153
+ # * <tt>:min</tt>: The minimum allowed value (default 0)
144
154
  def generate(column, options={})
145
155
  options[:max] ||= 1000
146
156
  options[:min] ||= 0
@@ -153,7 +163,7 @@ module ActiveWarehouse #:nodoc:
153
163
  # Generate a float from 0 to options[:max] inclusive (default 1000)
154
164
  #
155
165
  # Options:
156
- # *<tt>:max</tt>: The maximum allowed value (default 1000)
166
+ # * <tt>:max</tt>: The maximum allowed value (default 1000)
157
167
  def generate(column, options={})
158
168
  options[:max] ||= 1000
159
169
  rand * options[:max].to_f
@@ -165,7 +175,7 @@ module ActiveWarehouse #:nodoc:
165
175
  # Generate a big decimal from 0 to options[:max] inclusive (default 1000)
166
176
  #
167
177
  # Options:
168
- # *<tt>:max</tt>: The maximum allowed value (default 1000)
178
+ # * <tt>:max</tt>: The maximum allowed value (default 1000)
169
179
  def generate(column, options={})
170
180
  options[:max] ||= 1000
171
181
  BigDecimal.new((rand * options[:max].to_f).to_s) # TODO: need BigDecimal type?
@@ -0,0 +1,54 @@
1
+ Dir[File.dirname(__FILE__) + "/generator/*.rb"].each { |file| require(file) }
2
+
3
+ module ActiveWarehouse #:nodoc:
4
+ module Builder #:nodoc:
5
+ # Unlike the RandomDataBuilder, which puts truly random data in the warehouse, this
6
+ # generator uses collections of possible values to construct semi-understandable data
7
+ class TestDataBuilder
8
+ def initialize
9
+
10
+ end
11
+
12
+ # Usage:
13
+ #
14
+ # fields = [:id,:product_name,:product_description,:suggested_retail_price]
15
+ # field_definitions = {
16
+ # :id => :sequence, # symbol or string
17
+ # :product_name => [['Foo','Bar'],['Baz','Bing']], # array
18
+ # :product_description => IpsumLorumGenerator, # class
19
+ # :suggested_retail_price => RandomNumberGenerator.new(0.00, 100.00) # generator instance
20
+ # }
21
+ def build(fields, field_definitions, options={})
22
+ options[:number] ||= 100
23
+ rows = []
24
+ generators = {}
25
+ # set up all of the generators first
26
+ field_definitions.each do |name, fd|
27
+ case fd
28
+ when Class
29
+ generators[name] = fd.new
30
+ when String, Symbol
31
+ generators[name] = "#{fd}Generator".classify.constantize.new
32
+ when Array
33
+ generators[name] = NameGenerator.new(fd)
34
+ when Generator
35
+ generators[name] = fd
36
+ else
37
+ raise "Invalid generator specified: #{fd}"
38
+ end
39
+ end
40
+
41
+ # generate all of the rows
42
+ 0.upto(options[:number]) do
43
+ row = {}
44
+ fields.each do |field|
45
+ row[field] = generators[field].next(options)
46
+ end
47
+ rows << row
48
+ end
49
+
50
+ rows
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,27 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ # A field that uses a Proc to calculate the value
3
+ class CalculatedField < Field
4
+ attr_reader :block
5
+ # Initialize the calculated field
6
+ #
7
+ # +fact_class+ is the fact class that the field is calculated in
8
+ # +name+ is the name of the calculated field
9
+ # +type+ is the type of the calculated field (defaults to :integer)
10
+ # +field_options+ is a Hash of options for the field
11
+ #
12
+ # This method accepts a block which should take a single argument that is the record
13
+ # itself.
14
+ def initialize(fact_class, name, type = :integer, field_options = {}, &block)
15
+ unless block_given?
16
+ raise ArgumentError, "A block is required for the calculated field #{name} in #{fact_class}"
17
+ end
18
+ super(fact_class, name.to_s, type, field_options)
19
+ @block = block
20
+ end
21
+
22
+ # Calculate the field value using the Hash of type-casted values
23
+ def calculate(values)
24
+ @block.call(values)
25
+ end
26
+ end
27
+ end
@@ -1,7 +1,7 @@
1
1
  # Provides 1.1.6 compatibility
2
- module ActiveRecord
3
- module Calculations
4
- module ClassMethods
2
+ module ActiveRecord #:nodoc:
3
+ module Calculations #:nodoc:
4
+ module ClassMethods #:nodoc:
5
5
  protected
6
6
  def construct_count_options_from_legacy_args(*args)
7
7
  options = {}
@@ -37,7 +37,7 @@ module ActiveRecord
37
37
  end
38
38
  end
39
39
 
40
- class Module
40
+ class Module #:nodoc:
41
41
  def alias_method_chain(target, feature)
42
42
  # Strip out punctuation on predicates or bang methods since
43
43
  # e.g. target?_without_feature is not a valid method name.
@@ -1,8 +1,12 @@
1
1
  module ActiveWarehouse
2
- # A Cube represents a collection of dimensions operating on a fact. The Cube provides a front-end for getting at the
3
- # underlying data. The Cube manages the creation and population of all underlying aggregates.
2
+ # A Cube represents a collection of dimensions operating on a fact. The Cube
3
+ # provides a front-end for getting at the
4
+ # underlying data. Cubes support pluggable aggregation. The default aggregation
5
+ # is the NoAggregate which goes directly
6
+ # to the fact and dimensions to answer queries.
4
7
  class Cube
5
8
  class << self
9
+
6
10
  # Callback which is invoked when subclasses are created
7
11
  def inherited(subclass)
8
12
  subclasses << subclass
@@ -13,54 +17,82 @@ module ActiveWarehouse
13
17
  @subclasses ||= []
14
18
  end
15
19
 
16
- # Defines the dimensions that this cube pivots on.
20
+ # Defines the dimensions that this cube pivots on. If the fact name and
21
+ # cube name are different (for example, if a PurchaseCube does not report
22
+ # on a PurchaseFact) then you *must* declare the <code>reports_on</code>
23
+ # first.
17
24
  def pivots_on(*dimension_list)
18
- # TODO: Validate if the fact is set
25
+ @dimensions_hierarchies = OrderedHash.new
26
+ @dimensions = []
19
27
  dimension_list.each do |dimension|
20
- dimensions << dimension
28
+ case dimension
29
+ when Symbol, String
30
+ dimensions << dimension.to_sym
31
+ dimensions_hierarchies[dimension.to_sym] = fact_class.dimension_class(dimension).hierarchies
32
+ when Hash
33
+ dimension_name = dimension.keys.first.to_sym
34
+ dimensions << dimension_name
35
+ dimensions_hierarchies[dimension_name] = [dimension[dimension_name]].flatten
36
+ else
37
+ raise ArgumentError, "Each argument to pivot_on must be a symbol, string or Hash"
38
+ end
21
39
  end
22
40
  end
23
41
  alias :pivot_on :pivots_on
24
42
 
25
- # Defines the fact that this cube reports on
26
- def reports_on(fact)
27
- # TODO: Validate if one or more dimension is set
28
- @fact = fact
43
+ # Defines the fact name, without the 'Fact' suffix, that this cube
44
+ # reports on. For instance, if you have PurchaseFact, you could then
45
+ # call <code>reports_on :purchase</code>.
46
+ #
47
+ # The default value for reports_on is to take the name of the cube,
48
+ # i.e. PurchaseCube, and remove the Cube suffix. The assumption is that
49
+ # your Cube name matches your Fact name.
50
+ def reports_on(fact_name)
51
+ @fact_name = fact_name
29
52
  end
30
53
  alias :report_on :reports_on
31
54
 
32
- # Rebuild all aggregate classes. Set :force => true to force the rebuild of aggregate classes.
55
+ # Rebuild the data warehouse.
33
56
  def rebuild(options={})
34
- logger.debug "Rebuilding aggregates for cube #{name}"
35
- options[:force] ||= false
36
- build_aggregate_classes(options)
57
+ populate(options)
37
58
  end
38
59
 
39
- # Populate all aggregates. Set :force => true to force the population of the aggregate class.
60
+ # Populate the data warehouse. Delegate to aggregate.populate
40
61
  def populate(options={})
41
- options[:force] ||= false
42
- aggregates.each do |agg_id, agg_clazz|
43
- if agg_clazz.needs_rebuild? || options[:force]
44
- logger.debug "Populating aggregate class #{agg_clazz.name}"
45
- agg_clazz.populate
46
- end
47
- end
48
- end
49
-
50
- # Get the fact that this cube reports on
51
- def fact
52
- @fact
62
+ aggregate.populate
53
63
  end
54
64
 
55
65
  # Get the dimensions that this cube pivots on
56
66
  def dimensions
57
- @dimensions ||= []
67
+ @dimensions ||= fact_class.dimension_relationships.collect{|k,v| k}
58
68
  end
59
69
 
60
- # Get the aggregate classes for this dimension
61
- def aggregates
62
- rebuild if @aggregates.nil?
63
- @aggregates
70
+ # Get an OrderedHash of each dimension mapped to its hierarchies which
71
+ # will be included in the cube
72
+ def dimensions_hierarchies
73
+ if @dimensions_hierarchies.nil?
74
+ @dimensions_hierarchies = OrderedHash.new
75
+ dimensions.each do |dimension|
76
+ @dimensions_hierarchies[dimension] = fact_class.dimension_class(dimension).hierarchies
77
+ end
78
+ end
79
+ @dimensions_hierarchies
80
+ end
81
+
82
+ # returns true if this cube pivots on a hierarchical dimension.
83
+ def pivot_on_hierarchical_dimension?
84
+ dimension_classes.each do |dimension|
85
+ return true if dimension.hierarchical_dimension?
86
+ end
87
+ return false
88
+ end
89
+
90
+ # Returns the aggregate fields for this cube. When the cube does not pivot
91
+ # on any hierarchical dimension, aggregate fields defined in the fact class
92
+ # that are related to a hierarchical dimension (those with a non-empty
93
+ # levels_from_parent) are removed.
94
+ def aggregate_fields
95
+ fact_class.aggregate_fields.reject {|field| !pivot_on_hierarchical_dimension? and !field.levels_from_parent.empty? }
64
96
  end
65
97
 
66
98
  # Get the class name for the specified cube name
@@ -73,7 +105,7 @@ module ActiveWarehouse
73
105
 
74
106
  # Get the aggregated fact class name
75
107
  def fact_class_name
76
- Fact.class_name(fact)
108
+ ActiveWarehouse::Fact.class_name(@fact_name || name.sub(/Cube$/,'').underscore.to_sym)
77
109
  end
78
110
 
79
111
  # Get the aggregated fact class instance
@@ -83,221 +115,90 @@ module ActiveWarehouse
83
115
 
84
116
  # Get a list of dimension class instances
85
117
  def dimension_classes
86
- dimensions.collect {|dimension| Dimension.class_name(dimension).constantize}
118
+ dimensions.collect do |dimension_name|
119
+ dimension_class(dimension_name)
120
+ end
121
+ end
122
+
123
+ # Get the dimension class for the specified dimension name
124
+ def dimension_class(dimension_name)
125
+ fact_class.dimension_relationships[dimension_name.to_sym].class_name.constantize
87
126
  end
88
127
 
128
+ # Get the cube logger
89
129
  def logger
90
130
  @logger ||= Logger.new('cube.log')
91
131
  end
92
132
 
133
+ # Get the time when the fact or any dimension referenced in this cube
134
+ # was last modified
93
135
  def last_modified
94
- lm = Fact.class_for_name(fact).last_modified
136
+ lm = fact_class.last_modified
95
137
  dimensions.each do |dimension|
96
- dim = Dimension.class_for_name(dimension)
138
+ dim = ActiveWarehouse::Dimension.class_for_name(dimension)
97
139
  lm = dim.last_modified if dim.last_modified > lm
98
140
  end
99
141
  lm
100
142
  end
101
143
 
102
- protected
103
- def build_aggregate_classes(options={})
104
- @aggregates = {}
105
- existing_dimension_pairs = []
106
- logger.debug "Building aggregate classes"
107
- dimensions.each do |column_dimension|
108
- dimensions.each do |row_dimension|
109
- next if column_dimension == row_dimension
110
- next if existing_dimension_pairs.include? [column_dimension,row_dimension]
111
- next if existing_dimension_pairs.include? [row_dimension,column_dimension]
112
-
113
- existing_dimension_pairs << [column_dimension,row_dimension]
114
- col_dim_class = Dimension.class_for_name(column_dimension)
115
- col_dim_class.hierarchy_levels.each_key do |column_hierarchy_name|
116
- row_dim_class = Dimension.class_for_name(row_dimension)
117
- row_dim_class.hierarchy_levels.each_key do |row_hierarchy_name|
118
- # Construct the aggregate meta data instance
119
- meta_data_attributes = {
120
- :cube_name => self.name,
121
- :dimension1 => column_dimension.to_s,
122
- :dimension1_hierarchy => column_hierarchy_name.to_s,
123
- :dimension2 => row_dimension.to_s,
124
- :dimension2_hierarchy => row_hierarchy_name.to_s
125
- }
126
- conditions = []
127
- condition_args = []
128
- meta_data_attributes.each do |key, value|
129
- conditions << "#{key} = ?"
130
- condition_args << value
131
- end
132
- conditions = [conditions.join(' and ')] + condition_args
133
- meta_data = AggregateMetaData.find(:first, :conditions => conditions)
134
- unless meta_data
135
- meta_data = AggregateMetaData.create(meta_data_attributes)
136
- end
137
-
138
- # Construct the aggregate class instance
139
- aggregate_class = Class.new(ActiveWarehouse::Aggregate)
140
- aggregate_class.name = "Agg#{meta_data.id}"
141
- logger.debug "Constructed aggregate #{aggregate_class.name}"
142
- aggregate_class.cube = self
143
- aggregate_class.dimension1 = column_dimension
144
- aggregate_class.dimension1_hierarchy_name = column_hierarchy_name
145
- aggregate_class.dimension2 = row_dimension
146
- aggregate_class.dimension2_hierarchy_name = row_hierarchy_name
147
-
148
- # Create the underlying aggregate storage table
149
- # TODO: fix the bug of data not being found when a storage table rebuild occurs
150
- force_storage_table_rebuild = options[:force] || aggregate_class.needs_rebuild?(last_modified)
151
- logger.debug "Force storage table rebuild? #{force_storage_table_rebuild}"
152
- aggregate_class.create_storage_table(force_storage_table_rebuild)
153
-
154
- # Keep a reference to the aggregate class instance
155
- @aggregates[meta_data.id] = aggregate_class
156
- end
157
- end
158
- end
159
- end
144
+ # The temp directory for storing files during warehouse rebuilds
145
+ attr_accessor :temp_dir
146
+ def temp_dir
147
+ @temp_dir ||= '/tmp'
148
+ end
149
+
150
+ # Specify the ActiveRecord class to connect through
151
+ # Note: this is a potential directive in a Cube subclass
152
+ attr_accessor :connect_through
153
+ def connect_through
154
+ @connect_through ||= ActiveRecord::Base
155
+ end
156
+
157
+ # Get an adapter connection
158
+ def connection
159
+ connect_through.connection
160
+ end
161
+
162
+ # Defaults to NoAggregate strategy.
163
+ def aggregate
164
+ @aggregate ||= ActiveWarehouse::Aggregate::NoAggregate.new(self)
165
+ end
166
+
167
+ def aggregate_class(agg_class)
168
+ @aggregate = agg_class.new(self)
160
169
  end
161
170
 
162
171
  end
163
172
 
164
173
  public
165
- def aggregate_map(column_dimension, column_hierarchy, row_dimension, row_hierarchy, cstage=0, rstage=0)
166
- # Fill known cells
167
- agg_map = AggregateMap.new
168
- agg_records = nil
169
- # s = Benchmark.realtime do
170
- agg_records = aggregate_records(column_dimension, column_hierarchy, cstage, row_dimension, row_hierarchy, rstage)
171
- #end
172
- # cs = 0
173
- # as = 0
174
- # calc = 0
175
- # x = 0
176
- calculated_fields = self.class.fact_class.calculated_fields
177
- calculated_field_options = self.class.fact_class.calculated_field_options
178
- #puts "loading aggregate_records took #{s}s"
179
- #s = Benchmark.realtime do
180
- #puts "there are #{agg_records.length} agg_records"
181
- #puts "there are #{self.class.fact_class.calculated_fields.length} calculated fields in class #{self.class.fact_class}"
182
- agg_records.each do |agg_record|
183
- # agg_record is an instance of Aggregate
184
- # collect the aggregate record data fields into an array
185
- data_array = nil
186
- #cs += Benchmark.realtime do
187
- data_array = agg_record.data_fields.collect{ |data_field_name| agg_record.send(data_field_name.to_sym) }
188
- #end
189
-
190
- # convert to an average where necessary
191
- # TODO: implement
192
-
193
- # add calculated fields to the data array
194
- #calc += Benchmark.realtime do
195
- calculated_fields.each do |calculated_field|
196
- options = calculated_field_options[calculated_field]
197
- data_array << options[:block].call(agg_record)
198
- end
199
- #end
200
-
201
- # add the data array to the aggregate map
202
- #as += Benchmark.realtime do
203
- agg_map.add_data(agg_record.dimension2_path, agg_record.dimension1_path, data_array)
204
- #end
205
- end
206
-
207
- #end
208
- #puts "creating the agg_map took #{s}s"
209
- #puts "total time spent collecting the data: #{cs}s, avg:#{cs/agg_records.length}s (#{(cs/s) * 100}%)"
210
- #puts "total time spent adding the data: #{as}s, avg:#{as/agg_records.length}s (#{(as/s) * 100}%)"
211
- #puts "total time spent calculating fields: #{calc}s, avg:#{calc/agg_records.length}s (#{(calc/s) * 100}%)"
212
- agg_map
174
+ # Query the cube. The column dimension, column hierarchy, row dimension and
175
+ # row hierarchy are all required.
176
+ #
177
+ # The conditions value is a String that represents a SQL condition appended
178
+ # to the where clause. TODO: this may eventually be converted to another
179
+ # query language.
180
+ #
181
+ # The cstage value represents the current column drill down stage and
182
+ # defaults to 0.
183
+ #
184
+ # The rstage value represents the current row drill down stage and defaults
185
+ # to 0. Filters contains key/value pairs where the key is a string of
186
+ # 'dimension.column' and the value is the value to filter by. For example:
187
+ #
188
+ # filters = {'date.calendar_year' => 2007, 'product.category' => 'Food'}
189
+ # query(:date, :cy, :store, :region, 1, 0, filters)
190
+ #
191
+ # Note that product.category refers to a dimension which is not actually
192
+ # visible but which is both part of the cube and is used for filtering.
193
+ def query(*args)
194
+ self.class.aggregate.query(*args)
213
195
  end
214
196
 
215
- protected
216
- # Return all of the Aggregate records for the specified dimensions and hierarchies
217
- def aggregate_records(column_dimension, column_hierarchy, cstage, row_dimension, row_hierarchy, rstage)
218
- k = Aggregate.key(column_dimension, column_hierarchy, row_dimension, row_hierarchy)
219
- if aggregates[k].nil?
220
- self.class.logger.debug("Aggregate #{k} not found in cache")
221
- conditions = ['cube_name = ?', self.class.name]
222
- conditions[0] << ' and dimension1 = ? and dimension1_hierarchy = ? and dimension2 = ? and dimension2_hierarchy = ?'
223
- conditions << column_dimension.to_s
224
- conditions << column_hierarchy.to_s
225
- conditions << row_dimension.to_s
226
- conditions << row_hierarchy.to_s
227
-
228
- conditions_reversed = ['cube_name = ?', self.class.name]
229
- conditions_reversed[0] << ' and dimension1 = ? and dimension1_hierarchy = ? and dimension2 = ? and dimension2_hierarchy = ?'
230
- conditions_reversed << row_dimension.to_s
231
- conditions_reversed << row_hierarchy.to_s
232
- conditions_reversed << column_dimension.to_s
233
- conditions_reversed << column_hierarchy.to_s
234
-
235
- aggregate_meta_data = AggregateMetaData.find(:first, :conditions => conditions)
236
- aggregate_meta_data ||= AggregateMetaData.find(:first, :conditions => conditions_reversed)
237
- if aggregate_meta_data.nil?
238
- self.class.rebuild
239
- aggregate_meta_data = AggregateMetaData.find(:first, :conditions => conditions)
240
- raise "Cannot find aggregate meta data for key #{k}" if aggregate_meta_data.nil?
241
- end
242
- aggregate_class = self.class.aggregates[aggregate_meta_data.id]
243
- if aggregate_class.nil?
244
- self.class.rebuild
245
- aggregate_class = self.class.aggregates[aggregate_meta_data.id]
246
- raise "Cannot find aggregate for id #{aggregate_meta_data.id}" if aggregate_class.nil?
247
- end
248
-
249
- aggregates[k] = aggregate_class.find(:all,
250
- :conditions => ['(dimension1_stage = ? and dimension2_stage = ?) or (dimension1_stage = ? and dimension2_stage = ?)',
251
- cstage, rstage, rstage, cstage])
252
- end
253
- aggregates[k]
197
+ # Get the database connection (delegates to Cube.connection class method)
198
+ def connection
199
+ self.class.connection
254
200
  end
255
201
 
256
- # Get a hash of all aggregate data
257
- def aggregates
258
- @aggregates ||= {}
259
- end
260
202
  end
261
203
 
262
- # In-memory map of aggregate values
263
- class AggregateMap
264
- attr_reader :length
265
-
266
- # Initialize the aggregate map
267
- def initialize
268
- @m = {}
269
- end
270
-
271
- # Return true if the aggregate map includes the specified row path
272
- def has_row_path?(row_path)
273
- @m.has_key?(row_path)
274
- end
275
-
276
- # Get the value for the specified row path, column path and field index
277
- def value(row_path, col_path, field_index)
278
- #puts "Getting value for #{row_path}, #{col_path} [field=#{field_index}]"
279
- row = @m[row_path]
280
- return 0 if row.nil?
281
- col = row[col_path]
282
- return 0 if col.nil?
283
- return col[field_index] || 0
284
- end
285
-
286
- # Get an array of the values for the specified row path and column path
287
- def values(row_path, col_path)
288
- row = @m[row_path]
289
- return Array.new(length, 0) if row.nil?
290
- col = row[col_path]
291
- return Array.new(length, 0) if col.nil?
292
- col
293
- end
294
-
295
- # Add an array of data for the given row and column path
296
- def add_data(row_path, col_path, data_array)
297
- @length ||= data_array.length
298
- #puts "Adding data for #{row_path}, #{col_path} [data=[#{data_array.join(',')}]]"
299
- @m[row_path] ||= {}
300
- @m[row_path][col_path] = data_array
301
- end
302
- end
303
204
  end