activewarehouse 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -14
- data/Rakefile +16 -5
- data/doc/references.txt +4 -0
- data/generators/bridge/templates/migration.rb +9 -2
- data/generators/bridge/templates/unit_test.rb +8 -0
- data/generators/date_dimension/USAGE +1 -0
- data/generators/date_dimension/date_dimension_generator.rb +16 -0
- data/generators/date_dimension/templates/fixture.yml +5 -0
- data/generators/date_dimension/templates/migration.rb +31 -0
- data/generators/date_dimension/templates/model.rb +3 -0
- data/generators/date_dimension/templates/unit_test.rb +8 -0
- data/generators/dimension/templates/migration.rb +1 -10
- data/generators/dimension_view/dimension_view_generator.rb +2 -2
- data/generators/dimension_view/templates/migration.rb +8 -2
- data/generators/fact/templates/migration.rb +2 -0
- data/generators/time_dimension/USAGE +1 -0
- data/generators/time_dimension/templates/fixture.yml +5 -0
- data/generators/time_dimension/templates/migration.rb +12 -0
- data/generators/time_dimension/templates/model.rb +3 -0
- data/generators/time_dimension/templates/unit_test.rb +8 -0
- data/generators/time_dimension/time_dimension_generator.rb +14 -0
- data/lib/active_warehouse.rb +13 -2
- data/lib/active_warehouse/aggregate.rb +54 -253
- data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
- data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
- data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
- data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
- data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
- data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
- data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
- data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
- data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
- data/lib/active_warehouse/aggregate_field.rb +49 -0
- data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
- data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
- data/lib/active_warehouse/builder.rb +2 -1
- data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
- data/lib/active_warehouse/builder/generator/generator.rb +13 -0
- data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
- data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
- data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
- data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
- data/lib/active_warehouse/calculated_field.rb +27 -0
- data/lib/active_warehouse/compat/compat.rb +4 -4
- data/lib/active_warehouse/cube.rb +126 -225
- data/lib/active_warehouse/cube_query_result.rb +69 -0
- data/lib/active_warehouse/dimension.rb +64 -29
- data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
- data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
- data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
- data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
- data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
- data/lib/active_warehouse/fact.rb +119 -40
- data/lib/active_warehouse/field.rb +74 -0
- data/lib/active_warehouse/ordered_hash.rb +34 -0
- data/lib/active_warehouse/prejoin_fact.rb +97 -0
- data/lib/active_warehouse/report/abstract_report.rb +40 -14
- data/lib/active_warehouse/report/chart_report.rb +3 -3
- data/lib/active_warehouse/report/table_report.rb +8 -3
- data/lib/active_warehouse/version.rb +1 -1
- data/lib/active_warehouse/view/report_helper.rb +144 -34
- data/tasks/active_warehouse_tasks.rake +28 -10
- metadata +107 -30
@@ -0,0 +1,49 @@
|
|
1
|
+
module ActiveWarehouse
|
2
|
+
# Encapsulates a fact column in a fact table. These fields
|
3
|
+
# represent columns that should be aggregated.
|
4
|
+
class AggregateField < Field
|
5
|
+
|
6
|
+
attr_reader :strategy_name
|
7
|
+
|
8
|
+
# +fact_class+ is the class of the fact table this field is found in.
|
9
|
+
# +column_definition+ is the ActiveRecord ColumnDefinition instance for this
|
10
|
+
# column.
|
11
|
+
# +strategy_name+ is the name of the aggregation strategy to be used, defaults to :sum
|
12
|
+
# +field_options+ is a hash of raw options from the original aggregate definition.
|
13
|
+
def initialize(fact_class, column_definition, strategy_name = :sum, field_options = {})
|
14
|
+
super(fact_class, column_definition.name, column_definition.type, field_options)
|
15
|
+
@column_definition = column_definition
|
16
|
+
@limit = column_definition.limit
|
17
|
+
@scale = column_definition.scale
|
18
|
+
@precision = column_definition.precision
|
19
|
+
@strategy_name = strategy_name
|
20
|
+
end
|
21
|
+
|
22
|
+
# delegates to owning_class, returns the Fact that has this field
|
23
|
+
def fact_class
|
24
|
+
owning_class
|
25
|
+
end
|
26
|
+
|
27
|
+
def is_semiadditive?
|
28
|
+
!field_options[:semiadditive].nil?
|
29
|
+
end
|
30
|
+
|
31
|
+
# returns the Dimension that this semiadditive fact is over
|
32
|
+
def semiadditive_over
|
33
|
+
Dimension.to_dimension(field_options[:semiadditive])
|
34
|
+
end
|
35
|
+
|
36
|
+
# overrides Field.label, appending the aggregation strategy name to the label
|
37
|
+
def label
|
38
|
+
@label ? @label : "#{super}_#{strategy_name}"
|
39
|
+
end
|
40
|
+
|
41
|
+
def levels_from_parent
|
42
|
+
field_options[:levels_from_parent].nil? ? [] : field_options[:levels_from_parent]
|
43
|
+
end
|
44
|
+
|
45
|
+
def type_cast(value)
|
46
|
+
@column_definition.type_cast(value)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -2,9 +2,11 @@ module ActiveWarehouse #:nodoc
|
|
2
2
|
# Implements a bridge table.
|
3
3
|
class Bridge < ActiveRecord::Base
|
4
4
|
class << self
|
5
|
-
# Get the table name. By default the table name will be the name of the
|
5
|
+
# Get the table name. By default the table name will be the name of the
|
6
|
+
# bridge in singular form.
|
6
7
|
#
|
7
|
-
# Example: DepartmentHierarchyBridge will have a table called
|
8
|
+
# Example: DepartmentHierarchyBridge will have a table called
|
9
|
+
# department_hierarchy_bridge
|
8
10
|
def table_name
|
9
11
|
name = self.name.demodulize.underscore
|
10
12
|
set_table_name(name)
|
@@ -12,4 +14,6 @@ module ActiveWarehouse #:nodoc
|
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
15
|
-
end
|
17
|
+
end
|
18
|
+
|
19
|
+
require 'active_warehouse/bridge/hierarchy_bridge'
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module ActiveWarehouse #:nodoc:
|
2
|
+
# Bridge class that models ragged hierarchies.
|
3
|
+
class HierarchyBridge < Bridge
|
4
|
+
class << self
|
5
|
+
def set_levels_from_parent(name)
|
6
|
+
@levels_from_parent = name
|
7
|
+
end
|
8
|
+
|
9
|
+
def levels_from_parent
|
10
|
+
@levels_from_parent ||= "levels_from_parent"
|
11
|
+
end
|
12
|
+
|
13
|
+
def set_effective_date(name)
|
14
|
+
@effective_date = name
|
15
|
+
end
|
16
|
+
|
17
|
+
def effective_date
|
18
|
+
@effective_date ||= "effective_date"
|
19
|
+
end
|
20
|
+
|
21
|
+
def set_expiration_date(name)
|
22
|
+
@expiration_date = name
|
23
|
+
end
|
24
|
+
|
25
|
+
def expiration_date
|
26
|
+
@expiration_date ||= "expiration_date"
|
27
|
+
end
|
28
|
+
|
29
|
+
def set_top_flag(name)
|
30
|
+
@top_flag = name
|
31
|
+
end
|
32
|
+
|
33
|
+
def top_flag
|
34
|
+
@top_flag ||= "top_flag"
|
35
|
+
end
|
36
|
+
|
37
|
+
def set_top_flag_value(value)
|
38
|
+
@top_flag_value = value
|
39
|
+
end
|
40
|
+
|
41
|
+
def top_flag_value
|
42
|
+
@top_flag_value ||= 'Y'
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -30,8 +30,8 @@ module ActiveWarehouse #:nodoc:
|
|
30
30
|
# accessed by name.
|
31
31
|
def build(options={})
|
32
32
|
records = []
|
33
|
-
date = start_date
|
34
|
-
while date <= end_date
|
33
|
+
date = start_date.to_time
|
34
|
+
while date <= end_date.to_time
|
35
35
|
record = {}
|
36
36
|
record[:date] = date.strftime("%m/%d/%Y")
|
37
37
|
record[:full_date_description] = date.strftime("%B %d,%Y")
|
@@ -52,6 +52,7 @@ module ActiveWarehouse #:nodoc:
|
|
52
52
|
record[:calendar_month_number_in_year] = date.month
|
53
53
|
record[:calendar_year_month] = date.strftime("%Y-%m")
|
54
54
|
record[:calendar_quarter] = "Q#{date.quarter}"
|
55
|
+
record[:calendar_quarter_number_in_year] = date.quarter
|
55
56
|
record[:calendar_year_quarter] = "#{date.strftime('%Y')}-#{record[:calendar_quarter]}"
|
56
57
|
#record[:calendar_half_year] =
|
57
58
|
record[:calendar_year] = "#{date.year}"
|
@@ -62,8 +63,10 @@ module ActiveWarehouse #:nodoc:
|
|
62
63
|
record[:fiscal_year_month] = "FY#{date.fiscal_year}-" + date.fiscal_year_month.to_s.rjust(2, '0')
|
63
64
|
record[:fiscal_quarter] = "FY Q#{date.fiscal_year_quarter}"
|
64
65
|
record[:fiscal_year_quarter] = "FY#{date.fiscal_year}-Q#{date.fiscal_year_quarter}"
|
66
|
+
record[:fiscal_year_quarter_number] = date.fiscal_year_quarter
|
65
67
|
#record[:fiscal_half_year] =
|
66
68
|
record[:fiscal_year] = "FY#{date.fiscal_year}"
|
69
|
+
record[:fiscal_year_number] = date.fiscal_year
|
67
70
|
record[:holiday_indicator] = holiday_indicators.include?(date) ? 'Holiday' : 'Nonholiday'
|
68
71
|
record[:weekday_indicator] = weekday_indicators[date.wday]
|
69
72
|
record[:selling_season] = 'None'
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ActiveWarehouse #:nodoc:
|
2
|
+
module Builder #:nodoc:
|
3
|
+
module Generator #:nodoc:
|
4
|
+
# Generate a name consisting of one or more words from word groups
|
5
|
+
class NameGenerator < ActiveWarehouse::Builder::Generator::Generator
|
6
|
+
def next(options={})
|
7
|
+
options[:separator] ||= ' '
|
8
|
+
parts = []
|
9
|
+
word_groups = options[:word_groups]
|
10
|
+
0.upto(word_groups.first.length) do |i|
|
11
|
+
word_groups.each do |word_group|
|
12
|
+
parts << word_group[i]
|
13
|
+
end
|
14
|
+
end
|
15
|
+
parts.join(options[:separator])
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -8,6 +8,7 @@ module ActiveWarehouse #:nodoc:
|
|
8
8
|
# Hash of names mapped to generators where the name is the column name
|
9
9
|
attr_reader :column_generators
|
10
10
|
|
11
|
+
# Initialize the random data builder
|
11
12
|
def initialize
|
12
13
|
@generators = {
|
13
14
|
Fixnum => FixnumGenerator.new,
|
@@ -20,6 +21,9 @@ module ActiveWarehouse #:nodoc:
|
|
20
21
|
@column_generators = {}
|
21
22
|
end
|
22
23
|
|
24
|
+
# Build the data for the specified class. Name may be a Class (which must descend from ActiveWarehouse::Dimension
|
25
|
+
# or ActiveWarehouse::Fact), a String or a Symbol. String or Symbol will be converted to a class name and then
|
26
|
+
# passed back to this method.
|
23
27
|
def build(name, options={})
|
24
28
|
case name
|
25
29
|
when Class
|
@@ -44,6 +48,7 @@ module ActiveWarehouse #:nodoc:
|
|
44
48
|
# Build test dimension data for the specified dimension name.
|
45
49
|
#
|
46
50
|
# Options:
|
51
|
+
#
|
47
52
|
# * <tt>:rows</tt>: The number of rows to create (defaults to 100)
|
48
53
|
# * <tt>:generators</tt>: A map of generators where each key is Fixnum, Float, Date, Time, String, or Object and the
|
49
54
|
# value extends AbstractGenerator.
|
@@ -57,6 +62,9 @@ module ActiveWarehouse #:nodoc:
|
|
57
62
|
row = {}
|
58
63
|
dimension_class.content_columns.each do |column|
|
59
64
|
generator = (options[:generators][column.klass] || @column_generators[column.name] || @generators[column.klass])
|
65
|
+
if generator.nil?
|
66
|
+
raise ArgumentError, "No generator found, unknown column type?: #{column.klass}"
|
67
|
+
end
|
60
68
|
row[column.name] = generator.generate(column, options)
|
61
69
|
end
|
62
70
|
rows << row
|
@@ -74,6 +82,7 @@ module ActiveWarehouse #:nodoc:
|
|
74
82
|
# * <tt>:fk_limit</tt>: A Hash of foreign key limits, where each key is the name of column and the value is
|
75
83
|
# a number. For example options[:fk_limit][:date_id] = 1000 would limit the foreign key values to something between
|
76
84
|
# 1 and 1000, inclusive.
|
85
|
+
# * <tt>:dimensions</tt>: The number of available dimension FKs
|
77
86
|
def build_fact(name, options={})
|
78
87
|
options[:rows] ||= 100
|
79
88
|
options[:generators] ||= {}
|
@@ -87,9 +96,10 @@ module ActiveWarehouse #:nodoc:
|
|
87
96
|
generator = (options[:generators][column.klass] || @generators[column.klass])
|
88
97
|
row[column.name] = generator.generate(column, options)
|
89
98
|
end
|
90
|
-
fact_class.
|
91
|
-
|
92
|
-
|
99
|
+
fact_class.dimension_relationships.each do |name, reflection|
|
100
|
+
# it would be better to get a count of rows from the dimension tables
|
101
|
+
fk_limit = (options[:fk_limit][reflection.primary_key_name] || options[:dimensions] || 100) - 1
|
102
|
+
row[reflection.primary_key_name] = rand(fk_limit) + 1
|
93
103
|
end
|
94
104
|
rows << row
|
95
105
|
end
|
@@ -112,8 +122,8 @@ module ActiveWarehouse #:nodoc:
|
|
112
122
|
# Generate a random date value
|
113
123
|
#
|
114
124
|
# Options:
|
115
|
-
#
|
116
|
-
#
|
125
|
+
# * <tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
|
126
|
+
# * <tt>:end_date</tt>: The end date as a Date or Time object (default now)
|
117
127
|
def generate(column, options={})
|
118
128
|
end_date = (options[:end_date] || Time.now).to_date
|
119
129
|
start_date = (options[:start_date] || 1.year.ago).to_date
|
@@ -125,8 +135,8 @@ module ActiveWarehouse #:nodoc:
|
|
125
135
|
# Basic Time generator
|
126
136
|
#
|
127
137
|
# Options:
|
128
|
-
#
|
129
|
-
#
|
138
|
+
# * <tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
|
139
|
+
# * <tt>:end_date</tt>: The end date as a Date or Time object (default now)
|
130
140
|
class TimeGenerator < DateGenerator #:nodoc:
|
131
141
|
# Generate a random Time value
|
132
142
|
def generate(column, options={})
|
@@ -139,8 +149,8 @@ module ActiveWarehouse #:nodoc:
|
|
139
149
|
# Generate an integer from 0 to options[:max] inclusive
|
140
150
|
#
|
141
151
|
# Options:
|
142
|
-
#
|
143
|
-
#
|
152
|
+
# * <tt>:max</tt>: The maximum allowed value (default 1000)
|
153
|
+
# * <tt>:min</tt>: The minimum allowed value (default 0)
|
144
154
|
def generate(column, options={})
|
145
155
|
options[:max] ||= 1000
|
146
156
|
options[:min] ||= 0
|
@@ -153,7 +163,7 @@ module ActiveWarehouse #:nodoc:
|
|
153
163
|
# Generate a float from 0 to options[:max] inclusive (default 1000)
|
154
164
|
#
|
155
165
|
# Options:
|
156
|
-
#
|
166
|
+
# * <tt>:max</tt>: The maximum allowed value (default 1000)
|
157
167
|
def generate(column, options={})
|
158
168
|
options[:max] ||= 1000
|
159
169
|
rand * options[:max].to_f
|
@@ -165,7 +175,7 @@ module ActiveWarehouse #:nodoc:
|
|
165
175
|
# Generate a big decimal from 0 to options[:max] inclusive (default 1000)
|
166
176
|
#
|
167
177
|
# Options:
|
168
|
-
#
|
178
|
+
# * <tt>:max</tt>: The maximum allowed value (default 1000)
|
169
179
|
def generate(column, options={})
|
170
180
|
options[:max] ||= 1000
|
171
181
|
BigDecimal.new((rand * options[:max].to_f).to_s) # TODO: need BigDecimal type?
|
@@ -0,0 +1,54 @@
|
|
1
|
+
Dir[File.dirname(__FILE__) + "/generator/*.rb"].each { |file| require(file) }
|
2
|
+
|
3
|
+
module ActiveWarehouse #:nodoc:
|
4
|
+
module Builder #:nodoc:
|
5
|
+
# Unlike the RandomDataBuilder, which puts truly random data in the warehouse, this
|
6
|
+
# generator uses collections of possible values to construct semi-understandable data
|
7
|
+
class TestDataBuilder
|
8
|
+
def initialize
|
9
|
+
|
10
|
+
end
|
11
|
+
|
12
|
+
# Usage:
|
13
|
+
#
|
14
|
+
# fields = [:id,:product_name,:product_description,:suggested_retail_price]
|
15
|
+
# field_definitions = {
|
16
|
+
# :id => :sequence, # symbol or string
|
17
|
+
# :product_name => [['Foo','Bar'],['Baz','Bing']], # array
|
18
|
+
# :product_description => IpsumLorumGenerator, # class
|
19
|
+
# :suggested_retail_price => RandomNumberGenerator.new(0.00, 100.00) # generator instance
|
20
|
+
# }
|
21
|
+
def build(fields, field_definitions, options={})
|
22
|
+
options[:number] ||= 100
|
23
|
+
rows = []
|
24
|
+
generators = {}
|
25
|
+
# set up all of the generators first
|
26
|
+
field_definitions.each do |name, fd|
|
27
|
+
case fd
|
28
|
+
when Class
|
29
|
+
generators[name] = fd.new
|
30
|
+
when String, Symbol
|
31
|
+
generators[name] = "#{fd}Generator".classify.constantize.new
|
32
|
+
when Array
|
33
|
+
generators[name] = NameGenerator.new(fd)
|
34
|
+
when Generator
|
35
|
+
generators[name] = fd
|
36
|
+
else
|
37
|
+
raise "Invalid generator specified: #{fd}"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# generate all of the rows
|
42
|
+
0.upto(options[:number]) do
|
43
|
+
row = {}
|
44
|
+
fields.each do |field|
|
45
|
+
row[field] = generators[field].next(options)
|
46
|
+
end
|
47
|
+
rows << row
|
48
|
+
end
|
49
|
+
|
50
|
+
rows
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module ActiveWarehouse #:nodoc:
|
2
|
+
# A field that uses a Proc to calculate the value
|
3
|
+
class CalculatedField < Field
|
4
|
+
attr_reader :block
|
5
|
+
# Initialize the calculated field
|
6
|
+
#
|
7
|
+
# +fact_class+ is the fact class that the field is calculated in
|
8
|
+
# +name+ is the name of the calculated field
|
9
|
+
# +type+ is the type of the calculated field (defaults to :integer)
|
10
|
+
# +field_options+ is a Hash of options for the field
|
11
|
+
#
|
12
|
+
# This method accepts a block which should take a single argument that is the record
|
13
|
+
# itself.
|
14
|
+
def initialize(fact_class, name, type = :integer, field_options = {}, &block)
|
15
|
+
unless block_given?
|
16
|
+
raise ArgumentError, "A block is required for the calculated field #{name} in #{fact_class}"
|
17
|
+
end
|
18
|
+
super(fact_class, name.to_s, type, field_options)
|
19
|
+
@block = block
|
20
|
+
end
|
21
|
+
|
22
|
+
# Calculate the field value using the Hash of type-casted values
|
23
|
+
def calculate(values)
|
24
|
+
@block.call(values)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# Provides 1.1.6 compatibility
|
2
|
-
module ActiveRecord
|
3
|
-
module Calculations
|
4
|
-
module ClassMethods
|
2
|
+
module ActiveRecord #:nodoc:
|
3
|
+
module Calculations #:nodoc:
|
4
|
+
module ClassMethods #:nodoc:
|
5
5
|
protected
|
6
6
|
def construct_count_options_from_legacy_args(*args)
|
7
7
|
options = {}
|
@@ -37,7 +37,7 @@ module ActiveRecord
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
class Module
|
40
|
+
class Module #:nodoc:
|
41
41
|
def alias_method_chain(target, feature)
|
42
42
|
# Strip out punctuation on predicates or bang methods since
|
43
43
|
# e.g. target?_without_feature is not a valid method name.
|
@@ -1,8 +1,12 @@
|
|
1
1
|
module ActiveWarehouse
|
2
|
-
# A Cube represents a collection of dimensions operating on a fact. The Cube
|
3
|
-
#
|
2
|
+
# A Cube represents a collection of dimensions operating on a fact. The Cube
|
3
|
+
# provides a front-end for getting at the
|
4
|
+
# underlying data. Cubes support pluggable aggregation. The default aggregation
|
5
|
+
# is the NoAggregate which goes directly
|
6
|
+
# to the fact and dimensions to answer queries.
|
4
7
|
class Cube
|
5
8
|
class << self
|
9
|
+
|
6
10
|
# Callback which is invoked when subclasses are created
|
7
11
|
def inherited(subclass)
|
8
12
|
subclasses << subclass
|
@@ -13,54 +17,82 @@ module ActiveWarehouse
|
|
13
17
|
@subclasses ||= []
|
14
18
|
end
|
15
19
|
|
16
|
-
# Defines the dimensions that this cube pivots on.
|
20
|
+
# Defines the dimensions that this cube pivots on. If the fact name and
|
21
|
+
# cube name are different (for example, if a PurchaseCube does not report
|
22
|
+
# on a PurchaseFact) then you *must* declare the <code>reports_on</code>
|
23
|
+
# first.
|
17
24
|
def pivots_on(*dimension_list)
|
18
|
-
|
25
|
+
@dimensions_hierarchies = OrderedHash.new
|
26
|
+
@dimensions = []
|
19
27
|
dimension_list.each do |dimension|
|
20
|
-
|
28
|
+
case dimension
|
29
|
+
when Symbol, String
|
30
|
+
dimensions << dimension.to_sym
|
31
|
+
dimensions_hierarchies[dimension.to_sym] = fact_class.dimension_class(dimension).hierarchies
|
32
|
+
when Hash
|
33
|
+
dimension_name = dimension.keys.first.to_sym
|
34
|
+
dimensions << dimension_name
|
35
|
+
dimensions_hierarchies[dimension_name] = [dimension[dimension_name]].flatten
|
36
|
+
else
|
37
|
+
raise ArgumentError, "Each argument to pivot_on must be a symbol, string or Hash"
|
38
|
+
end
|
21
39
|
end
|
22
40
|
end
|
23
41
|
alias :pivot_on :pivots_on
|
24
42
|
|
25
|
-
# Defines the fact that this cube
|
26
|
-
|
27
|
-
|
28
|
-
|
43
|
+
# Defines the fact name, without the 'Fact' suffix, that this cube
|
44
|
+
# reports on. For instance, if you have PurchaseFact, you could then
|
45
|
+
# call <code>reports_on :purchase</code>.
|
46
|
+
#
|
47
|
+
# The default value for reports_on is to take the name of the cube,
|
48
|
+
# i.e. PurchaseCube, and remove the Cube suffix. The assumption is that
|
49
|
+
# your Cube name matches your Fact name.
|
50
|
+
def reports_on(fact_name)
|
51
|
+
@fact_name = fact_name
|
29
52
|
end
|
30
53
|
alias :report_on :reports_on
|
31
54
|
|
32
|
-
# Rebuild
|
55
|
+
# Rebuild the data warehouse.
|
33
56
|
def rebuild(options={})
|
34
|
-
|
35
|
-
options[:force] ||= false
|
36
|
-
build_aggregate_classes(options)
|
57
|
+
populate(options)
|
37
58
|
end
|
38
59
|
|
39
|
-
# Populate
|
60
|
+
# Populate the data warehouse. Delegate to aggregate.populate
|
40
61
|
def populate(options={})
|
41
|
-
|
42
|
-
aggregates.each do |agg_id, agg_clazz|
|
43
|
-
if agg_clazz.needs_rebuild? || options[:force]
|
44
|
-
logger.debug "Populating aggregate class #{agg_clazz.name}"
|
45
|
-
agg_clazz.populate
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
# Get the fact that this cube reports on
|
51
|
-
def fact
|
52
|
-
@fact
|
62
|
+
aggregate.populate
|
53
63
|
end
|
54
64
|
|
55
65
|
# Get the dimensions that this cube pivots on
|
56
66
|
def dimensions
|
57
|
-
@dimensions ||=
|
67
|
+
@dimensions ||= fact_class.dimension_relationships.collect{|k,v| k}
|
58
68
|
end
|
59
69
|
|
60
|
-
# Get
|
61
|
-
|
62
|
-
|
63
|
-
@
|
70
|
+
# Get an OrderedHash of each dimension mapped to its hierarchies which
|
71
|
+
# will be included in the cube
|
72
|
+
def dimensions_hierarchies
|
73
|
+
if @dimensions_hierarchies.nil?
|
74
|
+
@dimensions_hierarchies = OrderedHash.new
|
75
|
+
dimensions.each do |dimension|
|
76
|
+
@dimensions_hierarchies[dimension] = fact_class.dimension_class(dimension).hierarchies
|
77
|
+
end
|
78
|
+
end
|
79
|
+
@dimensions_hierarchies
|
80
|
+
end
|
81
|
+
|
82
|
+
# returns true if this cube pivots on a hierarchical dimension.
|
83
|
+
def pivot_on_hierarchical_dimension?
|
84
|
+
dimension_classes.each do |dimension|
|
85
|
+
return true if dimension.hierarchical_dimension?
|
86
|
+
end
|
87
|
+
return false
|
88
|
+
end
|
89
|
+
|
90
|
+
# returns the aggregate fields for this cube
|
91
|
+
# removing the aggregate fields that are defined in fact class that are
|
92
|
+
# related to hierarchical dimension, but the cube doesn't pivot on any
|
93
|
+
# hierarchical dimensions
|
94
|
+
def aggregate_fields
|
95
|
+
fact_class.aggregate_fields.reject {|field| !pivot_on_hierarchical_dimension? and !field.levels_from_parent.empty? }
|
64
96
|
end
|
65
97
|
|
66
98
|
# Get the class name for the specified cube name
|
@@ -73,7 +105,7 @@ module ActiveWarehouse
|
|
73
105
|
|
74
106
|
# Get the aggregated fact class name
|
75
107
|
def fact_class_name
|
76
|
-
Fact.class_name(
|
108
|
+
ActiveWarehouse::Fact.class_name(@fact_name || name.sub(/Cube$/,'').underscore.to_sym)
|
77
109
|
end
|
78
110
|
|
79
111
|
# Get the aggregated fact class instance
|
@@ -83,221 +115,90 @@ module ActiveWarehouse
|
|
83
115
|
|
84
116
|
# Get a list of dimension class instances
|
85
117
|
def dimension_classes
|
86
|
-
dimensions.collect
|
118
|
+
dimensions.collect do |dimension_name|
|
119
|
+
dimension_class(dimension_name)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Get the dimension class for the specified dimension name
|
124
|
+
def dimension_class(dimension_name)
|
125
|
+
fact_class.dimension_relationships[dimension_name.to_sym].class_name.constantize
|
87
126
|
end
|
88
127
|
|
128
|
+
# Get the cube logger
|
89
129
|
def logger
|
90
130
|
@logger ||= Logger.new('cube.log')
|
91
131
|
end
|
92
132
|
|
133
|
+
# Get the time when the fact or any dimension referenced in this cube
|
134
|
+
# was last modified
|
93
135
|
def last_modified
|
94
|
-
lm =
|
136
|
+
lm = fact_class.last_modified
|
95
137
|
dimensions.each do |dimension|
|
96
|
-
dim = Dimension.class_for_name(dimension)
|
138
|
+
dim = ActiveWarehouse::Dimension.class_for_name(dimension)
|
97
139
|
lm = dim.last_modified if dim.last_modified > lm
|
98
140
|
end
|
99
141
|
lm
|
100
142
|
end
|
101
143
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
condition_args = []
|
128
|
-
meta_data_attributes.each do |key, value|
|
129
|
-
conditions << "#{key} = ?"
|
130
|
-
condition_args << value
|
131
|
-
end
|
132
|
-
conditions = [conditions.join(' and ')] + condition_args
|
133
|
-
meta_data = AggregateMetaData.find(:first, :conditions => conditions)
|
134
|
-
unless meta_data
|
135
|
-
meta_data = AggregateMetaData.create(meta_data_attributes)
|
136
|
-
end
|
137
|
-
|
138
|
-
# Construct the aggregate class instance
|
139
|
-
aggregate_class = Class.new(ActiveWarehouse::Aggregate)
|
140
|
-
aggregate_class.name = "Agg#{meta_data.id}"
|
141
|
-
logger.debug "Constructed aggregate #{aggregate_class.name}"
|
142
|
-
aggregate_class.cube = self
|
143
|
-
aggregate_class.dimension1 = column_dimension
|
144
|
-
aggregate_class.dimension1_hierarchy_name = column_hierarchy_name
|
145
|
-
aggregate_class.dimension2 = row_dimension
|
146
|
-
aggregate_class.dimension2_hierarchy_name = row_hierarchy_name
|
147
|
-
|
148
|
-
# Create the underlying aggregate storage table
|
149
|
-
# TODO: fix the bug of data not being found when a storage table rebuild occurs
|
150
|
-
force_storage_table_rebuild = options[:force] || aggregate_class.needs_rebuild?(last_modified)
|
151
|
-
logger.debug "Force storage table rebuild? #{force_storage_table_rebuild}"
|
152
|
-
aggregate_class.create_storage_table(force_storage_table_rebuild)
|
153
|
-
|
154
|
-
# Keep a reference to the aggregate class instance
|
155
|
-
@aggregates[meta_data.id] = aggregate_class
|
156
|
-
end
|
157
|
-
end
|
158
|
-
end
|
159
|
-
end
|
144
|
+
# The temp directory for storing files during warehouse rebuilds
|
145
|
+
attr_accessor :temp_dir
|
146
|
+
def temp_dir
|
147
|
+
@temp_dir ||= '/tmp'
|
148
|
+
end
|
149
|
+
|
150
|
+
# Specify the ActiveRecord class to connect through
|
151
|
+
# Note: this is a potential directive in a Cube subclass
|
152
|
+
attr_accessor :connect_through
|
153
|
+
def connect_through
|
154
|
+
@connect_through ||= ActiveRecord::Base
|
155
|
+
end
|
156
|
+
|
157
|
+
# Get an adapter connection
|
158
|
+
def connection
|
159
|
+
connect_through.connection
|
160
|
+
end
|
161
|
+
|
162
|
+
# Defaults to NoAggregate strategy.
|
163
|
+
def aggregate
|
164
|
+
@aggregate ||= ActiveWarehouse::Aggregate::NoAggregate.new(self)
|
165
|
+
end
|
166
|
+
|
167
|
+
def aggregate_class(agg_class)
|
168
|
+
@aggregate = agg_class.new(self)
|
160
169
|
end
|
161
170
|
|
162
171
|
end
|
163
172
|
|
164
173
|
public
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
#
|
173
|
-
#
|
174
|
-
#
|
175
|
-
#
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
#cs += Benchmark.realtime do
|
187
|
-
data_array = agg_record.data_fields.collect{ |data_field_name| agg_record.send(data_field_name.to_sym) }
|
188
|
-
#end
|
189
|
-
|
190
|
-
# convert to an average where necessary
|
191
|
-
# TODO: implement
|
192
|
-
|
193
|
-
# add calculated fields to the data array
|
194
|
-
#calc += Benchmark.realtime do
|
195
|
-
calculated_fields.each do |calculated_field|
|
196
|
-
options = calculated_field_options[calculated_field]
|
197
|
-
data_array << options[:block].call(agg_record)
|
198
|
-
end
|
199
|
-
#end
|
200
|
-
|
201
|
-
# add the data array to the aggregate map
|
202
|
-
#as += Benchmark.realtime do
|
203
|
-
agg_map.add_data(agg_record.dimension2_path, agg_record.dimension1_path, data_array)
|
204
|
-
#end
|
205
|
-
end
|
206
|
-
|
207
|
-
#end
|
208
|
-
#puts "creating the agg_map took #{s}s"
|
209
|
-
#puts "total time spent collecting the data: #{cs}s, avg:#{cs/agg_records.length}s (#{(cs/s) * 100}%)"
|
210
|
-
#puts "total time spent adding the data: #{as}s, avg:#{as/agg_records.length}s (#{(as/s) * 100}%)"
|
211
|
-
#puts "total time spent calculating fields: #{calc}s, avg:#{calc/agg_records.length}s (#{(calc/s) * 100}%)"
|
212
|
-
agg_map
|
174
|
+
# Query the cube. The column dimension, column hierarchy, row dimension and
|
175
|
+
# row hierarchy are all required.
|
176
|
+
#
|
177
|
+
# The conditions value is a String that represents a SQL condition appended
|
178
|
+
# to the where clause. TODO: this may eventually be converted to another
|
179
|
+
# query language.
|
180
|
+
#
|
181
|
+
# The cstage value represents the current column drill down stage and
|
182
|
+
# defaults to 0.
|
183
|
+
#
|
184
|
+
# The rstage value represents the current row drill down stage and defaults
|
185
|
+
# to 0. Filters contains key/value pairs where the key is a string of
|
186
|
+
# 'dimension.column' and the value is the value to filter by. For example:
|
187
|
+
#
|
188
|
+
# filters = {'date.calendar_year' => 2007, 'product.category' => 'Food'}
|
189
|
+
# query(:date, :cy, :store, :region, 1, 0, filters)
|
190
|
+
#
|
191
|
+
# Note that product.category refers to a dimension which is not actually
|
192
|
+
# visible but which is both part of the cube and is used for filtering.
|
193
|
+
def query(*args)
|
194
|
+
self.class.aggregate.query(*args)
|
213
195
|
end
|
214
196
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
k = Aggregate.key(column_dimension, column_hierarchy, row_dimension, row_hierarchy)
|
219
|
-
if aggregates[k].nil?
|
220
|
-
self.class.logger.debug("Aggregate #{k} not found in cache")
|
221
|
-
conditions = ['cube_name = ?', self.class.name]
|
222
|
-
conditions[0] << ' and dimension1 = ? and dimension1_hierarchy = ? and dimension2 = ? and dimension2_hierarchy = ?'
|
223
|
-
conditions << column_dimension.to_s
|
224
|
-
conditions << column_hierarchy.to_s
|
225
|
-
conditions << row_dimension.to_s
|
226
|
-
conditions << row_hierarchy.to_s
|
227
|
-
|
228
|
-
conditions_reversed = ['cube_name = ?', self.class.name]
|
229
|
-
conditions_reversed[0] << ' and dimension1 = ? and dimension1_hierarchy = ? and dimension2 = ? and dimension2_hierarchy = ?'
|
230
|
-
conditions_reversed << row_dimension.to_s
|
231
|
-
conditions_reversed << row_hierarchy.to_s
|
232
|
-
conditions_reversed << column_dimension.to_s
|
233
|
-
conditions_reversed << column_hierarchy.to_s
|
234
|
-
|
235
|
-
aggregate_meta_data = AggregateMetaData.find(:first, :conditions => conditions)
|
236
|
-
aggregate_meta_data ||= AggregateMetaData.find(:first, :conditions => conditions_reversed)
|
237
|
-
if aggregate_meta_data.nil?
|
238
|
-
self.class.rebuild
|
239
|
-
aggregate_meta_data = AggregateMetaData.find(:first, :conditions => conditions)
|
240
|
-
raise "Cannot find aggregate meta data for key #{k}" if aggregate_meta_data.nil?
|
241
|
-
end
|
242
|
-
aggregate_class = self.class.aggregates[aggregate_meta_data.id]
|
243
|
-
if aggregate_class.nil?
|
244
|
-
self.class.rebuild
|
245
|
-
aggregate_class = self.class.aggregates[aggregate_meta_data.id]
|
246
|
-
raise "Cannot find aggregate for id #{aggregate_meta_data.id}" if aggregate_class.nil?
|
247
|
-
end
|
248
|
-
|
249
|
-
aggregates[k] = aggregate_class.find(:all,
|
250
|
-
:conditions => ['(dimension1_stage = ? and dimension2_stage = ?) or (dimension1_stage = ? and dimension2_stage = ?)',
|
251
|
-
cstage, rstage, rstage, cstage])
|
252
|
-
end
|
253
|
-
aggregates[k]
|
197
|
+
# Get the database connection (delegates to Cube.connection class method)
|
198
|
+
def connection
|
199
|
+
self.class.connection
|
254
200
|
end
|
255
201
|
|
256
|
-
# Get a hash of all aggregate data
|
257
|
-
def aggregates
|
258
|
-
@aggregates ||= {}
|
259
|
-
end
|
260
202
|
end
|
261
203
|
|
262
|
-
# In-memory map of aggregate values
|
263
|
-
class AggregateMap
|
264
|
-
attr_reader :length
|
265
|
-
|
266
|
-
# Initialize the aggregate map
|
267
|
-
def initialize
|
268
|
-
@m = {}
|
269
|
-
end
|
270
|
-
|
271
|
-
# Return true if the aggregate map includes the specified row path
|
272
|
-
def has_row_path?(row_path)
|
273
|
-
@m.has_key?(row_path)
|
274
|
-
end
|
275
|
-
|
276
|
-
# Get the value for the specified row path, column path and field index
|
277
|
-
def value(row_path, col_path, field_index)
|
278
|
-
#puts "Getting value for #{row_path}, #{col_path} [field=#{field_index}]"
|
279
|
-
row = @m[row_path]
|
280
|
-
return 0 if row.nil?
|
281
|
-
col = row[col_path]
|
282
|
-
return 0 if col.nil?
|
283
|
-
return col[field_index] || 0
|
284
|
-
end
|
285
|
-
|
286
|
-
# Get an array of the values for the specified row path and column path
|
287
|
-
def values(row_path, col_path)
|
288
|
-
row = @m[row_path]
|
289
|
-
return Array.new(length, 0) if row.nil?
|
290
|
-
col = row[col_path]
|
291
|
-
return Array.new(length, 0) if col.nil?
|
292
|
-
col
|
293
|
-
end
|
294
|
-
|
295
|
-
# Add an array of data for the given row and column path
|
296
|
-
def add_data(row_path, col_path, data_array)
|
297
|
-
@length ||= data_array.length
|
298
|
-
#puts "Adding data for #{row_path}, #{col_path} [data=[#{data_array.join(',')}]]"
|
299
|
-
@m[row_path] ||= {}
|
300
|
-
@m[row_path][col_path] = data_array
|
301
|
-
end
|
302
|
-
end
|
303
204
|
end
|