activewarehouse 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -14
- data/Rakefile +16 -5
- data/doc/references.txt +4 -0
- data/generators/bridge/templates/migration.rb +9 -2
- data/generators/bridge/templates/unit_test.rb +8 -0
- data/generators/date_dimension/USAGE +1 -0
- data/generators/date_dimension/date_dimension_generator.rb +16 -0
- data/generators/date_dimension/templates/fixture.yml +5 -0
- data/generators/date_dimension/templates/migration.rb +31 -0
- data/generators/date_dimension/templates/model.rb +3 -0
- data/generators/date_dimension/templates/unit_test.rb +8 -0
- data/generators/dimension/templates/migration.rb +1 -10
- data/generators/dimension_view/dimension_view_generator.rb +2 -2
- data/generators/dimension_view/templates/migration.rb +8 -2
- data/generators/fact/templates/migration.rb +2 -0
- data/generators/time_dimension/USAGE +1 -0
- data/generators/time_dimension/templates/fixture.yml +5 -0
- data/generators/time_dimension/templates/migration.rb +12 -0
- data/generators/time_dimension/templates/model.rb +3 -0
- data/generators/time_dimension/templates/unit_test.rb +8 -0
- data/generators/time_dimension/time_dimension_generator.rb +14 -0
- data/lib/active_warehouse.rb +13 -2
- data/lib/active_warehouse/aggregate.rb +54 -253
- data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
- data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
- data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
- data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
- data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
- data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
- data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
- data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
- data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
- data/lib/active_warehouse/aggregate_field.rb +49 -0
- data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
- data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
- data/lib/active_warehouse/builder.rb +2 -1
- data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
- data/lib/active_warehouse/builder/generator/generator.rb +13 -0
- data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
- data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
- data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
- data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
- data/lib/active_warehouse/calculated_field.rb +27 -0
- data/lib/active_warehouse/compat/compat.rb +4 -4
- data/lib/active_warehouse/cube.rb +126 -225
- data/lib/active_warehouse/cube_query_result.rb +69 -0
- data/lib/active_warehouse/dimension.rb +64 -29
- data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
- data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
- data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
- data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
- data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
- data/lib/active_warehouse/fact.rb +119 -40
- data/lib/active_warehouse/field.rb +74 -0
- data/lib/active_warehouse/ordered_hash.rb +34 -0
- data/lib/active_warehouse/prejoin_fact.rb +97 -0
- data/lib/active_warehouse/report/abstract_report.rb +40 -14
- data/lib/active_warehouse/report/chart_report.rb +3 -3
- data/lib/active_warehouse/report/table_report.rb +8 -3
- data/lib/active_warehouse/version.rb +1 -1
- data/lib/active_warehouse/view/report_helper.rb +144 -34
- data/tasks/active_warehouse_tasks.rake +28 -10
- metadata +107 -30
@@ -0,0 +1,49 @@
|
|
1
|
+
module ActiveWarehouse
  # A Field that wraps a single column of a fact table. Fields of this
  # kind represent columns that should be aggregated.
  class AggregateField < Field

    attr_reader :strategy_name

    # Build the aggregate field from a fact column.
    #
    # +fact_class+ is the class of the fact table this field is found in.
    # +column_definition+ is the ActiveRecord ColumnDefinition instance for
    # this column; its name and type are handed to Field, and its limit,
    # scale and precision are recorded on this instance.
    # +strategy_name+ is the name of the aggregation strategy to be used,
    # defaults to :sum
    # +field_options+ is a hash of raw options from the original aggregate
    # definition.
    def initialize(fact_class, column_definition, strategy_name = :sum, field_options = {})
      super(fact_class, column_definition.name, column_definition.type, field_options)
      @column_definition = column_definition
      @limit, @scale, @precision =
        column_definition.limit, column_definition.scale, column_definition.precision
      @strategy_name = strategy_name
    end

    # The Fact that has this field (simply returns owning_class).
    def fact_class
      owning_class
    end

    # True when a :semiadditive entry is present (non-nil) in field_options.
    def is_semiadditive?
      field_options[:semiadditive].nil? ? false : true
    end

    # Returns the Dimension that this semiadditive fact is over, resolved
    # through Dimension.to_dimension from the :semiadditive option.
    def semiadditive_over
      dimension_name = field_options[:semiadditive]
      Dimension.to_dimension(dimension_name)
    end

    # Overrides Field.label: an explicitly set @label wins, otherwise the
    # inherited label is suffixed with "_" and the aggregation strategy name.
    def label
      @label || "#{super}_#{strategy_name}"
    end

    # The :levels_from_parent option, or an empty Array when the option is
    # not set.
    def levels_from_parent
      levels = field_options[:levels_from_parent]
      return [] if levels.nil?
      levels
    end

    # Type cast +value+ using the underlying column definition.
    def type_cast(value)
      @column_definition.type_cast(value)
    end
  end
end
|
@@ -2,9 +2,11 @@ module ActiveWarehouse #:nodoc
|
|
2
2
|
# Implements a bridge table.
|
3
3
|
class Bridge < ActiveRecord::Base
|
4
4
|
class << self
|
5
|
-
# Get the table name. By default the table name will be the name of the
|
5
|
+
# Get the table name. By default the table name will be the name of the
|
6
|
+
# bridge in singular form.
|
6
7
|
#
|
7
|
-
# Example: DepartmentHierarchyBridge will have a table called
|
8
|
+
# Example: DepartmentHierarchyBridge will have a table called
|
9
|
+
# department_hierarchy_bridge
|
8
10
|
def table_name
|
9
11
|
name = self.name.demodulize.underscore
|
10
12
|
set_table_name(name)
|
@@ -12,4 +14,6 @@ module ActiveWarehouse #:nodoc
|
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
15
|
-
end
|
17
|
+
end
|
18
|
+
|
19
|
+
require 'active_warehouse/bridge/hierarchy_bridge'
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module ActiveWarehouse #:nodoc:
  # Bridge class that models ragged hierarchies.
  #
  # Each setting below is kept in a class-level instance variable: the
  # +set_*+ method stores a value and the matching reader returns it,
  # falling back to (and remembering) a default when nothing was stored.
  # The stored names are presumably bridge table column names -- confirm
  # against the code that reads them.
  class HierarchyBridge < Bridge
    # Store the name returned by levels_from_parent.
    def self.set_levels_from_parent(name)
      @levels_from_parent = name
    end

    # Stored name, default "levels_from_parent".
    def self.levels_from_parent
      @levels_from_parent || (@levels_from_parent = "levels_from_parent")
    end

    # Store the name returned by effective_date.
    def self.set_effective_date(name)
      @effective_date = name
    end

    # Stored name, default "effective_date".
    def self.effective_date
      @effective_date || (@effective_date = "effective_date")
    end

    # Store the name returned by expiration_date.
    def self.set_expiration_date(name)
      @expiration_date = name
    end

    # Stored name, default "expiration_date".
    def self.expiration_date
      @expiration_date || (@expiration_date = "expiration_date")
    end

    # Store the name returned by top_flag.
    def self.set_top_flag(name)
      @top_flag = name
    end

    # Stored name, default "top_flag".
    def self.top_flag
      @top_flag || (@top_flag = "top_flag")
    end

    # Store the value returned by top_flag_value.
    def self.set_top_flag_value(value)
      @top_flag_value = value
    end

    # Stored value, default 'Y'. A stored nil or false also yields 'Y'.
    def self.top_flag_value
      @top_flag_value || (@top_flag_value = 'Y')
    end
  end
end
|
@@ -30,8 +30,8 @@ module ActiveWarehouse #:nodoc:
|
|
30
30
|
# accessed by name.
|
31
31
|
def build(options={})
|
32
32
|
records = []
|
33
|
-
date = start_date
|
34
|
-
while date <= end_date
|
33
|
+
date = start_date.to_time
|
34
|
+
while date <= end_date.to_time
|
35
35
|
record = {}
|
36
36
|
record[:date] = date.strftime("%m/%d/%Y")
|
37
37
|
record[:full_date_description] = date.strftime("%B %d,%Y")
|
@@ -52,6 +52,7 @@ module ActiveWarehouse #:nodoc:
|
|
52
52
|
record[:calendar_month_number_in_year] = date.month
|
53
53
|
record[:calendar_year_month] = date.strftime("%Y-%m")
|
54
54
|
record[:calendar_quarter] = "Q#{date.quarter}"
|
55
|
+
record[:calendar_quarter_number_in_year] = date.quarter
|
55
56
|
record[:calendar_year_quarter] = "#{date.strftime('%Y')}-#{record[:calendar_quarter]}"
|
56
57
|
#record[:calendar_half_year] =
|
57
58
|
record[:calendar_year] = "#{date.year}"
|
@@ -62,8 +63,10 @@ module ActiveWarehouse #:nodoc:
|
|
62
63
|
record[:fiscal_year_month] = "FY#{date.fiscal_year}-" + date.fiscal_year_month.to_s.rjust(2, '0')
|
63
64
|
record[:fiscal_quarter] = "FY Q#{date.fiscal_year_quarter}"
|
64
65
|
record[:fiscal_year_quarter] = "FY#{date.fiscal_year}-Q#{date.fiscal_year_quarter}"
|
66
|
+
record[:fiscal_year_quarter_number] = date.fiscal_year_quarter
|
65
67
|
#record[:fiscal_half_year] =
|
66
68
|
record[:fiscal_year] = "FY#{date.fiscal_year}"
|
69
|
+
record[:fiscal_year_number] = date.fiscal_year
|
67
70
|
record[:holiday_indicator] = holiday_indicators.include?(date) ? 'Holiday' : 'Nonholiday'
|
68
71
|
record[:weekday_indicator] = weekday_indicators[date.wday]
|
69
72
|
record[:selling_season] = 'None'
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ActiveWarehouse #:nodoc:
  module Builder #:nodoc:
    module Generator #:nodoc:
      # Generate a name consisting of one or more words from word groups
      class NameGenerator < ActiveWarehouse::Builder::Generator::Generator
        # Build a name by walking the word groups position by position:
        # the first word of every group, then the second word of every
        # group, and so on, joined with the separator.
        #
        # Options:
        #
        # * <tt>:word_groups</tt>: An Array of word Arrays (required). The
        #   length of the first group decides how many positions are read.
        # * <tt>:separator</tt>: The String placed between words
        #   (defaults to a single space)
        #
        # Returns the joined String. The options Hash is not modified.
        def next(options={})
          separator = options[:separator] || ' '
          word_groups = options[:word_groups]
          parts = []
          # times stops at length - 1; an inclusive upto(length) would read
          # one position past the end and append a nil for every group,
          # leaving dangling separators at the end of the name.
          word_groups.first.length.times do |i|
            word_groups.each do |word_group|
              parts << word_group[i]
            end
          end
          parts.join(separator)
        end
      end
    end
  end
end
|
@@ -8,6 +8,7 @@ module ActiveWarehouse #:nodoc:
|
|
8
8
|
# Hash of names mapped to generators where the name is the column name
|
9
9
|
attr_reader :column_generators
|
10
10
|
|
11
|
+
# Initialize the random data builder
|
11
12
|
def initialize
|
12
13
|
@generators = {
|
13
14
|
Fixnum => FixnumGenerator.new,
|
@@ -20,6 +21,9 @@ module ActiveWarehouse #:nodoc:
|
|
20
21
|
@column_generators = {}
|
21
22
|
end
|
22
23
|
|
24
|
+
# Build the data for the specified class. Name may be a Class (which must descend from ActiveWarehouse::Dimension
|
25
|
+
# or ActiveWarehouse::Fact), a String or a Symbol. String or Symbol will be converted to a class name and then
|
26
|
+
# passed back to this method.
|
23
27
|
def build(name, options={})
|
24
28
|
case name
|
25
29
|
when Class
|
@@ -44,6 +48,7 @@ module ActiveWarehouse #:nodoc:
|
|
44
48
|
# Build test dimension data for the specified dimension name.
|
45
49
|
#
|
46
50
|
# Options:
|
51
|
+
#
|
47
52
|
# * <tt>:rows</tt>: The number of rows to create (defaults to 100)
|
48
53
|
# * <tt>:generators</tt>: A map of generators where each key is Fixnum, Float, Date, Time, String, or Object and the
|
49
54
|
# value extends AbstractGenerator.
|
@@ -57,6 +62,9 @@ module ActiveWarehouse #:nodoc:
|
|
57
62
|
row = {}
|
58
63
|
dimension_class.content_columns.each do |column|
|
59
64
|
generator = (options[:generators][column.klass] || @column_generators[column.name] || @generators[column.klass])
|
65
|
+
if generator.nil?
|
66
|
+
raise ArgumentError, "No generator found, unknown column type?: #{column.klass}"
|
67
|
+
end
|
60
68
|
row[column.name] = generator.generate(column, options)
|
61
69
|
end
|
62
70
|
rows << row
|
@@ -74,6 +82,7 @@ module ActiveWarehouse #:nodoc:
|
|
74
82
|
# * <tt>:fk_limit</tt>: A Hash of foreign key limits, where each key is the name of column and the value is
|
75
83
|
# a number. For example options[:fk_limit][:date_id] = 1000 would limit the foreign key values to something between
|
76
84
|
# 1 and 1000, inclusive.
|
85
|
+
# * <tt>:dimensions</tt>: The number of available dimension FKs
|
77
86
|
def build_fact(name, options={})
|
78
87
|
options[:rows] ||= 100
|
79
88
|
options[:generators] ||= {}
|
@@ -87,9 +96,10 @@ module ActiveWarehouse #:nodoc:
|
|
87
96
|
generator = (options[:generators][column.klass] || @generators[column.klass])
|
88
97
|
row[column.name] = generator.generate(column, options)
|
89
98
|
end
|
90
|
-
fact_class.
|
91
|
-
|
92
|
-
|
99
|
+
fact_class.dimension_relationships.each do |name, reflection|
|
100
|
+
# it would be better to get a count of rows from the dimension tables
|
101
|
+
fk_limit = (options[:fk_limit][reflection.primary_key_name] || options[:dimensions] || 100) - 1
|
102
|
+
row[reflection.primary_key_name] = rand(fk_limit) + 1
|
93
103
|
end
|
94
104
|
rows << row
|
95
105
|
end
|
@@ -112,8 +122,8 @@ module ActiveWarehouse #:nodoc:
|
|
112
122
|
# Generate a random date value
|
113
123
|
#
|
114
124
|
# Options:
|
115
|
-
#
|
116
|
-
#
|
125
|
+
# * <tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
|
126
|
+
# * <tt>:end_date</tt>: The end date as a Date or Time object (default now)
|
117
127
|
def generate(column, options={})
|
118
128
|
end_date = (options[:end_date] || Time.now).to_date
|
119
129
|
start_date = (options[:start_date] || 1.year.ago).to_date
|
@@ -125,8 +135,8 @@ module ActiveWarehouse #:nodoc:
|
|
125
135
|
# Basic Time generator
|
126
136
|
#
|
127
137
|
# Options:
|
128
|
-
#
|
129
|
-
#
|
138
|
+
# * <tt>:start_date</tt>: The start date as a Date or Time object (default 1 year ago)
|
139
|
+
# * <tt>:end_date</tt>: The end date as a Date or Time object (default now)
|
130
140
|
class TimeGenerator < DateGenerator #:nodoc:
|
131
141
|
# Generate a random Time value
|
132
142
|
def generate(column, options={})
|
@@ -139,8 +149,8 @@ module ActiveWarehouse #:nodoc:
|
|
139
149
|
# Generate an integer from 0 to options[:max] inclusive
|
140
150
|
#
|
141
151
|
# Options:
|
142
|
-
#
|
143
|
-
#
|
152
|
+
# * <tt>:max</tt>: The maximum allowed value (default 1000)
|
153
|
+
# * <tt>:min</tt>: The minimum allowed value (default 0)
|
144
154
|
def generate(column, options={})
|
145
155
|
options[:max] ||= 1000
|
146
156
|
options[:min] ||= 0
|
@@ -153,7 +163,7 @@ module ActiveWarehouse #:nodoc:
|
|
153
163
|
# Generate a float from 0 to options[:max] inclusive (default 1000)
|
154
164
|
#
|
155
165
|
# Options:
|
156
|
-
#
|
166
|
+
# * <tt>:max</tt>: The maximum allowed value (default 1000)
|
157
167
|
def generate(column, options={})
|
158
168
|
options[:max] ||= 1000
|
159
169
|
rand * options[:max].to_f
|
@@ -165,7 +175,7 @@ module ActiveWarehouse #:nodoc:
|
|
165
175
|
# Generate a big decimal from 0 to options[:max] inclusive (default 1000)
|
166
176
|
#
|
167
177
|
# Options:
|
168
|
-
#
|
178
|
+
# * <tt>:max</tt>: The maximum allowed value (default 1000)
|
169
179
|
def generate(column, options={})
|
170
180
|
options[:max] ||= 1000
|
171
181
|
BigDecimal.new((rand * options[:max].to_f).to_s) # TODO: need BigDecimal type?
|
@@ -0,0 +1,54 @@
|
|
1
|
+
Dir[File.dirname(__FILE__) + "/generator/*.rb"].each { |file| require(file) }
|
2
|
+
|
3
|
+
module ActiveWarehouse #:nodoc:
  module Builder #:nodoc:
    # Unlike the RandomDataBuilder, which puts truly random data in the warehouse, this
    # generator uses collections of possible values to construct semi-understandable data
    class TestDataBuilder
      def initialize

      end

      # Build an Array of row Hashes, one value per field per row.
      #
      # +fields+ is the list of field names to put in each row.
      # +field_definitions+ maps each field name to a generator definition:
      # a Class (instantiated with no arguments), a String or Symbol (turned
      # into a "<name>Generator" class name), an Array of word groups (wrapped
      # in a NameGenerator) or a ready-made generator instance.
      # +options+ is passed to every generator's +next+ call.
      #
      # Options:
      #
      # * <tt>:number</tt>: The number of rows to create (defaults to 100)
      #
      # Raises ArgumentError if a field has no generator definition, and a
      # RuntimeError if a definition is of an unsupported kind.
      #
      # Usage:
      #
      #   fields = [:id,:product_name,:product_description,:suggested_retail_price]
      #   field_definitions = {
      #     :id => :sequence,                                                   # symbol or string
      #     :product_name => [['Foo','Bar'],['Baz','Bing']],                    # array
      #     :product_description => IpsumLorumGenerator,                        # class
      #     :suggested_retail_price => RandomNumberGenerator.new(0.00, 100.00)  # generator instance
      #   }
      def build(fields, field_definitions, options={})
        options[:number] ||= 100
        rows = []
        generators = {}
        # set up all of the generators first
        field_definitions.each do |name, fd|
          generators[name] = generator_for(fd)
        end

        # generate all of the rows; counting from 1 yields exactly
        # options[:number] rows (counting from 0 produced one extra)
        1.upto(options[:number]) do
          row = {}
          fields.each do |field|
            generator = generators[field]
            if generator.nil?
              raise ArgumentError, "No generator defined for field: #{field}"
            end
            row[field] = generator.next(options)
          end
          rows << row
        end

        rows
      end

      private

      # Turn a single field definition into a generator instance.
      def generator_for(fd)
        case fd
        when Class
          fd.new
        when String, Symbol
          # NOTE(review): constantize resolves from the top level, so the
          # named generator class must be reachable there -- confirm.
          "#{fd}Generator".classify.constantize.new
        when Array
          # NameGenerator lives in the Generator namespace, so it has to be
          # qualified from here.
          # NOTE(review): assumes the generator constructor accepts the word
          # groups -- confirm against Generator::Generator#initialize.
          Generator::NameGenerator.new(fd)
        when Generator, Generator::Generator
          # Generator alone is the namespace module; generator instances
          # descend from the Generator::Generator base class.
          fd
        else
          raise "Invalid generator specified: #{fd}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module ActiveWarehouse #:nodoc:
  # A field whose value is computed by a Proc rather than read directly
  class CalculatedField < Field
    attr_reader :block

    # Create the calculated field.
    #
    # +fact_class+ is the fact class that the field is calculated in
    # +name+ is the name of the calculated field (converted to a String)
    # +type+ is the type of the calculated field (defaults to :integer)
    # +field_options+ is a Hash of options for the field
    #
    # A block is mandatory; it should take a single argument that is the
    # record itself. ArgumentError is raised when no block is supplied.
    def initialize(fact_class, name, type = :integer, field_options = {}, &block)
      if block.nil?
        raise ArgumentError, "A block is required for the calculated field #{name} in #{fact_class}"
      end
      super(fact_class, name.to_s, type, field_options)
      @block = block
    end

    # Run the stored block against the Hash of type-casted values and
    # return whatever the block returns.
    def calculate(values)
      @block.call(values)
    end
  end
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# Provides 1.1.6 compatibility
|
2
|
-
module ActiveRecord
|
3
|
-
module Calculations
|
4
|
-
module ClassMethods
|
2
|
+
module ActiveRecord #:nodoc:
|
3
|
+
module Calculations #:nodoc:
|
4
|
+
module ClassMethods #:nodoc:
|
5
5
|
protected
|
6
6
|
def construct_count_options_from_legacy_args(*args)
|
7
7
|
options = {}
|
@@ -37,7 +37,7 @@ module ActiveRecord
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
class Module
|
40
|
+
class Module #:nodoc:
|
41
41
|
def alias_method_chain(target, feature)
|
42
42
|
# Strip out punctuation on predicates or bang methods since
|
43
43
|
# e.g. target?_without_feature is not a valid method name.
|
@@ -1,8 +1,12 @@
|
|
1
1
|
module ActiveWarehouse
|
2
|
-
# A Cube represents a collection of dimensions operating on a fact. The Cube
|
3
|
-
#
|
2
|
+
# A Cube represents a collection of dimensions operating on a fact. The Cube
|
3
|
+
# provides a front-end for getting at the
|
4
|
+
# underlying data. Cubes support pluggable aggregation. The default aggregation
|
5
|
+
# is the NoAggregate which goes directly
|
6
|
+
# to the fact and dimensions to answer queries.
|
4
7
|
class Cube
|
5
8
|
class << self
|
9
|
+
|
6
10
|
# Callback which is invoked when subclasses are created
|
7
11
|
def inherited(subclass)
|
8
12
|
subclasses << subclass
|
@@ -13,54 +17,82 @@ module ActiveWarehouse
|
|
13
17
|
@subclasses ||= []
|
14
18
|
end
|
15
19
|
|
16
|
-
# Defines the dimensions that this cube pivots on.
|
20
|
+
# Defines the dimensions that this cube pivots on. If the fact name and
|
21
|
+
# cube name are different (for example, if a PurchaseCube does not report
|
22
|
+
# on a PurchaseFact) then you *must* declare the <code>reports_on</code>
|
23
|
+
# first.
|
17
24
|
def pivots_on(*dimension_list)
|
18
|
-
|
25
|
+
@dimensions_hierarchies = OrderedHash.new
|
26
|
+
@dimensions = []
|
19
27
|
dimension_list.each do |dimension|
|
20
|
-
|
28
|
+
case dimension
|
29
|
+
when Symbol, String
|
30
|
+
dimensions << dimension.to_sym
|
31
|
+
dimensions_hierarchies[dimension.to_sym] = fact_class.dimension_class(dimension).hierarchies
|
32
|
+
when Hash
|
33
|
+
dimension_name = dimension.keys.first.to_sym
|
34
|
+
dimensions << dimension_name
|
35
|
+
dimensions_hierarchies[dimension_name] = [dimension[dimension_name]].flatten
|
36
|
+
else
|
37
|
+
raise ArgumentError, "Each argument to pivot_on must be a symbol, string or Hash"
|
38
|
+
end
|
21
39
|
end
|
22
40
|
end
|
23
41
|
alias :pivot_on :pivots_on
|
24
42
|
|
25
|
-
# Defines the fact that this cube
|
26
|
-
|
27
|
-
|
28
|
-
|
43
|
+
# Defines the fact name, without the 'Fact' suffix, that this cube
|
44
|
+
# reports on. For instance, if you have PurchaseFact, you could then
|
45
|
+
# call <code>reports_on :purchase</code>.
|
46
|
+
#
|
47
|
+
# The default value for reports_on is to take the name of the cube,
|
48
|
+
# i.e. PurchaseCube, and remove the Cube suffix. The assumption is that
|
49
|
+
# your Cube name matches your Fact name.
|
50
|
+
def reports_on(fact_name)
|
51
|
+
@fact_name = fact_name
|
29
52
|
end
|
30
53
|
alias :report_on :reports_on
|
31
54
|
|
32
|
-
# Rebuild
|
55
|
+
# Rebuild the data warehouse.
|
33
56
|
def rebuild(options={})
|
34
|
-
|
35
|
-
options[:force] ||= false
|
36
|
-
build_aggregate_classes(options)
|
57
|
+
populate(options)
|
37
58
|
end
|
38
59
|
|
39
|
-
# Populate
|
60
|
+
# Populate the data warehouse. Delegate to aggregate.populate
|
40
61
|
def populate(options={})
|
41
|
-
|
42
|
-
aggregates.each do |agg_id, agg_clazz|
|
43
|
-
if agg_clazz.needs_rebuild? || options[:force]
|
44
|
-
logger.debug "Populating aggregate class #{agg_clazz.name}"
|
45
|
-
agg_clazz.populate
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
# Get the fact that this cube reports on
|
51
|
-
def fact
|
52
|
-
@fact
|
62
|
+
aggregate.populate
|
53
63
|
end
|
54
64
|
|
55
65
|
# Get the dimensions that this cube pivots on
|
56
66
|
def dimensions
|
57
|
-
@dimensions ||=
|
67
|
+
@dimensions ||= fact_class.dimension_relationships.collect{|k,v| k}
|
58
68
|
end
|
59
69
|
|
60
|
-
# Get
|
61
|
-
|
62
|
-
|
63
|
-
@
|
70
|
+
# Get an OrderedHash of each dimension mapped to its hierarchies which
|
71
|
+
# will be included in the cube
|
72
|
+
def dimensions_hierarchies
|
73
|
+
if @dimensions_hierarchies.nil?
|
74
|
+
@dimensions_hierarchies = OrderedHash.new
|
75
|
+
dimensions.each do |dimension|
|
76
|
+
@dimensions_hierarchies[dimension] = fact_class.dimension_class(dimension).hierarchies
|
77
|
+
end
|
78
|
+
end
|
79
|
+
@dimensions_hierarchies
|
80
|
+
end
|
81
|
+
|
82
|
+
# returns true if this cube pivots on a hierarchical dimension.
|
83
|
+
def pivot_on_hierarchical_dimension?
|
84
|
+
dimension_classes.each do |dimension|
|
85
|
+
return true if dimension.hierarchical_dimension?
|
86
|
+
end
|
87
|
+
return false
|
88
|
+
end
|
89
|
+
|
90
|
+
# returns the aggregate fields for this cube
|
91
|
+
# removing the aggregate fields that are defined in fact class that are
|
92
|
+
# related to hierarchical dimension, but the cube doesn't pivot on any
|
93
|
+
# hierarchical dimensions
|
94
|
+
def aggregate_fields
|
95
|
+
fact_class.aggregate_fields.reject {|field| !pivot_on_hierarchical_dimension? and !field.levels_from_parent.empty? }
|
64
96
|
end
|
65
97
|
|
66
98
|
# Get the class name for the specified cube name
|
@@ -73,7 +105,7 @@ module ActiveWarehouse
|
|
73
105
|
|
74
106
|
# Get the aggregated fact class name
|
75
107
|
def fact_class_name
|
76
|
-
Fact.class_name(
|
108
|
+
ActiveWarehouse::Fact.class_name(@fact_name || name.sub(/Cube$/,'').underscore.to_sym)
|
77
109
|
end
|
78
110
|
|
79
111
|
# Get the aggregated fact class instance
|
@@ -83,221 +115,90 @@ module ActiveWarehouse
|
|
83
115
|
|
84
116
|
# Get a list of dimension class instances
|
85
117
|
def dimension_classes
|
86
|
-
dimensions.collect
|
118
|
+
dimensions.collect do |dimension_name|
|
119
|
+
dimension_class(dimension_name)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Get the dimension class for the specified dimension name
|
124
|
+
def dimension_class(dimension_name)
|
125
|
+
fact_class.dimension_relationships[dimension_name.to_sym].class_name.constantize
|
87
126
|
end
|
88
127
|
|
128
|
+
# Get the cube logger
|
89
129
|
def logger
|
90
130
|
@logger ||= Logger.new('cube.log')
|
91
131
|
end
|
92
132
|
|
133
|
+
# Get the time when the fact or any dimension referenced in this cube
|
134
|
+
# was last modified
|
93
135
|
def last_modified
|
94
|
-
lm =
|
136
|
+
lm = fact_class.last_modified
|
95
137
|
dimensions.each do |dimension|
|
96
|
-
dim = Dimension.class_for_name(dimension)
|
138
|
+
dim = ActiveWarehouse::Dimension.class_for_name(dimension)
|
97
139
|
lm = dim.last_modified if dim.last_modified > lm
|
98
140
|
end
|
99
141
|
lm
|
100
142
|
end
|
101
143
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
condition_args = []
|
128
|
-
meta_data_attributes.each do |key, value|
|
129
|
-
conditions << "#{key} = ?"
|
130
|
-
condition_args << value
|
131
|
-
end
|
132
|
-
conditions = [conditions.join(' and ')] + condition_args
|
133
|
-
meta_data = AggregateMetaData.find(:first, :conditions => conditions)
|
134
|
-
unless meta_data
|
135
|
-
meta_data = AggregateMetaData.create(meta_data_attributes)
|
136
|
-
end
|
137
|
-
|
138
|
-
# Construct the aggregate class instance
|
139
|
-
aggregate_class = Class.new(ActiveWarehouse::Aggregate)
|
140
|
-
aggregate_class.name = "Agg#{meta_data.id}"
|
141
|
-
logger.debug "Constructed aggregate #{aggregate_class.name}"
|
142
|
-
aggregate_class.cube = self
|
143
|
-
aggregate_class.dimension1 = column_dimension
|
144
|
-
aggregate_class.dimension1_hierarchy_name = column_hierarchy_name
|
145
|
-
aggregate_class.dimension2 = row_dimension
|
146
|
-
aggregate_class.dimension2_hierarchy_name = row_hierarchy_name
|
147
|
-
|
148
|
-
# Create the underlying aggregate storage table
|
149
|
-
# TODO: fix the bug of data not being found when a storage table rebuild occurs
|
150
|
-
force_storage_table_rebuild = options[:force] || aggregate_class.needs_rebuild?(last_modified)
|
151
|
-
logger.debug "Force storage table rebuild? #{force_storage_table_rebuild}"
|
152
|
-
aggregate_class.create_storage_table(force_storage_table_rebuild)
|
153
|
-
|
154
|
-
# Keep a reference to the aggregate class instance
|
155
|
-
@aggregates[meta_data.id] = aggregate_class
|
156
|
-
end
|
157
|
-
end
|
158
|
-
end
|
159
|
-
end
|
144
|
+
# The temp directory for storing files during warehouse rebuilds
|
145
|
+
attr_accessor :temp_dir
|
146
|
+
def temp_dir
|
147
|
+
@temp_dir ||= '/tmp'
|
148
|
+
end
|
149
|
+
|
150
|
+
# Specify the ActiveRecord class to connect through
|
151
|
+
# Note: this is a potential directive in a Cube subclass
|
152
|
+
attr_accessor :connect_through
|
153
|
+
def connect_through
|
154
|
+
@connect_through ||= ActiveRecord::Base
|
155
|
+
end
|
156
|
+
|
157
|
+
# Get an adapter connection
|
158
|
+
def connection
|
159
|
+
connect_through.connection
|
160
|
+
end
|
161
|
+
|
162
|
+
# Defaults to NoAggregate strategy.
|
163
|
+
def aggregate
|
164
|
+
@aggregate ||= ActiveWarehouse::Aggregate::NoAggregate.new(self)
|
165
|
+
end
|
166
|
+
|
167
|
+
def aggregate_class(agg_class)
|
168
|
+
@aggregate = agg_class.new(self)
|
160
169
|
end
|
161
170
|
|
162
171
|
end
|
163
172
|
|
164
173
|
public
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
#
|
173
|
-
#
|
174
|
-
#
|
175
|
-
#
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
#cs += Benchmark.realtime do
|
187
|
-
data_array = agg_record.data_fields.collect{ |data_field_name| agg_record.send(data_field_name.to_sym) }
|
188
|
-
#end
|
189
|
-
|
190
|
-
# convert to an average where necessary
|
191
|
-
# TODO: implement
|
192
|
-
|
193
|
-
# add calculated fields to the data array
|
194
|
-
#calc += Benchmark.realtime do
|
195
|
-
calculated_fields.each do |calculated_field|
|
196
|
-
options = calculated_field_options[calculated_field]
|
197
|
-
data_array << options[:block].call(agg_record)
|
198
|
-
end
|
199
|
-
#end
|
200
|
-
|
201
|
-
# add the data array to the aggregate map
|
202
|
-
#as += Benchmark.realtime do
|
203
|
-
agg_map.add_data(agg_record.dimension2_path, agg_record.dimension1_path, data_array)
|
204
|
-
#end
|
205
|
-
end
|
206
|
-
|
207
|
-
#end
|
208
|
-
#puts "creating the agg_map took #{s}s"
|
209
|
-
#puts "total time spent collecting the data: #{cs}s, avg:#{cs/agg_records.length}s (#{(cs/s) * 100}%)"
|
210
|
-
#puts "total time spent adding the data: #{as}s, avg:#{as/agg_records.length}s (#{(as/s) * 100}%)"
|
211
|
-
#puts "total time spent calculating fields: #{calc}s, avg:#{calc/agg_records.length}s (#{(calc/s) * 100}%)"
|
212
|
-
agg_map
|
174
|
+
# Query the cube. The column dimension, column hierarchy, row dimension and
|
175
|
+
# row hierarchy are all required.
|
176
|
+
#
|
177
|
+
# The conditions value is a String that represents a SQL condition appended
|
178
|
+
# to the where clause. TODO: this may eventually be converted to another
|
179
|
+
# query language.
|
180
|
+
#
|
181
|
+
# The cstage value represents the current column drill down stage and
|
182
|
+
# defaults to 0.
|
183
|
+
#
|
184
|
+
# The rstage value represents the current row drill down stage and defaults
|
185
|
+
# to 0. Filters contains key/value pairs where the key is a string of
|
186
|
+
# 'dimension.column' and the value is the value to filter by. For example:
|
187
|
+
#
|
188
|
+
# filters = {'date.calendar_year' => 2007, 'product.category' => 'Food'}
|
189
|
+
# query(:date, :cy, :store, :region, 1, 0, filters)
|
190
|
+
#
|
191
|
+
# Note that product.category refers to a dimension which is not actually
|
192
|
+
# visible but which is both part of the cube and is used for filtering.
|
193
|
+
def query(*args)
|
194
|
+
self.class.aggregate.query(*args)
|
213
195
|
end
|
214
196
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
k = Aggregate.key(column_dimension, column_hierarchy, row_dimension, row_hierarchy)
|
219
|
-
if aggregates[k].nil?
|
220
|
-
self.class.logger.debug("Aggregate #{k} not found in cache")
|
221
|
-
conditions = ['cube_name = ?', self.class.name]
|
222
|
-
conditions[0] << ' and dimension1 = ? and dimension1_hierarchy = ? and dimension2 = ? and dimension2_hierarchy = ?'
|
223
|
-
conditions << column_dimension.to_s
|
224
|
-
conditions << column_hierarchy.to_s
|
225
|
-
conditions << row_dimension.to_s
|
226
|
-
conditions << row_hierarchy.to_s
|
227
|
-
|
228
|
-
conditions_reversed = ['cube_name = ?', self.class.name]
|
229
|
-
conditions_reversed[0] << ' and dimension1 = ? and dimension1_hierarchy = ? and dimension2 = ? and dimension2_hierarchy = ?'
|
230
|
-
conditions_reversed << row_dimension.to_s
|
231
|
-
conditions_reversed << row_hierarchy.to_s
|
232
|
-
conditions_reversed << column_dimension.to_s
|
233
|
-
conditions_reversed << column_hierarchy.to_s
|
234
|
-
|
235
|
-
aggregate_meta_data = AggregateMetaData.find(:first, :conditions => conditions)
|
236
|
-
aggregate_meta_data ||= AggregateMetaData.find(:first, :conditions => conditions_reversed)
|
237
|
-
if aggregate_meta_data.nil?
|
238
|
-
self.class.rebuild
|
239
|
-
aggregate_meta_data = AggregateMetaData.find(:first, :conditions => conditions)
|
240
|
-
raise "Cannot find aggregate meta data for key #{k}" if aggregate_meta_data.nil?
|
241
|
-
end
|
242
|
-
aggregate_class = self.class.aggregates[aggregate_meta_data.id]
|
243
|
-
if aggregate_class.nil?
|
244
|
-
self.class.rebuild
|
245
|
-
aggregate_class = self.class.aggregates[aggregate_meta_data.id]
|
246
|
-
raise "Cannot find aggregate for id #{aggregate_meta_data.id}" if aggregate_class.nil?
|
247
|
-
end
|
248
|
-
|
249
|
-
aggregates[k] = aggregate_class.find(:all,
|
250
|
-
:conditions => ['(dimension1_stage = ? and dimension2_stage = ?) or (dimension1_stage = ? and dimension2_stage = ?)',
|
251
|
-
cstage, rstage, rstage, cstage])
|
252
|
-
end
|
253
|
-
aggregates[k]
|
197
|
+
# Get the database connection (delegates to Cube.connection class method)
|
198
|
+
def connection
|
199
|
+
self.class.connection
|
254
200
|
end
|
255
201
|
|
256
|
-
# Get a hash of all aggregate data
|
257
|
-
def aggregates
|
258
|
-
@aggregates ||= {}
|
259
|
-
end
|
260
202
|
end
|
261
203
|
|
262
|
-
# In-memory map of aggregate values
|
263
|
-
class AggregateMap
|
264
|
-
attr_reader :length
|
265
|
-
|
266
|
-
# Initialize the aggregate map
|
267
|
-
def initialize
|
268
|
-
@m = {}
|
269
|
-
end
|
270
|
-
|
271
|
-
# Return true if the aggregate map includes the specified row path
|
272
|
-
def has_row_path?(row_path)
|
273
|
-
@m.has_key?(row_path)
|
274
|
-
end
|
275
|
-
|
276
|
-
# Get the value for the specified row path, column path and field index
|
277
|
-
def value(row_path, col_path, field_index)
|
278
|
-
#puts "Getting value for #{row_path}, #{col_path} [field=#{field_index}]"
|
279
|
-
row = @m[row_path]
|
280
|
-
return 0 if row.nil?
|
281
|
-
col = row[col_path]
|
282
|
-
return 0 if col.nil?
|
283
|
-
return col[field_index] || 0
|
284
|
-
end
|
285
|
-
|
286
|
-
# Get an array of the values for the specified row path and column path
|
287
|
-
def values(row_path, col_path)
|
288
|
-
row = @m[row_path]
|
289
|
-
return Array.new(length, 0) if row.nil?
|
290
|
-
col = row[col_path]
|
291
|
-
return Array.new(length, 0) if col.nil?
|
292
|
-
col
|
293
|
-
end
|
294
|
-
|
295
|
-
# Add an array of data for the given row and column path
|
296
|
-
def add_data(row_path, col_path, data_array)
|
297
|
-
@length ||= data_array.length
|
298
|
-
#puts "Adding data for #{row_path}, #{col_path} [data=[#{data_array.join(',')}]]"
|
299
|
-
@m[row_path] ||= {}
|
300
|
-
@m[row_path][col_path] = data_array
|
301
|
-
end
|
302
|
-
end
|
303
204
|
end
|