activewarehouse 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -14
- data/Rakefile +16 -5
- data/doc/references.txt +4 -0
- data/generators/bridge/templates/migration.rb +9 -2
- data/generators/bridge/templates/unit_test.rb +8 -0
- data/generators/date_dimension/USAGE +1 -0
- data/generators/date_dimension/date_dimension_generator.rb +16 -0
- data/generators/date_dimension/templates/fixture.yml +5 -0
- data/generators/date_dimension/templates/migration.rb +31 -0
- data/generators/date_dimension/templates/model.rb +3 -0
- data/generators/date_dimension/templates/unit_test.rb +8 -0
- data/generators/dimension/templates/migration.rb +1 -10
- data/generators/dimension_view/dimension_view_generator.rb +2 -2
- data/generators/dimension_view/templates/migration.rb +8 -2
- data/generators/fact/templates/migration.rb +2 -0
- data/generators/time_dimension/USAGE +1 -0
- data/generators/time_dimension/templates/fixture.yml +5 -0
- data/generators/time_dimension/templates/migration.rb +12 -0
- data/generators/time_dimension/templates/model.rb +3 -0
- data/generators/time_dimension/templates/unit_test.rb +8 -0
- data/generators/time_dimension/time_dimension_generator.rb +14 -0
- data/lib/active_warehouse.rb +13 -2
- data/lib/active_warehouse/aggregate.rb +54 -253
- data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
- data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
- data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
- data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
- data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
- data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
- data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
- data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
- data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
- data/lib/active_warehouse/aggregate_field.rb +49 -0
- data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
- data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
- data/lib/active_warehouse/builder.rb +2 -1
- data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
- data/lib/active_warehouse/builder/generator/generator.rb +13 -0
- data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
- data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
- data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
- data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
- data/lib/active_warehouse/calculated_field.rb +27 -0
- data/lib/active_warehouse/compat/compat.rb +4 -4
- data/lib/active_warehouse/cube.rb +126 -225
- data/lib/active_warehouse/cube_query_result.rb +69 -0
- data/lib/active_warehouse/dimension.rb +64 -29
- data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
- data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
- data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
- data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
- data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
- data/lib/active_warehouse/fact.rb +119 -40
- data/lib/active_warehouse/field.rb +74 -0
- data/lib/active_warehouse/ordered_hash.rb +34 -0
- data/lib/active_warehouse/prejoin_fact.rb +97 -0
- data/lib/active_warehouse/report/abstract_report.rb +40 -14
- data/lib/active_warehouse/report/chart_report.rb +3 -3
- data/lib/active_warehouse/report/table_report.rb +8 -3
- data/lib/active_warehouse/version.rb +1 -1
- data/lib/active_warehouse/view/report_helper.rb +144 -34
- data/tasks/active_warehouse_tasks.rake +28 -10
- metadata +107 -30

data/lib/active_warehouse/aggregate/dwarf_printer.rb

@@ -0,0 +1,34 @@
+module ActiveWarehouse #:nodoc:
+  module Aggregate #:nodoc:
+    # Dwarf support class that prints a representation of the Dwarf
+    class DwarfPrinter
+      # Print the specified node at the given depth.
+      def self.print_node(node, depth=0, recurse=true)
+        #puts "printing node #{node.index}"
+        cells = node.cells.collect { |c| cell_to_string(c) }.join('|')
+
+        parent_node = node.parent ? "#{cell_to_string(node.parent)}:" : ''
+        puts "#{node.index}=#{' '*depth}#{parent_node}[#{cells}|#{all_cell_to_string(node.all_cell)}]"
+        if !node.leaf?
+          print_node(node.all_cell.child, depth + 1, false) if node.all_cell
+        end
+        if recurse
+          node.children.each { |child| print_node(child, depth+1) }
+        end
+      end
+
+      def self.cell_to_string(cell)
+        # a new String object must be created here, otherwise to_s returns a reference
+        # to the same String object each time and thus the value will be appended each time
+        # which is not what I want
+        s = String.new(cell.key.to_s)
+        s << " #{cell.value.join(',')}" if cell.node.leaf?
+        s
+      end
+
+      def self.all_cell_to_string(cell)
+        cell ? (cell.value ? cell.value.inspect : '') : ''
+      end
+    end
+  end
+end
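
DwarfPrinter is a debugging aid for inspecting a Dwarf structure built by one of the Dwarf-based aggregates. A hedged usage sketch follows; only the `print_node(node, depth=0, recurse=true)` signature comes from the diff above, while the cube class (`SalesCube`) and the `root` accessor on DwarfAggregate are assumptions made for illustration.

    # Sketch only: SalesCube and aggregate.root are assumed names, not confirmed
    # by this diff; DwarfPrinter.print_node's signature is.
    aggregate = ActiveWarehouse::Aggregate::DwarfAggregate.new(SalesCube)
    aggregate.populate
    root = aggregate.root   # hypothetical accessor to the top-level Dwarf node
    ActiveWarehouse::Aggregate::DwarfPrinter.print_node(root)            # walk the whole structure
    ActiveWarehouse::Aggregate::DwarfPrinter.print_node(root, 0, false)  # print one node, no recursion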

data/lib/active_warehouse/aggregate/no_aggregate.rb

@@ -0,0 +1,194 @@
+require 'set'
+
+module ActiveWarehouse #:nodoc:
+  module Aggregate #:nodoc:
+    # An aggregate which goes directly to the fact and dimensions to answer questions
+    class NoAggregate < Aggregate
+      # Populate the aggregate (in this case it is a no-op implementation)
+      def populate
+        # do nothing
+      end
+
+      # Query the aggregate
+      # def query(column_dimension_name, column_hierarchy_name,
+      #   row_dimension_name, row_hierarchy_name, conditions=nil,
+      #   cstage=0, rstage=0, filters={})
+
+      # Query the aggregate
+      def query(*args)
+        options = parse_query_args(*args)
+
+        column_dimension_name = options[:column_dimension_name]
+        column_hierarchy_name = options[:column_hierarchy_name]
+        row_dimension_name = options[:row_dimension_name]
+        row_hierarchy_name = options[:row_hierarchy_name]
+        conditions = options[:conditions]
+        cstage = options[:cstage] || 0
+        rstage = options[:rstage] || 0
+        filters = options[:filters] || {}
+
+        fact_class = cube_class.fact_class
+        column_dimension = fact_class.dimension_class(column_dimension_name)
+        column_hierarchy = column_dimension.hierarchy(column_hierarchy_name)
+        row_dimension = fact_class.dimension_class(row_dimension_name)
+        row_hierarchy = row_dimension.hierarchy(row_hierarchy_name)
+
+        used_dimensions = Set.new
+        used_dimensions.merge([column_dimension_name, row_dimension_name])
+        row_dim_reflection = fact_class.dimension_relationships[row_dimension_name].dependent_dimension_reflections
+        used_dimensions.merge(row_dim_reflection.collect{|d| d.name})
+        col_dim_reflection = fact_class.dimension_relationships[column_dimension_name].dependent_dimension_reflections
+        used_dimensions.merge(col_dim_reflection.collect{|d| d.name})
+        filters.each do |k,v|
+          used_dimensions << k.split('.')[0]
+        end
+        if conditions
+          cube_class.dimensions.each do |dimension|
+            if conditions =~ /#{dimension}\./i
+              used_dimensions << dimension
+            end
+          end
+        end
+
+        # This method assumes at most one dimension is hierarchical dimension
+        # in the query params. TODO: need to handle when both row and column
+        # are hierarchical dimensions.
+        hierarchical_dimension = nil
+        hierarchical_dimension_name = nil
+        hierarchical_stage = nil
+
+        if !column_dimension.hierarchical_dimension?
+          current_column_name = column_hierarchy[cstage]
+        else
+          hierarchical_dimension = column_dimension
+          hierarchical_dimension_name = column_dimension_name
+          hierarchical_stage = cstage
+          current_column_name = column_hierarchy[0]
+        end
+
+        if !row_dimension.hierarchical_dimension?
+          current_row_name = row_hierarchy[rstage]
+        else
+          hierarchical_dimension = row_dimension
+          hierarchical_dimension_name = row_dimension_name
+          hierarchical_stage = rstage
+          current_row_name = row_hierarchy[0]
+        end
+
+        fact_columns = cube_class.aggregate_fields.collect { |c|
+          agg_sql = ''
+          quoted_label = cube_class.connection.quote_column_name(c.label)
+          if hierarchical_dimension and !c.levels_from_parent.empty?
+            bridge = hierarchical_dimension.bridge_class
+            bridge_table_name = bridge.table_name
+            levels_from_parent = bridge.levels_from_parent
+            get_all = false
+            c.levels_from_parent.each do |level|
+              case level
+              when :all
+                agg_sql += " #{c.strategy_name}(#{c.from_table_name}.#{c.name}) AS #{quoted_label})"
+                get_all = true
+              when :self
+                agg_sql += " #{c.strategy_name}(CASE " if agg_sql.length == 0
+                agg_sql += " WHEN #{bridge_table_name}.#{levels_from_parent} = 0 THEN #{c.from_table_name}.#{c.name} \n"
+              when Integer
+                agg_sql += " #{c.strategy_name}(CASE " if agg_sql.length == 0
+                agg_sql += " WHEN #{bridge_table_name}.#{levels_from_parent} = #{level} then #{c.from_table_name}.#{c.name} \n"
+              else
+                raise ArgumentError, "Each element to :levels_from_parent option must be :all, :self, or Integer"
+              end
+            end
+            agg_sql += " ELSE 0 END) AS #{quoted_label}" unless get_all
+          else
+            agg_sql = " #{c.strategy_name}(#{c.from_table_name}.#{c.name}) AS #{quoted_label}"
+          end
+          agg_sql
+        }.join(",\n")
+
+        sql = ''
+        sql += "SELECT\n"
+        sql += " #{column_dimension_name}.#{current_column_name},\n"
+        sql += " #{row_dimension_name}.#{current_row_name},\n"
+        sql += fact_columns
+        sql += "\nFROM\n"
+
+        sql += " #{fact_class.table_name}"
+        cube_class.dimensions_hierarchies.each do |dimension_name, hierarchy_names|
+          next if !used_dimensions.include?(dimension_name)
+          dimension = fact_class.dimension_class(dimension_name)
+          if !dimension.hierarchical_dimension?
+            sql += "\nJOIN #{dimension.table_name} as #{dimension_name}"
+            sql += "\n ON #{fact_class.table_name}.#{fact_class.foreign_key_for(dimension_name)} = "
+            sql += "#{dimension_name}.#{dimension.primary_key}"
+          else
+            dimension_bridge = dimension.bridge_class
+            sql += "\nJOIN #{dimension_bridge.table_name}"
+            sql += "\n ON #{fact_class.table_name}.#{fact_class.foreign_key_for(dimension_name)} = "
+            sql += "#{dimension_bridge.table_name}.#{dimension.parent_foreign_key}"
+            if dimension.slowly_changing_dimension?
+              sql += " and (#{dimension_bridge.table_name}.#{dimension_bridge.effective_date} <= "
+              sql += "#{fact_class.slowly_changes_over_name(dimension_name)}."
+              sql += "#{fact_class.slowly_changes_over_class(dimension_name).sql_date_stamp} "
+              sql += "and #{dimension_bridge.table_name}.#{dimension_bridge.expiration_date} >= "
+              sql += "#{fact_class.slowly_changes_over_name(dimension_name)}."
+              sql += "#{fact_class.slowly_changes_over_class(dimension_name).sql_date_stamp}) "
+            end
+            sql += "\nJOIN #{dimension.table_name} as #{dimension_name}"
+            sql += "\n ON #{dimension_bridge.table_name}.#{dimension.child_foreign_key} = "
+            sql += "#{dimension_name}.#{dimension.primary_key}"
+          end
+        end
+
+        # build the where clause
+        # first add conditions
+        where_clause = Array(conditions)
+
+        # apply filters
+        filters.each do |key, value|
+          dimension_name, column = key.split('.')
+          where_clause << "#{dimension_name}.#{column} = #{cube_class.connection.quote(value)}"
+        end
+        sql += %Q(\nWHERE\n #{where_clause.join(" AND\n ")} ) if where_clause.length > 0
+
+        # for hierarchical dimension we need to add where clause in for drill downs
+        if !hierarchical_dimension.nil?
+          if where_clause.length == 0
+            sql += "\n WHERE "
+          else
+            sql += " \n AND "
+          end
+          sql += "\n #{hierarchical_dimension_name}.#{hierarchical_dimension.primary_key} IN ( "
+          sql += "\n SELECT #{hierarchical_dimension.parent_foreign_key} FROM #{hierarchical_dimension.bridge_class.table_name} "
+          if hierarchical_stage == 0
+            sql += "\n WHERE #{hierarchical_dimension.bridge_class.top_flag} = #{connection.send(:quote, hierarchical_dimension.bridge_class.top_flag_value)})"
+          else
+            sql += "\n WHERE #{hierarchical_dimension.child_foreign_key} = #{hierarchical_stage} AND #{hierarchical_dimension.levels_from_parent} = 1)"
+          end
+        end
+
+        sql += "\nGROUP BY\n"
+        sql += " #{column_dimension_name}.#{current_column_name},\n"
+        sql += " #{row_dimension_name}.#{current_row_name}"
+
+        if options[:order]
+          order_by = options[:order]
+          order_by = [order_by] if order_by.is_a?(String)
+          order_by.collect!{ |v| cube_class.connection.quote_column_name(order_by) }
+          sql += %Q(\nORDER BY\n #{order_by.join(",\n")})
+        end
+
+        result = ActiveWarehouse::CubeQueryResult.new(
+          cube_class.aggregate_fields
+        )
+
+        cube_class.connection.select_all(sql).each do |row|
+          result.add_data(row.delete(current_row_name.to_s),
+                          row.delete(current_column_name.to_s),
+                          row) # the rest of the members of row are the fact columns
+        end
+
+        result
+      end
+    end
+  end
+end
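
NoAggregate answers queries by joining the fact table to its dimensions on the fly, so nothing has to be precomputed. Below is a hedged sketch of driving it, using only the option keys read inside `query` above; the cube, dimension, hierarchy, and filter names are invented, and the options-hash calling convention is assumed (`parse_query_args` lives in the Aggregate base class and is not shown in this diff).

    # Illustration only: StoreSalesCube, :date/:cy, :store/:region and the filter
    # key are made-up names; the option keys mirror the #query method above.
    aggregate = ActiveWarehouse::Aggregate::NoAggregate.new(StoreSalesCube)
    result = aggregate.query(
      :column_dimension_name => :date,
      :column_hierarchy_name => :cy,
      :row_dimension_name    => :store,
      :row_hierarchy_name    => :region,
      :cstage                => 0,
      :rstage                => 0,
      :filters               => { 'date.calendar_year' => '2006' }
    )
    # result is an ActiveWarehouse::CubeQueryResult holding one aggregate row
    # per (row value, column value) pair.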

data/lib/active_warehouse/aggregate/pid_aggregate.rb

@@ -0,0 +1,29 @@
+module ActiveWarehouse #:nodoc:
+  module Aggregate #:nodoc:
+    # Implementation of a Partitioning and Inserting Dwarf algorithm as defined
+    # in http://www.zju.edu.cn/jzus/2005/A0506/A050608.pdf
+    class PidAggregate < Aggregate
+      include DwarfCommon
+
+      # Initialize the aggregate
+      def initialize(cube_class)
+        super
+      end
+
+      # Populate the aggregate
+      def populate
+        create_dwarf_cube(sorted_facts)
+      end
+
+      # Query the aggregate
+      def query(*args)
+        options = parse_query_args(*args)
+      end
+
+      def create_dwarf_cube(sorted_facts)
+
+      end
+
+    end
+  end
+end

data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb

@@ -0,0 +1,129 @@
+require 'erb'
+
+module ActiveWarehouse #:nodoc
+  module Aggregate #:nodoc
+    # A Pipelined implementation of a ROLAP engine that stores all possible
+    # combinations
+    # of fact and dimensional values for a specific cube.
+    #
+    # This implementation attempts to reduce the amount of work required
+    # by aggregating facts in a pipelined fashion. This means that smaller
+    # aggregates are generated from a preceding aggregate, in order to avoid
+    # having to query the entire raw data set for every aggregate.
+    #
+    # E.g.
+    #
+    # ABCD -> ABC -> AB -> A -> *all*
+    class PipelinedRolapAggregate < Aggregate
+      include RolapCommon
+
+      # Build and populate the data store
+      def populate(options={})
+        create_and_populate_all_table
+        create_all_pipelined_agg_tables
+        create_insert_statements.each_with_index do |insert, i|
+          next if i == 0 #handled by create_and_populate_all_table
+          connection.transaction {connection.execute(insert)}
+        end
+      end
+
+      protected
+
+      # build and populate a table which group by's all dimension columns.
+      def create_and_populate_all_table
+        dimension_column_names = dimensions_to_columns.collect do |c|
+          "#{c.table_alias}.#{c.name} as #{c.table_alias}_#{c.name}"
+        end
+
+        fact_column_names = aggregate_fields.collect do |c|
+          "#{c.from_table_name}.#{c.name} as #{c.label_for_table}"
+        end
+
+        sql = <<-SQL
+          SELECT
+            #{dimension_column_names.join(",")},
+            #{aggregated_fact_column_sql_for_all}
+          FROM #{tables_and_joins}
+          GROUP BY
+            #{dimensions_to_columns.collect{|c| "#{c.table_alias}.#{c.name}"}.join(",")}
+        SQL
+
+        all_table_name = indexed_rollup_table_name(dimension_column_names.length)
+
+        sql = connection.add_select_into_table(all_table_name, sql)
+
+        connection.drop_table(all_table_name) if connection.tables.include?(all_table_name)
+        connection.transaction { connection.execute(sql) }
+      end
+
+      def create_all_pipelined_agg_tables
+        (0..dimensions_to_columns.size-1).each do |i|
+          create_rollup_cube_table(i)
+        end
+      end
+
+      # Creates the rollup table
+      def create_rollup_cube_table(index)
+        table_name = indexed_rollup_table_name(index)
+        connection.drop_table(table_name) if connection.tables.include?(table_name)
+
+        ActiveRecord::Base.transaction do
+          connection.create_table(table_name, :id => false) do |t|
+            dimensions_to_columns.each do |c|
+              t.column(c.label, c.column_type)
+            end
+            aggregate_fields.each do |c|
+              options = {}
+              options[:limit] = c.column_type == :integer ? 8 : c.limit
+              options[:scale] = c.scale if c.scale
+              options[:precision] = c.precision if c.precision
+              t.column(c.label_for_table, c.column_type, options)
+            end
+          end
+        end
+      end
+
+      def create_insert_statements
+        dim_columns = dimensions_to_columns
+        template_filename = File.dirname(__FILE__) + "/templates/pipelined_rollup_#{dim_columns.length}.sql"
+        dim_columns.length.times do |i|
+          eval("@dimension_#{i} = '#{dim_columns[i].label}'")
+        end
+        @aggregate_fields_from_flat_table = aggregated_fact_column_sql_for_rollup
+        @aggregate_fields = aggregated_fact_column_sql_for_rollup
+        @flat_table_name = flat_table_name
+        @rollup_table_name = rollup_table_name
+
+        inserts = []
+
+        sql = ""
+        ERB.new(File.read(template_filename)).result(binding).each do |line|
+          if line.strip == ""
+            inserts << sql
+            sql = ""
+          else
+            sql += line
+          end
+        end
+
+        inserts
+      end
+
+      def indexed_rollup_table_name(index)
+        "#{rollup_table_name}_#{index}"
+      end
+
+      def aggregated_fact_column_sql_for_all
+        aggregate_fields.collect { |c|
+          "#{c.strategy_name}(#{c.name}) AS #{c.label_for_table}"
+        }.join(",")
+      end
+
+      def aggregated_fact_column_sql_for_rollup
+        aggregate_fields.collect { |c|
+          "#{c.strategy_name == :avg ? :avg : :sum}(#{c.label_for_table}) AS #{c.label_for_table}"
+        }.join(", ")
+      end
+    end
+  end
+end
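
The "ABCD -> ABC -> AB -> A -> *all*" comment is the core idea: each rollup level is aggregated from the previous, wider one rather than from the raw fact join, which is why populate only builds the widest ("all") table from scratch and skips the first generated insert. A standalone toy sketch of that prefix chain (not gem code):

    # Each grouping is a prefix of the one before it, so level n can be
    # aggregated from level n+1 instead of from the fact table.
    def pipelined_groupings(columns)
      columns.length.downto(0).collect { |n| columns.first(n) }
    end

    pipelined_groupings(%w[A B C D])
    # => [["A", "B", "C", "D"], ["A", "B", "C"], ["A", "B"], ["A"], []]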

data/lib/active_warehouse/aggregate/rolap_aggregate.rb

@@ -0,0 +1,181 @@
+# Source file that contains a basic ROLAP engine implementation.
+
+module ActiveWarehouse #:nodoc
+  module Aggregate #:nodoc
+    # Basic implementation of a ROLAP engine that stores all possible combinations
+    # of fact and dimensional values for a specific cube.
+    class RolapAggregate < Aggregate
+      include RolapCommon
+
+      # Build and populate the data store
+      def populate(options={})
+        populate_rollup_cube
+      end
+
+      # Query the aggregate, returning a QueryResult object
+      def query(*args)
+        options = parse_query_args(*args)
+
+        column_dimension_name = options[:column_dimension_name]
+        column_hierarchy_name = options[:column_hierarchy_name]
+        row_dimension_name = options[:row_dimension_name]
+        row_hierarchy_name = options[:row_hierarchy_name]
+        conditions = options[:conditions]
+        cstage = options[:cstage]
+        rstage = options[:rstage]
+        filters = options[:filters]
+
+        column_dimension = fact_class.dimension_class(column_dimension_name)
+        column_hierarchy = column_dimension.hierarchy(column_hierarchy_name)
+        row_dimension = fact_class.dimension_class(row_dimension_name)
+        row_hierarchy = row_dimension.hierarchy(row_hierarchy_name)
+        current_column_name = column_hierarchy[cstage]
+        current_row_name = row_hierarchy[rstage]
+        full_column_name = "#{column_dimension_name}_#{current_column_name}"
+        full_row_name = "#{row_dimension_name}_#{current_row_name}"
+
+        # build the SQL query
+        sql = ''
+        sql += 'SELECT '
+        sql += "#{full_column_name} AS #{current_column_name},"
+        sql += "#{full_row_name} AS #{current_row_name},"
+        sql += aggregate_fields.collect{|c| "#{c.label_for_table} as '#{c.label}'"}.join(",")
+        sql += " FROM #{rollup_table_name} "
+
+        # build the where clause
+        where_clause = []
+        0.upto(column_hierarchy.length - 1) do |stage|
+          column_name = column_hierarchy[stage]
+          name = "#{column_dimension_name}_#{column_name}"
+          filter_value = filters.delete(column_name)
+          if filter_value
+            where_clause << "#{name} = '#{filter_value}'" # TODO: protect from
+                                                          # SQL injection
+          else
+            where_clause << "#{name} is null" if stage > cstage
+          end
+        end
+        0.upto(row_hierarchy.length - 1) do |stage|
+          row_name = row_hierarchy[stage]
+          name = "#{row_dimension_name}_#{row_name}"
+          filter_value = filters.delete(row_name)
+          if filter_value
+            where_clause << "#{name} = '#{filter_value}'" # TODO: protect from
+                                                          # SQL injection
+          else
+            where_clause << "#{name} is null" if stage > rstage
+          end
+        end
+        where_clause << "#{full_column_name} is not null"
+        where_clause << "#{full_row_name} is not null"
+        filters.each do |key, value|
+          dimension_name, column = key.split('.')
+          where_clause << "#{dimension_name}_#{column} = '#{value}'" # TODO: protect from SQL injection
+        end
+        sql += %Q( WHERE #{where_clause.join(" AND ")} ) if where_clause.length > 0
+
+        if conditions
+          sql += "\n WHERE\n" unless sql =~ /WHERE/i
+          sql += conditions
+        end
+
+        # execute the query and return the results as a CubeQueryResult object
+        result = ActiveWarehouse::CubeQueryResult.new(
+          aggregate_fields
+        )
+        rows = connection.select_all(sql)
+        # fact_column_names = fact_class.aggregate_fields.collect{|f| f.to_s}
+        rows.each do |row|
+          result.add_data(row.delete(current_row_name.to_s),
+                          row.delete(current_column_name.to_s),
+                          row) # the rest of the members of row are the fact columns
+        end
+        result
+      end
+
+      protected
+
+      # Creates the rollup table
+      def create_rollup_cube_table(options={})
+        # TODO: perhaps this should all be executed in a single transaction?
+        connection.drop_table(rollup_table_name) if connection.tables.include?(rollup_table_name)
+
+        ActiveRecord::Base.transaction do
+          connection.create_table(rollup_table_name, :id => false) do |t|
+            dimensions_to_columns.each do |c|
+              t.column(c.label, c.column_type)
+            end
+            aggregate_fields.each do |c|
+              t.column(c.label_for_table, c.column_type)
+            end
+          end
+        end
+      end
+
+      # Builds the aggregate SQL that will be used to populate the ROLAP table.
+      # This SQL is just the SELECT statement and includes all of the GROUP BYs
+      # and aggregation functions.
+      #
+      # +column_mask+ is an array of booleans, where true is the column to group
+      # by. The length of this array is equal to the number of columns in
+      # the SELECT clause.
+      def build_aggregate_sql(column_mask)
+        dimension_column_names = dimensions_to_columns.collect do |c|
+          "#{c.table_alias}.#{c.name}"
+        end
+
+        sql = <<-SQL
+          SELECT
+            #{mask_columns_with_null(dimension_column_names, column_mask).join(",")},
+            #{aggregated_fact_column_sql}
+          FROM #{tables_and_joins}
+        SQL
+
+        group = mask_columns_with_null(dimension_column_names, column_mask).reject{|o| o == 'null'}.join(",")
+        sql += "GROUP BY #{group}" if !group.empty?
+
+        sql
+      end
+
+      # Populate the rollup cube
+      #
+      # Options:
+      # * <tt>:verbose</tt>: Set to true to print info to STDOUT during building
+      def populate_rollup_cube(options={})
+        create_rollup_cube_table(options)
+        puts "Populating rollup cube #{cube_class.name}" if options[:verbose]
+
+        num_columns = dimensions_to_columns.size
+        num_combos = (2**num_columns)-1
+        puts "There are #{num_combos} combinations" if options[:verbose]
+        (0..num_combos).each do |i|
+          puts "Populating agg #{i} of #{num_combos}" if i % 100 == 0 if options[:verbose]
+          mask = sprintf("%0#{num_columns}b", i).split(//).collect{|x| x == '1' ? true : false}
+
+          sql = ''
+          sql += "INSERT INTO #{rollup_table_name} "
+          sql += build_aggregate_sql(mask)
+
+          connection.transaction { connection.execute(sql) }
+        end
+
+        if options[:verbose]
+          row_count = connection.select_value("SELECT count(*) FROM #{rollup_table_name}")
+          puts "Rollup cube populated with #{row_count} rows"
+        end
+      end
+
+      # Mask columns with null
+      def mask_columns_with_null(column_names, mask)
+        if mask.size != column_names.size
+          raise "Columns has #{column_names.size} elements, but mask has only #{mask.size}"
+        end
+
+        new_columns = []
+        column_names.each_with_index{ |c,i| new_columns << (mask[i] ? c : 'null')}
+        new_columns
+      end
+
+    end
+  end
+end
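
RolapAggregate, by contrast, materializes all 2**n dimension-column combinations into a single rollup table; populate_rollup_cube enumerates them with a binary mask. A standalone illustration of that mask loop (not gem code):

    # Each integer 0..2**n-1 becomes an array of booleans choosing which
    # dimension columns stay in the GROUP BY and which are emitted as NULL.
    num_columns = 3
    masks = (0..(2**num_columns - 1)).collect do |i|
      sprintf("%0#{num_columns}b", i).split(//).collect { |x| x == '1' }
    end
    masks.first  # => [false, false, false]  the grand-total row
    masks.last   # => [true, true, true]     the full-detail grouping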