activewarehouse 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -14
- data/Rakefile +16 -5
- data/doc/references.txt +4 -0
- data/generators/bridge/templates/migration.rb +9 -2
- data/generators/bridge/templates/unit_test.rb +8 -0
- data/generators/date_dimension/USAGE +1 -0
- data/generators/date_dimension/date_dimension_generator.rb +16 -0
- data/generators/date_dimension/templates/fixture.yml +5 -0
- data/generators/date_dimension/templates/migration.rb +31 -0
- data/generators/date_dimension/templates/model.rb +3 -0
- data/generators/date_dimension/templates/unit_test.rb +8 -0
- data/generators/dimension/templates/migration.rb +1 -10
- data/generators/dimension_view/dimension_view_generator.rb +2 -2
- data/generators/dimension_view/templates/migration.rb +8 -2
- data/generators/fact/templates/migration.rb +2 -0
- data/generators/time_dimension/USAGE +1 -0
- data/generators/time_dimension/templates/fixture.yml +5 -0
- data/generators/time_dimension/templates/migration.rb +12 -0
- data/generators/time_dimension/templates/model.rb +3 -0
- data/generators/time_dimension/templates/unit_test.rb +8 -0
- data/generators/time_dimension/time_dimension_generator.rb +14 -0
- data/lib/active_warehouse.rb +13 -2
- data/lib/active_warehouse/aggregate.rb +54 -253
- data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
- data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
- data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
- data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
- data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
- data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
- data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
- data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
- data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
- data/lib/active_warehouse/aggregate_field.rb +49 -0
- data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
- data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
- data/lib/active_warehouse/builder.rb +2 -1
- data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
- data/lib/active_warehouse/builder/generator/generator.rb +13 -0
- data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
- data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
- data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
- data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
- data/lib/active_warehouse/calculated_field.rb +27 -0
- data/lib/active_warehouse/compat/compat.rb +4 -4
- data/lib/active_warehouse/cube.rb +126 -225
- data/lib/active_warehouse/cube_query_result.rb +69 -0
- data/lib/active_warehouse/dimension.rb +64 -29
- data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
- data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
- data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
- data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
- data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
- data/lib/active_warehouse/fact.rb +119 -40
- data/lib/active_warehouse/field.rb +74 -0
- data/lib/active_warehouse/ordered_hash.rb +34 -0
- data/lib/active_warehouse/prejoin_fact.rb +97 -0
- data/lib/active_warehouse/report/abstract_report.rb +40 -14
- data/lib/active_warehouse/report/chart_report.rb +3 -3
- data/lib/active_warehouse/report/table_report.rb +8 -3
- data/lib/active_warehouse/version.rb +1 -1
- data/lib/active_warehouse/view/report_helper.rb +144 -34
- data/tasks/active_warehouse_tasks.rake +28 -10
- metadata +107 -30
@@ -0,0 +1,34 @@
|
|
1
|
+
module ActiveWarehouse #:nodoc:
|
2
|
+
module Aggregate #:nodoc:
|
3
|
+
# Dwarf support class that prints a representation of the Dwarf
|
4
|
+
class DwarfPrinter
  class << self
    # Writes one line describing +node+ to standard output and then walks
    # further into the structure.
    #
    # The line has the form
    # <tt>index=<indent><parent>:[cell|cell|...|all-cell]</tt>, where the
    # indent is +depth+ spaces and the parent prefix is omitted when the node
    # has no parent.
    #
    # * +node+    - object responding to +index+, +cells+, +parent+,
    #   +all_cell+, +leaf?+ and +children+.
    # * +depth+   - number of spaces used to indent the line.
    # * +recurse+ - when true, every entry of <tt>node.children</tt> is
    #   printed one level deeper.
    #
    # For a non-leaf node that has an ALL cell, the ALL cell's child node is
    # printed one level deeper without recursing into its children.
    def print_node(node, depth=0, recurse=true)
      rendered_cells = node.cells.map { |cell| cell_to_string(cell) }.join('|')

      parent_prefix = ''
      parent_prefix = "#{cell_to_string(node.parent)}:" if node.parent

      puts "#{node.index}=#{' '*depth}#{parent_prefix}[#{rendered_cells}|#{all_cell_to_string(node.all_cell)}]"

      unless node.leaf?
        print_node(node.all_cell.child, depth + 1, false) if node.all_cell
      end

      node.children.each { |child| print_node(child, depth+1) } if recurse
    end

    # Renders a cell as its key, followed by a space and the comma-joined
    # values when the cell belongs to a leaf node.
    def cell_to_string(cell)
      # Work on a copy: appending to the object returned by to_s could
      # otherwise modify the key itself on every call.
      text = cell.key.to_s.dup
      text << " #{cell.value.join(',')}" if cell.node.leaf?
      text
    end

    # Renders the ALL cell as the +inspect+ form of its value, or as an empty
    # string when either the cell or its value is missing.
    def all_cell_to_string(cell)
      return '' unless cell
      cell.value ? cell.value.inspect : ''
    end
  end
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,194 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module ActiveWarehouse #:nodoc:
|
4
|
+
module Aggregate #:nodoc:
|
5
|
+
# An aggregate which goes directly to the fact and dimensions to answer questions
|
6
|
+
class NoAggregate < Aggregate
|
7
|
+
# Populate the aggregate (in this case it is a no-op implementation)
|
8
|
+
# Deliberately a no-op: this aggregate answers queries straight from the fact
# and dimension tables, so nothing is materialized ahead of time.
#
# Returns nil.
def populate
  nil
end
|
11
|
+
|
12
|
+
# Query the aggregate
|
13
|
+
# def query(column_dimension_name, column_hierarchy_name,
|
14
|
+
# row_dimension_name, row_hierarchy_name, conditions=nil,
|
15
|
+
# cstage=0, rstage=0, filters={})
|
16
|
+
|
17
|
+
# Query the aggregate
|
18
|
+
# Answers a cube query by building a single SELECT against the fact table
# joined to the dimensions the query touches, and returns the rows wrapped in
# an ActiveWarehouse::CubeQueryResult.
#
# The arguments are handed to +parse_query_args+; the resulting options hash
# is read for:
#
# * <tt>:column_dimension_name</tt>, <tt>:column_hierarchy_name</tt>
# * <tt>:row_dimension_name</tt>, <tt>:row_hierarchy_name</tt>
# * <tt>:conditions</tt> - SQL fragment(s) placed first in the WHERE clause
# * <tt>:cstage</tt>, <tt>:rstage</tt> - drill-down stages (default 0)
# * <tt>:filters</tt> - hash of <tt>"dimension.column" => value</tt>
#   equality filters (default empty); values are quoted by the connection
# * <tt>:order</tt> - a column name or list of column names for ORDER BY;
#   each one is quoted with +quote_column_name+
#
# Neither the <tt>:conditions</tt> nor the <tt>:order</tt> collection passed
# by the caller is modified.
#
# Raises ArgumentError when an aggregate field's +levels_from_parent+ holds
# anything other than :all, :self or an Integer.
#
# This method assumes at most one of the row/column dimensions is
# hierarchical. TODO: handle the case where both are.
def query(*args)
  options = parse_query_args(*args)

  column_dimension_name = options[:column_dimension_name]
  column_hierarchy_name = options[:column_hierarchy_name]
  row_dimension_name = options[:row_dimension_name]
  row_hierarchy_name = options[:row_hierarchy_name]
  conditions = options[:conditions]
  cstage = options[:cstage] || 0
  rstage = options[:rstage] || 0
  filters = options[:filters] || {}

  db = cube_class.connection
  fact_class = cube_class.fact_class
  fact_table = fact_class.table_name
  column_dimension = fact_class.dimension_class(column_dimension_name)
  column_hierarchy = column_dimension.hierarchy(column_hierarchy_name)
  row_dimension = fact_class.dimension_class(row_dimension_name)
  row_hierarchy = row_dimension.hierarchy(row_hierarchy_name)

  # Work out which dimensions have to be joined: the row and column
  # dimensions, whatever they depend on, and any dimension mentioned by a
  # filter or by the free-form conditions.
  used_dimensions = Set.new([column_dimension_name, row_dimension_name])
  [row_dimension_name, column_dimension_name].each do |name|
    reflections = fact_class.dimension_relationships[name].dependent_dimension_reflections
    used_dimensions.merge(reflections.collect { |reflection| reflection.name })
  end
  filters.each { |key, _value| used_dimensions << key.split('.')[0] }

  condition_list = Array(conditions)
  unless condition_list.empty?
    cube_class.dimensions.each do |dimension|
      # Escape the name so it is matched literally, and look at every
      # condition so an array of fragments is handled like a single string.
      pattern = /#{Regexp.escape(dimension.to_s)}\./i
      used_dimensions << dimension if condition_list.any? { |condition| condition.to_s =~ pattern }
    end
  end

  hierarchical_dimension = nil
  hierarchical_dimension_name = nil
  hierarchical_stage = nil

  # For a hierarchical dimension the stage selects a parent row through the
  # bridge table instead of a level in the hierarchy, so the first level is
  # always the one selected.
  if column_dimension.hierarchical_dimension?
    hierarchical_dimension = column_dimension
    hierarchical_dimension_name = column_dimension_name
    hierarchical_stage = cstage
    current_column_name = column_hierarchy[0]
  else
    current_column_name = column_hierarchy[cstage]
  end

  if row_dimension.hierarchical_dimension?
    hierarchical_dimension = row_dimension
    hierarchical_dimension_name = row_dimension_name
    hierarchical_stage = rstage
    current_row_name = row_hierarchy[0]
  else
    current_row_name = row_hierarchy[rstage]
  end

  fact_columns = cube_class.aggregate_fields.collect { |c|
    quoted_label = db.quote_column_name(c.label)
    source = "#{c.from_table_name}.#{c.name}"
    plain_aggregate = " #{c.strategy_name}(#{source}) AS #{quoted_label}"
    levels = c.levels_from_parent

    if hierarchical_dimension && !levels.empty?
      levels.each do |level|
        unless level == :all || level == :self || level.is_a?(Integer)
          raise ArgumentError, "Each element to :levels_from_parent option must be :all, :self, or Integer"
        end
      end

      if levels.include?(:all)
        # :all covers every level, so no CASE restriction is needed.
        plain_aggregate
      else
        bridge = hierarchical_dimension.bridge_class
        level_column = "#{bridge.table_name}.#{bridge.levels_from_parent}"
        branches = levels.collect { |level|
          distance = (level == :self) ? 0 : level
          " WHEN #{level_column} = #{distance} THEN #{source} \n"
        }.join
        " #{c.strategy_name}(CASE #{branches} ELSE 0 END) AS #{quoted_label}"
      end
    else
      plain_aggregate
    end
  }.join(",\n")

  sql = "SELECT\n".dup
  sql << " #{column_dimension_name}.#{current_column_name},\n"
  sql << " #{row_dimension_name}.#{current_row_name},\n"
  sql << fact_columns
  sql << "\nFROM\n"
  sql << " #{fact_table}"

  cube_class.dimensions_hierarchies.each do |dimension_name, _hierarchy_names|
    next unless used_dimensions.include?(dimension_name)
    dimension = fact_class.dimension_class(dimension_name)
    fact_key = "#{fact_table}.#{fact_class.foreign_key_for(dimension_name)}"

    if dimension.hierarchical_dimension?
      # Hierarchical dimensions are reached through their bridge table.
      bridge_table = dimension.bridge_class.table_name
      sql << "\nJOIN #{bridge_table}"
      sql << "\n ON #{fact_key} = "
      sql << "#{bridge_table}.#{dimension.parent_foreign_key}"
      if dimension.slowly_changing_dimension?
        date_stamp = "#{fact_class.slowly_changes_over_name(dimension_name)}." +
                     "#{fact_class.slowly_changes_over_class(dimension_name).sql_date_stamp}"
        sql << " and (#{bridge_table}.#{dimension.bridge_class.effective_date} <= "
        sql << "#{date_stamp} "
        sql << "and #{bridge_table}.#{dimension.bridge_class.expiration_date} >= "
        sql << "#{date_stamp}) "
      end
      sql << "\nJOIN #{dimension.table_name} as #{dimension_name}"
      sql << "\n ON #{bridge_table}.#{dimension.child_foreign_key} = "
      sql << "#{dimension_name}.#{dimension.primary_key}"
    else
      sql << "\nJOIN #{dimension.table_name} as #{dimension_name}"
      sql << "\n ON #{fact_key} = "
      sql << "#{dimension_name}.#{dimension.primary_key}"
    end
  end

  # Build the WHERE clause: caller conditions first, then the filters.
  # Array() hands back the caller's own array when given one, so copy it
  # before appending.
  where_clause = condition_list.dup
  filters.each do |key, value|
    filter_dimension, column = key.split('.')
    where_clause << "#{filter_dimension}.#{column} = #{db.quote(value)}"
  end
  sql << %Q(\nWHERE\n #{where_clause.join(" AND\n ")} ) if where_clause.length > 0

  # A hierarchical dimension needs an extra restriction for drill downs.
  if hierarchical_dimension
    sql << (where_clause.empty? ? "\n WHERE " : " \n AND ")
    bridge = hierarchical_dimension.bridge_class
    sql << "\n #{hierarchical_dimension_name}.#{hierarchical_dimension.primary_key} IN ( "
    sql << "\n SELECT #{hierarchical_dimension.parent_foreign_key} FROM #{bridge.table_name} "
    if hierarchical_stage == 0
      sql << "\n WHERE #{bridge.top_flag} = #{db.quote(bridge.top_flag_value)})"
    else
      sql << "\n WHERE #{hierarchical_dimension.child_foreign_key} = #{hierarchical_stage} AND #{hierarchical_dimension.levels_from_parent} = 1)"
    end
  end

  sql << "\nGROUP BY\n"
  sql << " #{column_dimension_name}.#{current_column_name},\n"
  sql << " #{row_dimension_name}.#{current_row_name}"

  if options[:order]
    # Quote each requested column individually, without touching the
    # caller's array.
    order_by = Array(options[:order]).collect { |column| db.quote_column_name(column) }
    sql << %Q(\nORDER BY\n #{order_by.join(",\n")})
  end

  result = ActiveWarehouse::CubeQueryResult.new(cube_class.aggregate_fields)

  db.select_all(sql).each do |row|
    result.add_data(row.delete(current_row_name.to_s),
                    row.delete(current_column_name.to_s),
                    row) # the rest of the members of row are the fact columns
  end

  result
end
|
192
|
+
end
|
193
|
+
end
|
194
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ActiveWarehouse #:nodoc:
|
2
|
+
module Aggregate #:nodoc:
|
3
|
+
# Implementation of a Partitioning and Inserting Dwarf algorithm as defined
|
4
|
+
# in http://www.zju.edu.cn/jzus/2005/A0506/A050608.pdf
|
5
|
+
class PidAggregate < Aggregate
  include DwarfCommon

  # Initialize the aggregate for the given cube class; all work is delegated
  # to the superclass.
  def initialize(cube_class)
    super(cube_class)
  end

  # Populate the aggregate by handing the sorted facts to
  # +create_dwarf_cube+.
  def populate
    create_dwarf_cube(sorted_facts)
  end

  # Query the aggregate. Currently this only parses the arguments and
  # returns the parsed options; no lookup is performed yet.
  def query(*args)
    parse_query_args(*args)
  end

  # Placeholder for the dwarf construction step; intentionally empty for now.
  def create_dwarf_cube(sorted_facts)
    nil
  end
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
module ActiveWarehouse #:nodoc
|
4
|
+
module Aggregate #:nodoc
|
5
|
+
# A Pipelined implementation of a ROLAP engine that stores all possible
|
6
|
+
# combinations
|
7
|
+
# of fact and dimensional values for a specific cube.
|
8
|
+
#
|
9
|
+
# This implementation attempts to reduce the amount of work required
|
10
|
+
# by aggregating facts in a pipelined fashion. This means that smaller
|
11
|
+
# aggregates are generated from a preceding aggregate, in order to avoid
|
12
|
+
# having to query the entire raw data set for every aggregate.
|
13
|
+
#
|
14
|
+
# E.g.
|
15
|
+
#
|
16
|
+
# ABCD -> ABC -> AB -> A -> *all*
|
17
|
+
class PipelinedRolapAggregate < Aggregate
|
18
|
+
include RolapCommon
|
19
|
+
|
20
|
+
# Build and populate the data store
|
21
|
+
# Build and populate the data store.
#
# Creates and fills the table grouped by every dimension column, creates the
# remaining rollup tables, and then runs each generated insert statement in
# its own transaction. The first generated statement is skipped because
# +create_and_populate_all_table+ has already done that work.
#
# +options+ is accepted but not read here. Returns the list of generated
# insert statements.
def populate(options={})
  create_and_populate_all_table
  create_all_pipelined_agg_tables

  statements = create_insert_statements
  statements.each_index do |position|
    next if position.zero? # handled by create_and_populate_all_table
    connection.transaction { connection.execute(statements[position]) }
  end
end
|
29
|
+
|
30
|
+
protected
|
31
|
+
|
32
|
+
# build and populate a table which group by's all dimension columns.
|
33
|
+
# Builds and populates the table that groups by all dimension columns.
#
# The SELECT lists every dimension column aliased as
# <tt>table_alias_name</tt>, followed by the SQL returned from
# +aggregated_fact_column_sql_for_all+, and groups by every dimension column.
# The statement is wrapped by the connection's +add_select_into_table+ so
# that it writes into the rollup table indexed by the number of dimension
# columns. An existing table with that name is dropped first, and the
# statement is executed inside a transaction.
def create_and_populate_all_table
  columns = dimensions_to_columns
  select_list = columns.collect do |c|
    "#{c.table_alias}.#{c.name} as #{c.table_alias}_#{c.name}"
  end
  group_list = columns.collect { |c| "#{c.table_alias}.#{c.name}" }

  sql = <<-SQL
    SELECT
      #{select_list.join(",")},
      #{aggregated_fact_column_sql_for_all}
    FROM #{tables_and_joins}
    GROUP BY
      #{group_list.join(",")}
  SQL

  all_table_name = indexed_rollup_table_name(select_list.length)

  sql = connection.add_select_into_table(all_table_name, sql)

  connection.drop_table(all_table_name) if connection.tables.include?(all_table_name)
  connection.transaction { connection.execute(sql) }
end
|
58
|
+
|
59
|
+
# Creates one rollup table for every index from 0 up to, but not including,
# the number of dimension columns.
def create_all_pipelined_agg_tables
  last_index = dimensions_to_columns.size - 1
  (0..last_index).each { |table_index| create_rollup_cube_table(table_index) }
end
|
64
|
+
|
65
|
+
# Creates the rollup table
|
66
|
+
# Creates the rollup table for the given index, dropping any existing table
# of the same name first.
#
# The table has no id column. It gets one column per dimension column and one
# per aggregate field. Integer aggregate columns are created with a limit of
# 8; other types use the field's own limit. Scale and precision are passed
# along only when the field defines them.
def create_rollup_cube_table(index)
  table_name = indexed_rollup_table_name(index)
  if connection.tables.include?(table_name)
    connection.drop_table(table_name)
  end

  ActiveRecord::Base.transaction do
    connection.create_table(table_name, :id => false) do |t|
      dimensions_to_columns.each { |dimension_column| t.column(dimension_column.label, dimension_column.column_type) }

      aggregate_fields.each do |field|
        limit = (field.column_type == :integer) ? 8 : field.limit
        column_options = { :limit => limit }
        column_options[:scale] = field.scale if field.scale
        column_options[:precision] = field.precision if field.precision
        t.column(field.label_for_table, field.column_type, column_options)
      end
    end
  end
end
|
85
|
+
|
86
|
+
# Renders the pipelined rollup SQL template for the current number of
# dimension columns and splits the result into individual statements.
#
# The template is read from
# <tt>templates/pipelined_rollup_<n>.sql</tt> next to this file and evaluated
# with ERB against this method's binding. Before rendering, these instance
# variables are set for the template to use:
#
# * <tt>@dimension_0</tt> .. <tt>@dimension_<n-1></tt> - dimension column labels
# * <tt>@aggregate_fields_from_flat_table</tt>, <tt>@aggregate_fields</tt> -
#   the SQL from +aggregated_fact_column_sql_for_rollup+
# * <tt>@flat_table_name</tt>, <tt>@rollup_table_name</tt>
#
# Statements in the rendered text are separated by blank lines. Returns an
# array with one string per statement, in template order.
def create_insert_statements
  dim_columns = dimensions_to_columns
  template_filename = File.dirname(__FILE__) + "/templates/pipelined_rollup_#{dim_columns.length}.sql"

  # Set the variables directly rather than through eval, so that a label
  # containing a quote can neither break nor inject code.
  dim_columns.each_with_index do |column, i|
    instance_variable_set("@dimension_#{i}", column.label.to_s)
  end
  rollup_sql = aggregated_fact_column_sql_for_rollup
  @aggregate_fields_from_flat_table = rollup_sql
  @aggregate_fields = rollup_sql
  @flat_table_name = flat_table_name
  @rollup_table_name = rollup_table_name

  inserts = []

  sql = ""
  # String#each is gone in Ruby 1.9 and later; each_line works everywhere.
  ERB.new(File.read(template_filename)).result(binding).each_line do |line|
    if line.strip == ""
      inserts << sql
      sql = ""
    else
      sql += line
    end
  end
  # Keep the last statement when the template does not end with a blank line.
  inserts << sql unless sql.empty?

  inserts
end
|
111
|
+
|
112
|
+
# Returns the name of the rollup table for +index+: the base rollup table
# name, an underscore, and the index.
def indexed_rollup_table_name(index)
  rollup_table_name.to_s + '_' + index.to_s
end
|
115
|
+
|
116
|
+
# Returns the comma-separated aggregate expressions used to fill the table
# grouped by all dimensions: each field's strategy applied to its column and
# aliased to the field's table label.
def aggregated_fact_column_sql_for_all
  expressions = aggregate_fields.map do |field|
    [field.strategy_name, '(', field.name, ') AS ', field.label_for_table].join
  end
  expressions.join(",")
end
|
121
|
+
|
122
|
+
# Returns the comma-separated aggregate expressions used when rolling one
# aggregate table up into the next. Fields whose strategy is :avg are
# averaged again; every other field is summed. Each expression reads from and
# is aliased to the field's table label.
def aggregated_fact_column_sql_for_rollup
  expressions = aggregate_fields.map do |field|
    function = (field.strategy_name == :avg) ? :avg : :sum
    "#{function}(#{field.label_for_table}) AS #{field.label_for_table}"
  end
  expressions.join(", ")
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
# Source file that contains a basic ROLAP engine implementation.
|
2
|
+
|
3
|
+
module ActiveWarehouse #:nodoc
|
4
|
+
module Aggregate #:nodoc
|
5
|
+
# Basic implementation of a ROLAP engine that stores all possible combinations
|
6
|
+
# of fact and dimensional values for a specific cube.
|
7
|
+
class RolapAggregate < Aggregate
|
8
|
+
include RolapCommon
|
9
|
+
|
10
|
+
# Build and populate the data store
|
11
|
+
# Build and populate the data store.
#
# +options+ is forwarded to +populate_rollup_cube+, so for example
# <tt>:verbose => true</tt> takes effect.
def populate(options={})
  populate_rollup_cube(options)
end
|
14
|
+
|
15
|
+
# Query the aggregate, returning a QueryResult object
|
16
|
+
# Query the aggregate, returning a CubeQueryResult object.
#
# The arguments are handed to +parse_query_args+; the resulting options hash
# is read for:
#
# * <tt>:column_dimension_name</tt>, <tt>:column_hierarchy_name</tt>
# * <tt>:row_dimension_name</tt>, <tt>:row_hierarchy_name</tt>
# * <tt>:conditions</tt> - SQL fragment(s) ANDed onto the WHERE clause
# * <tt>:cstage</tt>, <tt>:rstage</tt> - hierarchy stages (default 0)
# * <tt>:filters</tt> - hash of filter values (default empty). A key equal to
#   a hierarchy level name filters that level; any remaining key is expected
#   in <tt>"dimension.column"</tt> form.
#
# Filter values are quoted by the connection, and the caller's filters hash
# is left untouched.
def query(*args)
  options = parse_query_args(*args)

  column_dimension_name = options[:column_dimension_name]
  column_hierarchy_name = options[:column_hierarchy_name]
  row_dimension_name = options[:row_dimension_name]
  row_hierarchy_name = options[:row_hierarchy_name]
  conditions = options[:conditions]
  cstage = options[:cstage] || 0
  rstage = options[:rstage] || 0
  # Entries are removed while the WHERE clause is built, so work on a copy.
  filters = (options[:filters] || {}).dup

  column_dimension = fact_class.dimension_class(column_dimension_name)
  column_hierarchy = column_dimension.hierarchy(column_hierarchy_name)
  row_dimension = fact_class.dimension_class(row_dimension_name)
  row_hierarchy = row_dimension.hierarchy(row_hierarchy_name)
  current_column_name = column_hierarchy[cstage]
  current_row_name = row_hierarchy[rstage]
  full_column_name = "#{column_dimension_name}_#{current_column_name}"
  full_row_name = "#{row_dimension_name}_#{current_row_name}"

  # build the SQL query
  select_columns = [
    "#{full_column_name} AS #{current_column_name}",
    "#{full_row_name} AS #{current_row_name}"
  ]
  select_columns.concat(aggregate_fields.collect { |c| "#{c.label_for_table} as '#{c.label}'" })
  sql = "SELECT #{select_columns.join(',')} FROM #{rollup_table_name} "

  # build the where clause: for each hierarchy, levels with a filter are
  # matched on the filter value and unfiltered levels below the current stage
  # must be null
  where_clause = []
  [
    [column_dimension_name, column_hierarchy, cstage],
    [row_dimension_name, row_hierarchy, rstage]
  ].each do |dimension_name, hierarchy, current_stage|
    hierarchy.length.times do |stage|
      level_name = hierarchy[stage]
      name = "#{dimension_name}_#{level_name}"
      filter_value = filters.delete(level_name)
      if filter_value
        where_clause << "#{name} = #{connection.quote(filter_value)}"
      elsif stage > current_stage
        where_clause << "#{name} is null"
      end
    end
  end
  where_clause << "#{full_column_name} is not null"
  where_clause << "#{full_row_name} is not null"
  filters.each do |key, value|
    filter_dimension, column = key.split('.')
    where_clause << "#{filter_dimension}_#{column} = #{connection.quote(value)}"
  end
  # Caller conditions are ANDed in; the parentheses stop an OR inside a
  # condition from changing the meaning of the other clauses.
  Array(conditions).each { |condition| where_clause << "(#{condition})" }
  sql << %Q( WHERE #{where_clause.join(" AND ")} )

  # execute the query and return the results as a CubeQueryResult object
  result = ActiveWarehouse::CubeQueryResult.new(aggregate_fields)
  connection.select_all(sql).each do |row|
    result.add_data(row.delete(current_row_name.to_s),
                    row.delete(current_column_name.to_s),
                    row) # the rest of the members of row are the fact columns
  end
  result
end
|
95
|
+
|
96
|
+
protected
|
97
|
+
|
98
|
+
# Creates the rollup table
|
99
|
+
# Creates the rollup table, dropping an existing one first.
#
# The table is created without an id column and gets one column per
# dimension column followed by one column per aggregate field. +options+ is
# accepted but not read here.
def create_rollup_cube_table(options={})
  # TODO: perhaps this should all be executed in a single transaction?
  if connection.tables.include?(rollup_table_name)
    connection.drop_table(rollup_table_name)
  end

  ActiveRecord::Base.transaction do
    connection.create_table(rollup_table_name, :id => false) do |t|
      dimensions_to_columns.each { |dimension_column| t.column(dimension_column.label, dimension_column.column_type) }
      aggregate_fields.each { |field| t.column(field.label_for_table, field.column_type) }
    end
  end
end
|
114
|
+
|
115
|
+
# Builds the aggregate SQL that will be used to populate the ROLAP table.
|
116
|
+
# This SQL is just the SELECT statement and includes all of the GROUP BYs
|
117
|
+
# and aggregation functions.
|
118
|
+
#
|
119
|
+
# +column_mask+ is an array of booleans, where true is the column to group
|
120
|
+
# by. The length of this array is equal to the number of columns in
|
121
|
+
# the SELECT clause.
|
122
|
+
# Builds the SELECT statement used to populate the ROLAP table for one
# grouping combination.
#
# +column_mask+ is an array of booleans with one entry per dimension column.
# Columns whose entry is true are selected and grouped by; the others are
# selected as null. The GROUP BY clause is left out when no column is
# selected.
def build_aggregate_sql(column_mask)
  qualified_names = dimensions_to_columns.collect { |c| "#{c.table_alias}.#{c.name}" }
  masked_names = mask_columns_with_null(qualified_names, column_mask)

  sql = <<-SQL
SELECT
#{masked_names.join(",")},
#{aggregated_fact_column_sql}
FROM #{tables_and_joins}
  SQL

  group = masked_names.reject { |o| o == 'null' }.join(",")
  sql += "GROUP BY #{group}" unless group.empty?

  sql
end
|
139
|
+
|
140
|
+
# Populate the rollup cube
|
141
|
+
#
|
142
|
+
# Options:
|
143
|
+
# * <tt>:verbose</tt>: Set to true to print info to STDOUT during building
|
144
|
+
# Populate the rollup cube.
#
# Recreates the rollup table and then inserts one aggregate per grouping
# combination. Each combination is a bit pattern over the dimension columns,
# running from all zeros to all ones; a 1 bit means the column is grouped by.
# Every insert runs in its own transaction.
#
# Options:
# * <tt>:verbose</tt>: Set to true to print info to STDOUT during building
def populate_rollup_cube(options={})
  create_rollup_cube_table(options)
  verbose = options[:verbose]
  puts "Populating rollup cube #{cube_class.name}" if verbose

  num_columns = dimensions_to_columns.size
  num_combos = (2**num_columns)-1
  puts "There are #{num_combos} combinations" if verbose

  0.upto(num_combos) do |combo|
    if verbose && combo % 100 == 0
      puts "Populating agg #{combo} of #{num_combos}"
    end

    # Zero-padded binary form of the combination, one character per column.
    bits = combo.to_s(2).rjust(num_columns, '0')
    mask = bits.split(//).collect { |bit| bit == '1' }

    sql = "INSERT INTO #{rollup_table_name} " + build_aggregate_sql(mask)

    connection.transaction { connection.execute(sql) }
  end

  if verbose
    row_count = connection.select_value("SELECT count(*) FROM #{rollup_table_name}")
    puts "Rollup cube populated with #{row_count} rows"
  end
end
|
167
|
+
|
168
|
+
# Mask columns with null
|
169
|
+
# Mask columns with null.
#
# Returns a new array the same length as +column_names+ in which every
# column whose +mask+ entry is false or nil is replaced by the string 'null'.
# Raises a RuntimeError when the two arrays differ in size.
def mask_columns_with_null(column_names, mask)
  unless mask.size == column_names.size
    raise "Columns has #{column_names.size} elements, but mask has only #{mask.size}"
  end

  column_names.each_with_index.map do |column_name, position|
    mask[position] ? column_name : 'null'
  end
end
|
178
|
+
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|