activewarehouse 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/README +27 -14
  2. data/Rakefile +16 -5
  3. data/doc/references.txt +4 -0
  4. data/generators/bridge/templates/migration.rb +9 -2
  5. data/generators/bridge/templates/unit_test.rb +8 -0
  6. data/generators/date_dimension/USAGE +1 -0
  7. data/generators/date_dimension/date_dimension_generator.rb +16 -0
  8. data/generators/date_dimension/templates/fixture.yml +5 -0
  9. data/generators/date_dimension/templates/migration.rb +31 -0
  10. data/generators/date_dimension/templates/model.rb +3 -0
  11. data/generators/date_dimension/templates/unit_test.rb +8 -0
  12. data/generators/dimension/templates/migration.rb +1 -10
  13. data/generators/dimension_view/dimension_view_generator.rb +2 -2
  14. data/generators/dimension_view/templates/migration.rb +8 -2
  15. data/generators/fact/templates/migration.rb +2 -0
  16. data/generators/time_dimension/USAGE +1 -0
  17. data/generators/time_dimension/templates/fixture.yml +5 -0
  18. data/generators/time_dimension/templates/migration.rb +12 -0
  19. data/generators/time_dimension/templates/model.rb +3 -0
  20. data/generators/time_dimension/templates/unit_test.rb +8 -0
  21. data/generators/time_dimension/time_dimension_generator.rb +14 -0
  22. data/lib/active_warehouse.rb +13 -2
  23. data/lib/active_warehouse/aggregate.rb +54 -253
  24. data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
  25. data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
  26. data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
  27. data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
  28. data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
  29. data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
  30. data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
  31. data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
  32. data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
  33. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
  34. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
  35. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
  36. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
  37. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
  38. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
  39. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
  40. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
  41. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
  42. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
  43. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
  44. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
  45. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
  46. data/lib/active_warehouse/aggregate_field.rb +49 -0
  47. data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
  48. data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
  49. data/lib/active_warehouse/builder.rb +2 -1
  50. data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
  51. data/lib/active_warehouse/builder/generator/generator.rb +13 -0
  52. data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
  53. data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
  54. data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
  55. data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
  56. data/lib/active_warehouse/calculated_field.rb +27 -0
  57. data/lib/active_warehouse/compat/compat.rb +4 -4
  58. data/lib/active_warehouse/cube.rb +126 -225
  59. data/lib/active_warehouse/cube_query_result.rb +69 -0
  60. data/lib/active_warehouse/dimension.rb +64 -29
  61. data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
  62. data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
  63. data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
  64. data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
  65. data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
  66. data/lib/active_warehouse/fact.rb +119 -40
  67. data/lib/active_warehouse/field.rb +74 -0
  68. data/lib/active_warehouse/ordered_hash.rb +34 -0
  69. data/lib/active_warehouse/prejoin_fact.rb +97 -0
  70. data/lib/active_warehouse/report/abstract_report.rb +40 -14
  71. data/lib/active_warehouse/report/chart_report.rb +3 -3
  72. data/lib/active_warehouse/report/table_report.rb +8 -3
  73. data/lib/active_warehouse/version.rb +1 -1
  74. data/lib/active_warehouse/view/report_helper.rb +144 -34
  75. data/tasks/active_warehouse_tasks.rake +28 -10
  76. metadata +107 -30
@@ -0,0 +1,34 @@
1
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # Dwarf support class that writes a textual rendering of a Dwarf node
    # (and, optionally, the nodes below it) to standard output.
    class DwarfPrinter
      class << self
        # Print +node+ on a single line of the form
        #
        #   index=<indent><parent cell>:[cell|cell|...|all cell]
        #
        # where the indent is one space per level of +depth+.
        #
        # For a non-leaf node that has an ALL cell, the node the ALL cell
        # points to is printed next (one level deeper, without recursing
        # further). When +recurse+ is true every child of +node+ is then
        # printed one level deeper as well.
        def print_node(node, depth=0, recurse=true)
          rendered_cells = node.cells.map { |cell| cell_to_string(cell) }.join('|')

          parent_prefix = ''
          parent_prefix = "#{cell_to_string(node.parent)}:" if node.parent
          indent = ' ' * depth
          puts "#{node.index}=#{indent}#{parent_prefix}[#{rendered_cells}|#{all_cell_to_string(node.all_cell)}]"

          if !node.leaf? && node.all_cell
            print_node(node.all_cell.child, depth + 1, false)
          end

          return unless recurse
          node.children.each { |child| print_node(child, depth + 1) }
        end

        # Render a single cell: its key, followed by its comma-joined values
        # when the cell belongs to a leaf node.
        def cell_to_string(cell)
          # Work on a copy: to_s on a String returns the receiver itself, so
          # appending to it directly would modify the cell's key.
          label = cell.key.to_s.dup
          return label unless cell.node.leaf?
          label << " #{cell.value.join(',')}"
        end

        # Render the ALL cell: the inspected value, or an empty string when
        # either the cell or its value is missing.
        def all_cell_to_string(cell)
          return '' unless cell
          cell.value ? cell.value.inspect : ''
        end
      end
    end
  end
end
@@ -0,0 +1,194 @@
1
+ require 'set'
2
+
3
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # An aggregate which goes directly to the fact and dimension tables to
    # answer questions; nothing is precomputed.
    class NoAggregate < Aggregate
      # Populate the aggregate (in this case it is a no-op implementation)
      def populate
        # do nothing
      end

      # Query the aggregate.
      #
      # The arguments are normalized by +parse_query_args+ into an options
      # hash. The keys read here are:
      #
      # * <tt>:column_dimension_name</tt>, <tt>:column_hierarchy_name</tt>
      # * <tt>:row_dimension_name</tt>, <tt>:row_hierarchy_name</tt>
      # * <tt>:conditions</tt>: extra SQL placed in the WHERE clause
      # * <tt>:cstage</tt>, <tt>:rstage</tt>: drill-down stage for the column
      #   and row hierarchy (default 0)
      # * <tt>:filters</tt>: hash of "dimension.column" => value equality
      #   filters (default {})
      # * <tt>:order</tt>: a column name or an array of column names to
      #   order by
      #
      # Builds a single SELECT ... GROUP BY statement against the fact table
      # joined to every dimension that is actually referenced, executes it and
      # returns an ActiveWarehouse::CubeQueryResult filled with one add_data
      # call per result row.
      def query(*args)
        options = parse_query_args(*args)

        column_dimension_name = options[:column_dimension_name]
        column_hierarchy_name = options[:column_hierarchy_name]
        row_dimension_name = options[:row_dimension_name]
        row_hierarchy_name = options[:row_hierarchy_name]
        conditions = options[:conditions]
        cstage = options[:cstage] || 0
        rstage = options[:rstage] || 0
        filters = options[:filters] || {}

        fact_class = cube_class.fact_class
        column_dimension = fact_class.dimension_class(column_dimension_name)
        column_hierarchy = column_dimension.hierarchy(column_hierarchy_name)
        row_dimension = fact_class.dimension_class(row_dimension_name)
        row_hierarchy = row_dimension.hierarchy(row_hierarchy_name)

        used_dimensions = used_dimension_names(
          fact_class, column_dimension_name, row_dimension_name, filters, conditions
        )

        # This method assumes at most one dimension is hierarchical dimension
        # in the query params. TODO: need to handle when both row and column
        # are hierarchical dimensions.
        hierarchical_dimension = nil
        hierarchical_dimension_name = nil
        hierarchical_stage = nil

        if column_dimension.hierarchical_dimension?
          hierarchical_dimension = column_dimension
          hierarchical_dimension_name = column_dimension_name
          hierarchical_stage = cstage
          current_column_name = column_hierarchy[0]
        else
          current_column_name = column_hierarchy[cstage]
        end

        if row_dimension.hierarchical_dimension?
          hierarchical_dimension = row_dimension
          hierarchical_dimension_name = row_dimension_name
          hierarchical_stage = rstage
          current_row_name = row_hierarchy[0]
        else
          current_row_name = row_hierarchy[rstage]
        end

        fact_columns = cube_class.aggregate_fields.collect { |field|
          aggregate_field_sql(field, hierarchical_dimension)
        }.join(",\n")

        sql = ''
        sql += "SELECT\n"
        sql += " #{column_dimension_name}.#{current_column_name},\n"
        sql += " #{row_dimension_name}.#{current_row_name},\n"
        sql += fact_columns
        sql += "\nFROM\n"

        sql += " #{fact_class.table_name}"
        cube_class.dimensions_hierarchies.each do |dimension_name, hierarchy_names|
          # only join the dimensions the query actually touches
          next if !used_dimensions.include?(dimension_name)
          sql += dimension_join_sql(fact_class, dimension_name)
        end

        # build the where clause
        # first add conditions (copied, because Array() hands back the
        # caller's own array when conditions already is one)
        where_clause = Array(conditions).dup

        # apply filters
        filters.each do |key, value|
          dimension_name, column = key.split('.')
          where_clause << "#{dimension_name}.#{column} = #{cube_class.connection.quote(value)}"
        end
        sql += %Q(\nWHERE\n #{where_clause.join(" AND\n ")} ) if where_clause.length > 0

        # for hierarchical dimension we need to add where clause in for drill downs
        if !hierarchical_dimension.nil?
          sql += where_clause.empty? ? "\n WHERE " : " \n AND "
          sql += "\n #{hierarchical_dimension_name}.#{hierarchical_dimension.primary_key} IN ( "
          sql += "\n SELECT #{hierarchical_dimension.parent_foreign_key} FROM #{hierarchical_dimension.bridge_class.table_name} "
          if hierarchical_stage == 0
            sql += "\n WHERE #{hierarchical_dimension.bridge_class.top_flag} = #{connection.send(:quote, hierarchical_dimension.bridge_class.top_flag_value)})"
          else
            sql += "\n WHERE #{hierarchical_dimension.child_foreign_key} = #{hierarchical_stage} AND #{hierarchical_dimension.levels_from_parent} = 1)"
          end
        end

        sql += "\nGROUP BY\n"
        sql += " #{column_dimension_name}.#{current_column_name},\n"
        sql += " #{row_dimension_name}.#{current_row_name}"

        if options[:order]
          sql += %Q(\nORDER BY\n #{quoted_order_columns(options[:order]).join(",\n")})
        end

        result = ActiveWarehouse::CubeQueryResult.new(
          cube_class.aggregate_fields
        )

        cube_class.connection.select_all(sql).each do |row|
          result.add_data(row.delete(current_row_name.to_s),
                          row.delete(current_column_name.to_s),
                          row) # the rest of the members of row are the fact columns
        end

        result
      end

      protected

      # Collect the names of every dimension the query references: the row
      # and column dimensions, their dependent dimensions, the dimension part
      # of each filter key, and any cube dimension mentioned as "name." inside
      # the conditions string.
      def used_dimension_names(fact_class, column_dimension_name, row_dimension_name, filters, conditions)
        names = Set.new([column_dimension_name, row_dimension_name])
        [row_dimension_name, column_dimension_name].each do |name|
          reflections = fact_class.dimension_relationships[name].dependent_dimension_reflections
          names.merge(reflections.collect { |d| d.name })
        end
        filters.each do |key, value|
          names << key.split('.')[0]
        end
        if conditions
          cube_class.dimensions.each do |dimension|
            # escape the name so it is matched literally
            names << dimension if conditions =~ /#{Regexp.escape(dimension.to_s)}\./i
          end
        end
        names
      end

      # Build the SELECT expression for one aggregate field.
      #
      # Without a hierarchical dimension, or when the field declares no
      # levels_from_parent, this is a plain "strategy(table.column) AS label".
      # Otherwise the aggregate is restricted with a CASE over the bridge
      # table's levels-from-parent column; :all lifts the restriction.
      #
      # Raises ArgumentError for a level that is not :all, :self or Integer.
      def aggregate_field_sql(field, hierarchical_dimension)
        quoted_label = cube_class.connection.quote_column_name(field.label)
        source_column = "#{field.from_table_name}.#{field.name}"
        unrestricted = " #{field.strategy_name}(#{source_column}) AS #{quoted_label}"
        return unrestricted unless hierarchical_dimension && !field.levels_from_parent.empty?

        bridge = hierarchical_dimension.bridge_class
        level_column = "#{bridge.table_name}.#{bridge.levels_from_parent}"
        include_all = false
        branches = ''
        field.levels_from_parent.each do |level|
          case level
          when :all
            include_all = true
          when :self
            branches += " WHEN #{level_column} = 0 THEN #{source_column} \n"
          when Integer
            branches += " WHEN #{level_column} = #{level} then #{source_column} \n"
          else
            raise ArgumentError, "Each element to :levels_from_parent option must be :all, :self, or Integer"
          end
        end

        # :all covers every level, so any CASE branches are redundant
        return unrestricted if include_all
        " #{field.strategy_name}(CASE #{branches} ELSE 0 END) AS #{quoted_label}"
      end

      # Build the JOIN clause(s) connecting the fact table to one dimension.
      # A hierarchical dimension is reached through its bridge table, with an
      # extra effective/expiration date restriction when the dimension is
      # also slowly changing.
      def dimension_join_sql(fact_class, dimension_name)
        dimension = fact_class.dimension_class(dimension_name)
        fact_key = "#{fact_class.table_name}.#{fact_class.foreign_key_for(dimension_name)}"

        if !dimension.hierarchical_dimension?
          sql = "\nJOIN #{dimension.table_name} as #{dimension_name}"
          sql += "\n ON #{fact_key} = "
          sql += "#{dimension_name}.#{dimension.primary_key}"
          return sql
        end

        dimension_bridge = dimension.bridge_class
        sql = "\nJOIN #{dimension_bridge.table_name}"
        sql += "\n ON #{fact_key} = "
        sql += "#{dimension_bridge.table_name}.#{dimension.parent_foreign_key}"
        if dimension.slowly_changing_dimension?
          date_stamp = "#{fact_class.slowly_changes_over_name(dimension_name)}."
          date_stamp += "#{fact_class.slowly_changes_over_class(dimension_name).sql_date_stamp}"
          sql += " and (#{dimension_bridge.table_name}.#{dimension_bridge.effective_date} <= "
          sql += "#{date_stamp} "
          sql += "and #{dimension_bridge.table_name}.#{dimension_bridge.expiration_date} >= "
          sql += "#{date_stamp}) "
        end
        sql += "\nJOIN #{dimension.table_name} as #{dimension_name}"
        sql += "\n ON #{dimension_bridge.table_name}.#{dimension.child_foreign_key} = "
        sql += "#{dimension_name}.#{dimension.primary_key}"
        sql
      end

      # Quote each ORDER BY column individually. Accepts a single name or an
      # array of names and never modifies the value passed in.
      def quoted_order_columns(order)
        columns = order.is_a?(Array) ? order : [order]
        columns.collect { |column| cube_class.connection.quote_column_name(column) }
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # Aggregate intended to implement the Partitioning and Inserting Dwarf
    # algorithm described in
    # http://www.zju.edu.cn/jzus/2005/A0506/A050608.pdf
    #
    # As written this class is a skeleton: the cube builder has an empty body
    # and +query+ only parses its arguments.
    class PidAggregate < Aggregate
      include DwarfCommon

      # Initialize the aggregate for the given cube class; all work is
      # delegated to the superclass.
      def initialize(cube_class)
        super(cube_class)
      end

      # Populate the aggregate by handing the sorted facts to the cube builder.
      def populate
        facts = sorted_facts
        create_dwarf_cube(facts)
      end

      # Query the aggregate. Currently returns the options produced by
      # +parse_query_args+ and nothing else.
      def query(*args)
        parse_query_args(*args)
      end

      # Build the dwarf cube from +sorted_facts+. Not implemented yet; always
      # returns nil.
      def create_dwarf_cube(sorted_facts)
        nil
      end
    end
  end
end
@@ -0,0 +1,129 @@
1
+ require 'erb'
2
+
3
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # A Pipelined implementation of a ROLAP engine that stores all possible
    # combinations of fact and dimensional values for a specific cube.
    #
    # This implementation attempts to reduce the amount of work required
    # by aggregating facts in a pipelined fashion. This means that smaller
    # aggregates are generated from a preceding aggregate, in order to avoid
    # having to query the entire raw data set for every aggregate.
    #
    # E.g.
    #
    #   ABCD -> ABC -> AB -> A -> *all*
    class PipelinedRolapAggregate < Aggregate
      include RolapCommon

      # Build and populate the data store.
      #
      # Creates the table grouped by every dimension column, creates the
      # smaller rollup tables, then runs each generated insert statement in
      # its own transaction. +options+ is accepted but not read here.
      def populate(options={})
        create_and_populate_all_table
        create_all_pipelined_agg_tables
        create_insert_statements.each_with_index do |insert, i|
          next if i == 0 # handled by create_and_populate_all_table
          connection.transaction { connection.execute(insert) }
        end
      end

      protected

      # Build and populate a table which group by's all dimension columns.
      # The table is named with the number of dimension columns as its index
      # and is dropped first if it already exists.
      def create_and_populate_all_table
        columns = dimensions_to_columns
        dimension_column_names = columns.collect do |c|
          "#{c.table_alias}.#{c.name} as #{c.table_alias}_#{c.name}"
        end

        sql = <<-SQL
          SELECT
          #{dimension_column_names.join(",")},
          #{aggregated_fact_column_sql_for_all}
          FROM #{tables_and_joins}
          GROUP BY
          #{columns.collect { |c| "#{c.table_alias}.#{c.name}" }.join(",")}
        SQL

        all_table_name = indexed_rollup_table_name(dimension_column_names.length)

        sql = connection.add_select_into_table(all_table_name, sql)

        connection.drop_table(all_table_name) if connection.tables.include?(all_table_name)
        connection.transaction { connection.execute(sql) }
      end

      # Create one empty rollup table for every index below the number of
      # dimension columns.
      def create_all_pipelined_agg_tables
        dimensions_to_columns.size.times do |i|
          create_rollup_cube_table(i)
        end
      end

      # Creates the rollup table for +index+, dropping an existing table of
      # the same name first. The table has one column per dimension column
      # and one per aggregate field; integer aggregate columns get a limit
      # of 8.
      def create_rollup_cube_table(index)
        table_name = indexed_rollup_table_name(index)
        connection.drop_table(table_name) if connection.tables.include?(table_name)

        ActiveRecord::Base.transaction do
          connection.create_table(table_name, :id => false) do |t|
            dimensions_to_columns.each do |c|
              t.column(c.label, c.column_type)
            end
            aggregate_fields.each do |c|
              options = {}
              options[:limit] = c.column_type == :integer ? 8 : c.limit
              options[:scale] = c.scale if c.scale
              options[:precision] = c.precision if c.precision
              t.column(c.label_for_table, c.column_type, options)
            end
          end
        end
      end

      # Render the ERB template for the current number of dimension columns
      # (templates/pipelined_rollup_<n>.sql) and split the result into
      # individual SQL statements, using blank lines as separators.
      #
      # The template is evaluated with this object's binding, so it can see
      # @dimension_0 .. @dimension_<n-1> (the dimension column labels),
      # @aggregate_fields_from_flat_table, @aggregate_fields,
      # @flat_table_name and @rollup_table_name.
      #
      # Returns an array of SQL strings.
      def create_insert_statements
        dim_columns = dimensions_to_columns
        template_filename = File.dirname(__FILE__) + "/templates/pipelined_rollup_#{dim_columns.length}.sql"
        dim_columns.each_with_index do |column, i|
          # set directly rather than through eval, so a label containing a
          # quote cannot break (or inject into) the evaluated code
          instance_variable_set("@dimension_#{i}", column.label.to_s)
        end
        @aggregate_fields_from_flat_table = aggregated_fact_column_sql_for_rollup
        # NOTE(review): this overwrites @aggregate_fields on the aggregate
        # itself - confirm the aggregate_fields reader is not memoized in
        # that same instance variable.
        @aggregate_fields = aggregated_fact_column_sql_for_rollup
        @flat_table_name = flat_table_name
        @rollup_table_name = rollup_table_name

        inserts = []

        sql = ""
        # each_line works on every Ruby version; String#each does not
        ERB.new(File.read(template_filename)).result(binding).each_line do |line|
          if line.strip == ""
            inserts << sql
            sql = ""
          else
            sql += line
          end
        end
        # keep the final statement when the template has no trailing blank line
        inserts << sql unless sql.empty?

        inserts
      end

      # Name of the rollup table for the given number of grouped dimensions.
      def indexed_rollup_table_name(index)
        "#{rollup_table_name}_#{index}"
      end

      # SELECT expressions aggregating the raw fact columns with each field's
      # own strategy.
      def aggregated_fact_column_sql_for_all
        aggregate_fields.collect { |c|
          "#{c.strategy_name}(#{c.name}) AS #{c.label_for_table}"
        }.join(",")
      end

      # SELECT expressions re-aggregating already aggregated columns when
      # rolling one table up into the next.
      def aggregated_fact_column_sql_for_rollup
        aggregate_fields.collect { |c|
          "#{rollup_function_for(c.strategy_name)}(#{c.label_for_table}) AS #{c.label_for_table}"
        }.join(", ")
      end

      # Function used to combine partial aggregates: :avg, :min and :max are
      # combined with themselves, everything else is summed. (Summing partial
      # minimums or maximums would not yield a minimum or maximum.)
      def rollup_function_for(strategy_name)
        case strategy_name
        when :avg, :min, :max then strategy_name
        else :sum
        end
      end
    end
  end
end
@@ -0,0 +1,181 @@
1
+ # Source file that contains a basic ROLAP engine implementation.
2
+
3
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # Basic implementation of a ROLAP engine that stores all possible combinations
    # of fact and dimensional values for a specific cube.
    class RolapAggregate < Aggregate
      include RolapCommon

      # Build and populate the data store.
      #
      # Options:
      # * <tt>:verbose</tt>: Set to true to print info to STDOUT during building
      def populate(options={})
        populate_rollup_cube(options)
      end

      # Query the aggregate, returning a CubeQueryResult object.
      #
      # The arguments are normalized by +parse_query_args+ into an options
      # hash. The keys read here are <tt>:column_dimension_name</tt>,
      # <tt>:column_hierarchy_name</tt>, <tt>:row_dimension_name</tt>,
      # <tt>:row_hierarchy_name</tt>, <tt>:conditions</tt> (raw SQL ANDed into
      # the WHERE clause), <tt>:cstage</tt> / <tt>:rstage</tt> (default 0) and
      # <tt>:filters</tt> (default {}). The filters hash passed by the caller
      # is not modified.
      def query(*args)
        options = parse_query_args(*args)

        column_dimension_name = options[:column_dimension_name]
        column_hierarchy_name = options[:column_hierarchy_name]
        row_dimension_name = options[:row_dimension_name]
        row_hierarchy_name = options[:row_hierarchy_name]
        conditions = options[:conditions]
        cstage = options[:cstage] || 0
        rstage = options[:rstage] || 0
        # work on a copy: hierarchy filters are removed from it as they are consumed
        filters = (options[:filters] || {}).dup

        column_dimension = fact_class.dimension_class(column_dimension_name)
        column_hierarchy = column_dimension.hierarchy(column_hierarchy_name)
        row_dimension = fact_class.dimension_class(row_dimension_name)
        row_hierarchy = row_dimension.hierarchy(row_hierarchy_name)
        current_column_name = column_hierarchy[cstage]
        current_row_name = row_hierarchy[rstage]
        full_column_name = "#{column_dimension_name}_#{current_column_name}"
        full_row_name = "#{row_dimension_name}_#{current_row_name}"

        # build the SQL query
        sql = ''
        sql += 'SELECT '
        sql += "#{full_column_name} AS #{current_column_name},"
        sql += "#{full_row_name} AS #{current_row_name},"
        sql += aggregate_fields.collect { |c|
          "#{c.label_for_table} as #{connection.quote_column_name(c.label)}"
        }.join(",")
        sql += " FROM #{rollup_table_name} "

        # build the where clause
        where_clause = []
        where_clause.concat(hierarchy_conditions(column_dimension_name, column_hierarchy, cstage, filters))
        where_clause.concat(hierarchy_conditions(row_dimension_name, row_hierarchy, rstage, filters))
        where_clause << "#{full_column_name} is not null"
        where_clause << "#{full_row_name} is not null"
        filters.each do |key, value|
          dimension_name, column = key.split('.')
          where_clause << "#{dimension_name}_#{column} = #{connection.quote(value.to_s)}"
        end
        # caller-supplied conditions are ANDed in like any other clause;
        # parenthesized so an OR inside them cannot widen the other clauses
        where_clause << "(#{conditions})" if conditions
        sql += %Q( WHERE #{where_clause.join(" AND ")} )

        # execute the query and return the results as a CubeQueryResult object
        result = ActiveWarehouse::CubeQueryResult.new(
          aggregate_fields
        )
        rows = connection.select_all(sql)
        rows.each do |row|
          result.add_data(row.delete(current_row_name.to_s),
                          row.delete(current_column_name.to_s),
                          row) # the rest of the members of row are the fact columns
        end
        result
      end

      protected

      # WHERE fragments for one hierarchy of one dimension.
      #
      # For every level of +hierarchy+: when +filters+ holds a value under the
      # level name, that entry is removed from +filters+ and turned into an
      # equality test on the "<dimension>_<level>" column; otherwise levels
      # deeper than +current_stage+ are required to be null.
      def hierarchy_conditions(dimension_name, hierarchy, current_stage, filters)
        clauses = []
        0.upto(hierarchy.length - 1) do |stage|
          level_name = hierarchy[stage]
          name = "#{dimension_name}_#{level_name}"
          filter_value = filters.delete(level_name)
          if filter_value
            # quoted through the connection to protect from SQL injection
            clauses << "#{name} = #{connection.quote(filter_value.to_s)}"
          else
            clauses << "#{name} is null" if stage > current_stage
          end
        end
        clauses
      end

      # Creates the rollup table, dropping an existing one first. The table
      # has one column per dimension column and one per aggregate field.
      # +options+ is accepted but not read here.
      def create_rollup_cube_table(options={})
        # TODO: perhaps this should all be executed in a single transaction?
        connection.drop_table(rollup_table_name) if connection.tables.include?(rollup_table_name)

        ActiveRecord::Base.transaction do
          connection.create_table(rollup_table_name, :id => false) do |t|
            dimensions_to_columns.each do |c|
              t.column(c.label, c.column_type)
            end
            aggregate_fields.each do |c|
              t.column(c.label_for_table, c.column_type)
            end
          end
        end
      end

      # Builds the aggregate SQL that will be used to populate the ROLAP table.
      # This SQL is just the SELECT statement and includes all of the GROUP BYs
      # and aggregation functions.
      #
      # +column_mask+ is an array of booleans, where true is the column to group
      # by. The length of this array is equal to the number of dimension
      # columns in the SELECT clause. No GROUP BY is emitted when every
      # column is masked out.
      def build_aggregate_sql(column_mask)
        dimension_column_names = dimensions_to_columns.collect do |c|
          "#{c.table_alias}.#{c.name}"
        end
        masked_columns = mask_columns_with_null(dimension_column_names, column_mask)

        sql = <<-SQL
          SELECT
          #{masked_columns.join(",")},
          #{aggregated_fact_column_sql}
          FROM #{tables_and_joins}
        SQL

        group = masked_columns.reject { |o| o == 'null' }.join(",")
        sql += "GROUP BY #{group}" if !group.empty?

        sql
      end

      # Populate the rollup cube: one INSERT ... SELECT per combination of
      # grouped / nulled-out dimension columns, each in its own transaction.
      #
      # Options:
      # * <tt>:verbose</tt>: Set to true to print info to STDOUT during building
      def populate_rollup_cube(options={})
        create_rollup_cube_table(options)
        verbose = options[:verbose]
        puts "Populating rollup cube #{cube_class.name}" if verbose

        num_columns = dimensions_to_columns.size
        num_combos = (2**num_columns)-1
        puts "There are #{num_combos} combinations" if verbose
        (0..num_combos).each do |i|
          puts "Populating agg #{i} of #{num_combos}" if verbose && i % 100 == 0
          # binary digits of i, zero padded to one digit per column
          mask = sprintf("%0#{num_columns}b", i).split(//).collect { |x| x == '1' }

          sql = ''
          sql += "INSERT INTO #{rollup_table_name} "
          sql += build_aggregate_sql(mask)

          connection.transaction { connection.execute(sql) }
        end

        if verbose
          row_count = connection.select_value("SELECT count(*) FROM #{rollup_table_name}")
          puts "Rollup cube populated with #{row_count} rows"
        end
      end

      # Mask columns with null: returns a new array in which every column
      # whose mask entry is false (or nil) is replaced by the string 'null'.
      # Raises when the two arrays differ in length.
      def mask_columns_with_null(column_names, mask)
        if mask.size != column_names.size
          raise "Columns has #{column_names.size} elements, but mask has only #{mask.size}"
        end

        new_columns = []
        column_names.each_with_index { |c, i| new_columns << (mask[i] ? c : 'null') }
        new_columns
      end
    end
  end
end