activewarehouse 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. data/README +27 -14
  2. data/Rakefile +16 -5
  3. data/doc/references.txt +4 -0
  4. data/generators/bridge/templates/migration.rb +9 -2
  5. data/generators/bridge/templates/unit_test.rb +8 -0
  6. data/generators/date_dimension/USAGE +1 -0
  7. data/generators/date_dimension/date_dimension_generator.rb +16 -0
  8. data/generators/date_dimension/templates/fixture.yml +5 -0
  9. data/generators/date_dimension/templates/migration.rb +31 -0
  10. data/generators/date_dimension/templates/model.rb +3 -0
  11. data/generators/date_dimension/templates/unit_test.rb +8 -0
  12. data/generators/dimension/templates/migration.rb +1 -10
  13. data/generators/dimension_view/dimension_view_generator.rb +2 -2
  14. data/generators/dimension_view/templates/migration.rb +8 -2
  15. data/generators/fact/templates/migration.rb +2 -0
  16. data/generators/time_dimension/USAGE +1 -0
  17. data/generators/time_dimension/templates/fixture.yml +5 -0
  18. data/generators/time_dimension/templates/migration.rb +12 -0
  19. data/generators/time_dimension/templates/model.rb +3 -0
  20. data/generators/time_dimension/templates/unit_test.rb +8 -0
  21. data/generators/time_dimension/time_dimension_generator.rb +14 -0
  22. data/lib/active_warehouse.rb +13 -2
  23. data/lib/active_warehouse/aggregate.rb +54 -253
  24. data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
  25. data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
  26. data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
  27. data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
  28. data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
  29. data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
  30. data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
  31. data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
  32. data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
  33. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
  34. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
  35. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
  36. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
  37. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
  38. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
  39. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
  40. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
  41. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
  42. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
  43. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
  44. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
  45. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
  46. data/lib/active_warehouse/aggregate_field.rb +49 -0
  47. data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
  48. data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
  49. data/lib/active_warehouse/builder.rb +2 -1
  50. data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
  51. data/lib/active_warehouse/builder/generator/generator.rb +13 -0
  52. data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
  53. data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
  54. data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
  55. data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
  56. data/lib/active_warehouse/calculated_field.rb +27 -0
  57. data/lib/active_warehouse/compat/compat.rb +4 -4
  58. data/lib/active_warehouse/cube.rb +126 -225
  59. data/lib/active_warehouse/cube_query_result.rb +69 -0
  60. data/lib/active_warehouse/dimension.rb +64 -29
  61. data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
  62. data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
  63. data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
  64. data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
  65. data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
  66. data/lib/active_warehouse/fact.rb +119 -40
  67. data/lib/active_warehouse/field.rb +74 -0
  68. data/lib/active_warehouse/ordered_hash.rb +34 -0
  69. data/lib/active_warehouse/prejoin_fact.rb +97 -0
  70. data/lib/active_warehouse/report/abstract_report.rb +40 -14
  71. data/lib/active_warehouse/report/chart_report.rb +3 -3
  72. data/lib/active_warehouse/report/table_report.rb +8 -3
  73. data/lib/active_warehouse/version.rb +1 -1
  74. data/lib/active_warehouse/view/report_helper.rb +144 -34
  75. data/tasks/active_warehouse_tasks.rake +28 -10
  76. metadata +107 -30
@@ -0,0 +1,34 @@
1
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # Dwarf support class that prints a textual representation of a Dwarf,
    # one line per node.
    class DwarfPrinter
      # Print the specified node at the given depth.
      #
      # Each line has the form
      #
      #   <index>=<indent><parent cell>:[<cell>|<cell>|...|<all cell>]
      #
      # where the indent is one space per level of +depth+ and the
      # "<parent cell>:" part is omitted when the node has no parent.
      #
      # * +node+: the node to print; must respond to +index+, +cells+,
      #   +parent+, +all_cell+, +leaf?+ and +children+
      # * +depth+: the indentation level of this node
      # * +recurse+: when true the node's children are printed as well
      # * +io+: the object that receives the output via +puts+
      #   (defaults to <tt>$stdout</tt>)
      def self.print_node(node, depth=0, recurse=true, io=$stdout)
        cells = node.cells.collect { |c| cell_to_string(c) }.join('|')
        parent_node = node.parent ? "#{cell_to_string(node.parent)}:" : ''
        io.puts "#{node.index}=#{' ' * depth}#{parent_node}[#{cells}|#{all_cell_to_string(node.all_cell)}]"

        # For a non-leaf node, also print the node referenced by the ALL cell
        # (without descending into that node's children). An ALL cell that
        # has no child node is skipped rather than dereferenced.
        if !node.leaf? && node.all_cell && node.all_cell.child
          print_node(node.all_cell.child, depth + 1, false, io)
        end

        if recurse
          node.children.each { |child| print_node(child, depth + 1, true, io) }
        end
      end

      # Render a cell as its key, followed by the comma-separated values when
      # the cell belongs to a leaf node.
      def self.cell_to_string(cell)
        # A new String object must be created here, otherwise to_s may return
        # a reference to the same String object each time and the values
        # would be appended to it on every call.
        s = String.new(cell.key.to_s)
        # Array() lets a missing (nil) value print as nothing instead of raising
        s << " #{Array(cell.value).join(',')}" if cell.node.leaf?
        s
      end

      # Render the ALL cell as the +inspect+ form of its value, or as an
      # empty string when either the cell or its value is missing.
      def self.all_cell_to_string(cell)
        return '' unless cell && cell.value
        cell.value.inspect
      end
    end
  end
end
@@ -0,0 +1,194 @@
1
+ require 'set'
2
+
3
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # An aggregate which goes directly to the fact and dimensions to answer
    # questions.
    class NoAggregate < Aggregate
      # Populate the aggregate (in this case it is a no-op implementation)
      def populate
        # do nothing
      end

      # Query the aggregate.
      #
      # The arguments are handed to +parse_query_args+. The options read from
      # its result are:
      #
      # * <tt>:column_dimension_name</tt>, <tt>:column_hierarchy_name</tt>
      # * <tt>:row_dimension_name</tt>, <tt>:row_hierarchy_name</tt>
      # * <tt>:conditions</tt>: extra SQL condition(s) for the WHERE clause
      # * <tt>:cstage</tt>, <tt>:rstage</tt>: column/row drill-down stage
      #   (default 0)
      # * <tt>:filters</tt>: hash of "dimension.column" => value equality
      #   filters (default {})
      # * <tt>:order</tt>: a column name or an array of column names to
      #   order by
      #
      # Returns an ActiveWarehouse::CubeQueryResult filled from the rows the
      # generated SQL produces.
      def query(*args)
        options = parse_query_args(*args)

        column_dimension_name = options[:column_dimension_name]
        column_hierarchy_name = options[:column_hierarchy_name]
        row_dimension_name = options[:row_dimension_name]
        row_hierarchy_name = options[:row_hierarchy_name]
        conditions = options[:conditions]
        cstage = options[:cstage] || 0
        rstage = options[:rstage] || 0
        filters = options[:filters] || {}

        fact_class = cube_class.fact_class
        column_dimension = fact_class.dimension_class(column_dimension_name)
        column_hierarchy = column_dimension.hierarchy(column_hierarchy_name)
        row_dimension = fact_class.dimension_class(row_dimension_name)
        row_hierarchy = row_dimension.hierarchy(row_hierarchy_name)

        # Collect the dimensions the query touches; only those are joined in.
        used_dimensions = Set.new
        used_dimensions.merge([column_dimension_name, row_dimension_name])
        row_dim_reflection = fact_class.dimension_relationships[row_dimension_name].dependent_dimension_reflections
        used_dimensions.merge(row_dim_reflection.collect { |d| d.name })
        col_dim_reflection = fact_class.dimension_relationships[column_dimension_name].dependent_dimension_reflections
        used_dimensions.merge(col_dim_reflection.collect { |d| d.name })
        filters.each do |key, _value|
          used_dimensions << key.split('.')[0]
        end
        if conditions
          # a dimension counts as used when the conditions mention "<dimension>."
          cube_class.dimensions.each do |dimension|
            used_dimensions << dimension if conditions =~ /#{dimension}\./i
          end
        end

        # This method assumes at most one dimension is hierarchical dimension
        # in the query params. TODO: need to handle when both row and column
        # are hierarchical dimensions.
        hierarchical_dimension = nil
        hierarchical_dimension_name = nil
        hierarchical_stage = nil

        if column_dimension.hierarchical_dimension?
          hierarchical_dimension = column_dimension
          hierarchical_dimension_name = column_dimension_name
          hierarchical_stage = cstage
          current_column_name = column_hierarchy[0]
        else
          current_column_name = column_hierarchy[cstage]
        end

        if row_dimension.hierarchical_dimension?
          hierarchical_dimension = row_dimension
          hierarchical_dimension_name = row_dimension_name
          hierarchical_stage = rstage
          current_row_name = row_hierarchy[0]
        else
          current_row_name = row_hierarchy[rstage]
        end

        fact_columns = cube_class.aggregate_fields.collect { |field|
          aggregate_column_sql(field, hierarchical_dimension)
        }.join(",\n")

        sql = ''
        sql += "SELECT\n"
        sql += " #{column_dimension_name}.#{current_column_name},\n"
        sql += " #{row_dimension_name}.#{current_row_name},\n"
        sql += fact_columns
        sql += "\nFROM\n"

        sql += " #{fact_class.table_name}"
        cube_class.dimensions_hierarchies.each do |dimension_name, hierarchy_names|
          next if !used_dimensions.include?(dimension_name)
          sql += dimension_join_sql(fact_class, dimension_name)
        end

        # build the where clause
        # first add conditions
        where_clause = Array(conditions)

        # apply filters
        filters.each do |key, value|
          dimension_name, column = key.split('.')
          where_clause << "#{dimension_name}.#{column} = #{cube_class.connection.quote(value)}"
        end
        sql += %Q(\nWHERE\n #{where_clause.join(" AND\n ")} ) if where_clause.length > 0

        # for hierarchical dimension we need to add where clause in for drill downs
        if hierarchical_dimension
          sql += where_clause.empty? ? "\n WHERE " : " \n AND "
          sql += drill_down_sql(hierarchical_dimension, hierarchical_dimension_name, hierarchical_stage)
        end

        sql += "\nGROUP BY\n"
        sql += " #{column_dimension_name}.#{current_column_name},\n"
        sql += " #{row_dimension_name}.#{current_row_name}"

        if options[:order]
          # Quote each requested column individually, and build a new array so
          # the caller's :order value is left untouched.
          order_by = Array(options[:order]).collect { |v| cube_class.connection.quote_column_name(v) }
          sql += %Q(\nORDER BY\n #{order_by.join(",\n")})
        end

        result = ActiveWarehouse::CubeQueryResult.new(
          cube_class.aggregate_fields
        )

        cube_class.connection.select_all(sql).each do |row|
          result.add_data(row.delete(current_row_name.to_s),
                          row.delete(current_column_name.to_s),
                          row) # the rest of the members of row are the fact columns
        end

        result
      end

      private

      # Build the SELECT expression for one aggregate field.
      #
      # Without a hierarchical dimension, or when the field declares no
      # +levels_from_parent+, this is the plain aggregate over the field's
      # column. Otherwise the aggregate is restricted, through a CASE on the
      # bridge table's levels-from-parent column, to the requested levels;
      # <tt>:all</tt> means no restriction, so the plain aggregate is used.
      #
      # Raises ArgumentError for any level that is not :all, :self or an
      # Integer.
      def aggregate_column_sql(field, hierarchical_dimension)
        quoted_label = cube_class.connection.quote_column_name(field.label)
        source_column = "#{field.from_table_name}.#{field.name}"
        plain = " #{field.strategy_name}(#{source_column}) AS #{quoted_label}"
        return plain unless hierarchical_dimension and !field.levels_from_parent.empty?

        bridge = hierarchical_dimension.bridge_class
        bridge_table_name = bridge.table_name
        levels_from_parent = bridge.levels_from_parent

        get_all = false
        when_clauses = ''
        field.levels_from_parent.each do |level|
          case level
          when :all
            get_all = true
          when :self
            when_clauses += " WHEN #{bridge_table_name}.#{levels_from_parent} = 0 THEN #{source_column} \n"
          when Integer
            when_clauses += " WHEN #{bridge_table_name}.#{levels_from_parent} = #{level} then #{source_column} \n"
          else
            raise ArgumentError, "Each element to :levels_from_parent option must be :all, :self, or Integer"
          end
        end

        return plain if get_all
        " #{field.strategy_name}(CASE #{when_clauses} ELSE 0 END) AS #{quoted_label}"
      end

      # Build the JOIN clause(s) that attach one dimension to the fact table.
      # A hierarchical dimension is joined through its bridge table; when it
      # is also slowly changing, the bridge row must be effective at the
      # fact's date stamp.
      def dimension_join_sql(fact_class, dimension_name)
        dimension = fact_class.dimension_class(dimension_name)
        fact_key = "#{fact_class.table_name}.#{fact_class.foreign_key_for(dimension_name)}"
        sql = ''

        if !dimension.hierarchical_dimension?
          sql += "\nJOIN #{dimension.table_name} as #{dimension_name}"
          sql += "\n ON #{fact_key} = "
          sql += "#{dimension_name}.#{dimension.primary_key}"
          return sql
        end

        dimension_bridge = dimension.bridge_class
        sql += "\nJOIN #{dimension_bridge.table_name}"
        sql += "\n ON #{fact_key} = "
        sql += "#{dimension_bridge.table_name}.#{dimension.parent_foreign_key}"
        if dimension.slowly_changing_dimension?
          date_stamp = "#{fact_class.slowly_changes_over_name(dimension_name)}." +
                       "#{fact_class.slowly_changes_over_class(dimension_name).sql_date_stamp}"
          sql += " and (#{dimension_bridge.table_name}.#{dimension_bridge.effective_date} <= "
          sql += "#{date_stamp} "
          sql += "and #{dimension_bridge.table_name}.#{dimension_bridge.expiration_date} >= "
          sql += "#{date_stamp}) "
        end
        sql += "\nJOIN #{dimension.table_name} as #{dimension_name}"
        sql += "\n ON #{dimension_bridge.table_name}.#{dimension.child_foreign_key} = "
        sql += "#{dimension_name}.#{dimension.primary_key}"
        sql
      end

      # Build the IN (...) sub-select that limits a hierarchical dimension to
      # the rows for the requested stage: the rows flagged as top level for
      # stage 0, otherwise the rows one level below the row whose child key
      # equals the stage value.
      def drill_down_sql(dimension, dimension_name, stage)
        bridge = dimension.bridge_class
        sql = "\n #{dimension_name}.#{dimension.primary_key} IN ( "
        sql += "\n SELECT #{dimension.parent_foreign_key} FROM #{bridge.table_name} "
        if stage == 0
          sql += "\n WHERE #{bridge.top_flag} = #{cube_class.connection.quote(bridge.top_flag_value)})"
        else
          sql += "\n WHERE #{dimension.child_foreign_key} = #{stage} AND #{dimension.levels_from_parent} = 1)"
        end
        sql
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # Partitioning and Inserting Dwarf (PID) aggregate, following the
    # algorithm defined in http://www.zju.edu.cn/jzus/2005/A0506/A050608.pdf
    #
    # NOTE(review): cube construction and querying are still stubs here.
    class PidAggregate < Aggregate
      include DwarfCommon

      # Initialize the aggregate for the given cube class.
      def initialize(cube_class)
        super(cube_class)
      end

      # Populate the aggregate by building the dwarf cube from +sorted_facts+.
      def populate
        facts = sorted_facts
        create_dwarf_cube(facts)
      end

      # Query the aggregate. For now this only parses the arguments and
      # returns whatever +parse_query_args+ produced.
      def query(*args)
        parse_query_args(*args)
      end

      # Build the dwarf cube from the given sorted facts.
      # Not implemented yet: does nothing and returns nil.
      def create_dwarf_cube(sorted_facts)
        nil
      end
    end
  end
end
@@ -0,0 +1,129 @@
1
+ require 'erb'
2
+
3
module ActiveWarehouse #:nodoc
  module Aggregate #:nodoc
    # A Pipelined implementation of a ROLAP engine that stores all possible
    # combinations of fact and dimensional values for a specific cube.
    #
    # This implementation attempts to reduce the amount of work required
    # by aggregating facts in a pipelined fashion. This means that smaller
    # aggregates are generated from a preceding aggregate, in order to avoid
    # having to query the entire raw data set for every aggregate.
    #
    # E.g.
    #
    #   ABCD -> ABC -> AB -> A -> *all*
    class PipelinedRolapAggregate < Aggregate
      include RolapCommon

      # Build and populate the data store: create the table holding every
      # dimension column, create the empty rollup tables, then run the
      # templated insert statements that fill each rollup table.
      def populate(options={})
        create_and_populate_all_table
        create_all_pipelined_agg_tables
        create_insert_statements.each_with_index do |insert, i|
          next if i == 0 # handled by create_and_populate_all_table
          connection.transaction { connection.execute(insert) }
        end
      end

      protected

      # Build and populate a table which group by's all dimension columns.
      # The table is named with the number of dimension columns as its index
      # and is dropped first when it already exists.
      def create_and_populate_all_table
        dimension_column_names = dimensions_to_columns.collect do |c|
          "#{c.table_alias}.#{c.name} as #{c.table_alias}_#{c.name}"
        end
        group_by_columns = dimensions_to_columns.collect { |c| "#{c.table_alias}.#{c.name}" }

        sql = <<-SQL
          SELECT
            #{dimension_column_names.join(",")},
            #{aggregated_fact_column_sql_for_all}
          FROM #{tables_and_joins}
          GROUP BY
            #{group_by_columns.join(",")}
        SQL

        all_table_name = indexed_rollup_table_name(dimension_column_names.length)

        sql = connection.add_select_into_table(all_table_name, sql)

        connection.drop_table(all_table_name) if connection.tables.include?(all_table_name)
        connection.transaction { connection.execute(sql) }
      end

      # Create one empty rollup table for every index from 0 up to (but not
      # including) the number of dimension columns.
      def create_all_pipelined_agg_tables
        (0..dimensions_to_columns.size-1).each do |i|
          create_rollup_cube_table(i)
        end
      end

      # Creates the rollup table with the given index, dropping any existing
      # table of the same name first. The table has one column per dimension
      # column and one per aggregate field.
      def create_rollup_cube_table(index)
        table_name = indexed_rollup_table_name(index)
        connection.drop_table(table_name) if connection.tables.include?(table_name)

        ActiveRecord::Base.transaction do
          connection.create_table(table_name, :id => false) do |t|
            dimensions_to_columns.each do |c|
              t.column(c.label, c.column_type)
            end
            aggregate_fields.each do |c|
              options = {}
              # integer aggregate columns are always created with a limit of 8
              options[:limit] = c.column_type == :integer ? 8 : c.limit
              options[:scale] = c.scale if c.scale
              options[:precision] = c.precision if c.precision
              t.column(c.label_for_table, c.column_type, options)
            end
          end
        end
      end

      # Render the SQL template matching the number of dimension columns and
      # split the result into individual statements. Statements are separated
      # by blank lines in the rendered template.
      #
      # The template is evaluated against this object's binding, with the
      # instance variables <tt>@dimension_0</tt> ... <tt>@dimension_N</tt>,
      # <tt>@aggregate_fields_from_flat_table</tt>, <tt>@aggregate_fields</tt>,
      # <tt>@flat_table_name</tt> and <tt>@rollup_table_name</tt> set.
      #
      # Returns an array of SQL strings in template order.
      def create_insert_statements
        dim_columns = dimensions_to_columns
        template_filename = File.dirname(__FILE__) + "/templates/pipelined_rollup_#{dim_columns.length}.sql"
        dim_columns.each_with_index do |column, i|
          # set directly rather than through eval, so a label containing a
          # quote character cannot break (or inject into) evaluated code
          instance_variable_set("@dimension_#{i}", column.label.to_s)
        end
        @aggregate_fields_from_flat_table = aggregated_fact_column_sql_for_rollup
        @aggregate_fields = aggregated_fact_column_sql_for_rollup
        @flat_table_name = flat_table_name
        @rollup_table_name = rollup_table_name

        inserts = []

        sql = ""
        # each_line (not String#each, which newer Rubies no longer provide)
        ERB.new(File.read(template_filename)).result(binding).each_line do |line|
          if line.strip == ""
            inserts << sql
            sql = ""
          else
            sql += line
          end
        end
        # keep a final statement that is not followed by a blank line
        inserts << sql unless sql.strip.empty?

        inserts
      end

      # Name of the rollup table with the given index.
      def indexed_rollup_table_name(index)
        "#{rollup_table_name}_#{index}"
      end

      # SELECT expressions that aggregate each fact column with its own
      # strategy; used when aggregating from the raw fact data.
      def aggregated_fact_column_sql_for_all
        aggregate_fields.collect { |c|
          "#{c.strategy_name}(#{c.name}) AS #{c.label_for_table}"
        }.join(",")
      end

      # SELECT expressions used when rolling up from a previous aggregate
      # table: :avg fields are averaged again, every other strategy is summed.
      def aggregated_fact_column_sql_for_rollup
        aggregate_fields.collect { |c|
          "#{c.strategy_name == :avg ? :avg : :sum}(#{c.label_for_table}) AS #{c.label_for_table}"
        }.join(", ")
      end
    end
  end
end
@@ -0,0 +1,181 @@
1
+ # Source file that contains a basic ROLAP engine implementation.
2
+
3
module ActiveWarehouse #:nodoc
  module Aggregate #:nodoc
    # Basic implementation of a ROLAP engine that stores all possible combinations
    # of fact and dimensional values for a specific cube.
    class RolapAggregate < Aggregate
      include RolapCommon

      # Build and populate the data store.
      #
      # Options:
      # * <tt>:verbose</tt>: Set to true to print info to STDOUT during building
      def populate(options={})
        populate_rollup_cube(options)
      end

      # Query the aggregate, returning a CubeQueryResult object.
      #
      # The arguments are handed to +parse_query_args+. The options read from
      # its result are:
      #
      # * <tt>:column_dimension_name</tt>, <tt>:column_hierarchy_name</tt>
      # * <tt>:row_dimension_name</tt>, <tt>:row_hierarchy_name</tt>
      # * <tt>:conditions</tt>: extra SQL condition(s), ANDed onto the WHERE
      #   clause
      # * <tt>:cstage</tt>, <tt>:rstage</tt>: column/row drill-down stage
      #   (default 0)
      # * <tt>:filters</tt>: hash of equality filters (default {}), keyed
      #   either by a hierarchy level name or by "dimension.column"
      def query(*args)
        options = parse_query_args(*args)

        column_dimension_name = options[:column_dimension_name]
        column_hierarchy_name = options[:column_hierarchy_name]
        row_dimension_name = options[:row_dimension_name]
        row_hierarchy_name = options[:row_hierarchy_name]
        conditions = options[:conditions]
        cstage = options[:cstage] || 0
        rstage = options[:rstage] || 0
        # work on a copy: entries are removed below as they are consumed, and
        # the caller's hash must not be modified
        filters = (options[:filters] || {}).dup

        column_dimension = fact_class.dimension_class(column_dimension_name)
        column_hierarchy = column_dimension.hierarchy(column_hierarchy_name)
        row_dimension = fact_class.dimension_class(row_dimension_name)
        row_hierarchy = row_dimension.hierarchy(row_hierarchy_name)
        current_column_name = column_hierarchy[cstage]
        current_row_name = row_hierarchy[rstage]
        full_column_name = "#{column_dimension_name}_#{current_column_name}"
        full_row_name = "#{row_dimension_name}_#{current_row_name}"

        # build the SQL query
        sql = ''
        sql += 'SELECT '
        sql += "#{full_column_name} AS #{current_column_name},"
        sql += "#{full_row_name} AS #{current_row_name},"
        sql += aggregate_fields.collect { |c| "#{c.label_for_table} as '#{c.label}'" }.join(",")
        sql += " FROM #{rollup_table_name} "

        # build the where clause
        where_clause = []
        where_clause.concat(hierarchy_where_clauses(column_dimension_name, column_hierarchy, cstage, filters))
        where_clause.concat(hierarchy_where_clauses(row_dimension_name, row_hierarchy, rstage, filters))
        where_clause << "#{full_column_name} is not null"
        where_clause << "#{full_row_name} is not null"
        # whatever is left in filters is keyed by "dimension.column"
        filters.each do |key, value|
          dimension_name, column = key.split('.')
          where_clause << "#{dimension_name}_#{column} = #{connection.quote(value.to_s)}"
        end
        # Extra conditions are ANDed with everything else. A leading "AND" is
        # tolerated, since appending the conditions verbatim (the previous
        # behaviour) only produced valid SQL when the caller supplied one.
        Array(conditions).each do |condition|
          condition = condition.to_s.sub(/\A\s*AND\s+/i, '')
          where_clause << "(#{condition})" unless condition.strip.empty?
        end
        sql += %Q( WHERE #{where_clause.join(" AND ")} ) unless where_clause.empty?

        # execute the query and return the results as a CubeQueryResult object
        result = ActiveWarehouse::CubeQueryResult.new(
          aggregate_fields
        )
        rows = connection.select_all(sql)
        rows.each do |row|
          result.add_data(row.delete(current_row_name.to_s),
                          row.delete(current_column_name.to_s),
                          row) # the rest of the members of row are the fact columns
        end
        result
      end

      protected

      # Build the WHERE conditions for one hierarchy of the query.
      #
      # For every level of +hierarchy+: when +filters+ holds a value under the
      # level's name, that entry is removed from +filters+ and turned into an
      # equality condition (the value is quoted by the connection as a
      # string); otherwise levels beyond +current_stage+ must be null.
      #
      # Returns an array of SQL condition strings.
      def hierarchy_where_clauses(dimension_name, hierarchy, current_stage, filters)
        clauses = []
        0.upto(hierarchy.length - 1) do |stage|
          level_name = hierarchy[stage]
          name = "#{dimension_name}_#{level_name}"
          filter_value = filters.delete(level_name)
          if filter_value
            clauses << "#{name} = #{connection.quote(filter_value.to_s)}"
          elsif stage > current_stage
            clauses << "#{name} is null"
          end
        end
        clauses
      end

      # Creates the rollup table, dropping an existing one first. The table
      # has one column per dimension column and one per aggregate field.
      def create_rollup_cube_table(options={})
        # TODO: perhaps this should all be executed in a single transaction?
        connection.drop_table(rollup_table_name) if connection.tables.include?(rollup_table_name)

        ActiveRecord::Base.transaction do
          connection.create_table(rollup_table_name, :id => false) do |t|
            dimensions_to_columns.each do |c|
              t.column(c.label, c.column_type)
            end
            aggregate_fields.each do |c|
              t.column(c.label_for_table, c.column_type)
            end
          end
        end
      end

      # Builds the aggregate SQL that will be used to populate the ROLAP table.
      # This SQL is just the SELECT statement and includes all of the GROUP BYs
      # and aggregation functions.
      #
      # +column_mask+ is an array of booleans, where true is the column to group
      # by. The length of this array is equal to the number of dimension
      # columns in the SELECT clause.
      def build_aggregate_sql(column_mask)
        dimension_column_names = dimensions_to_columns.collect do |c|
          "#{c.table_alias}.#{c.name}"
        end
        masked_columns = mask_columns_with_null(dimension_column_names, column_mask)

        sql = <<-SQL
          SELECT
            #{masked_columns.join(",")},
            #{aggregated_fact_column_sql}
          FROM #{tables_and_joins}
        SQL

        # group only by the columns that were not masked out
        group = masked_columns.reject { |o| o == 'null' }.join(",")
        sql += "GROUP BY #{group}" if !group.empty?

        sql
      end

      # Populate the rollup cube: (re)create the rollup table, then insert
      # one aggregate per on/off combination of the dimension columns.
      #
      # Options:
      # * <tt>:verbose</tt>: Set to true to print info to STDOUT during building
      def populate_rollup_cube(options={})
        verbose = options[:verbose]
        create_rollup_cube_table(options)
        puts "Populating rollup cube #{cube_class.name}" if verbose

        num_columns = dimensions_to_columns.size
        num_combos = (2**num_columns)-1
        puts "There are #{num_combos} combinations" if verbose
        (0..num_combos).each do |i|
          puts "Populating agg #{i} of #{num_combos}" if verbose && i % 100 == 0
          # the bits of i, zero padded to one digit per column, form the mask
          mask = sprintf("%0#{num_columns}b", i).split(//).collect { |bit| bit == '1' }

          sql = ''
          sql += "INSERT INTO #{rollup_table_name} "
          sql += build_aggregate_sql(mask)

          connection.transaction { connection.execute(sql) }
        end

        if verbose
          row_count = connection.select_value("SELECT count(*) FROM #{rollup_table_name}")
          puts "Rollup cube populated with #{row_count} rows"
        end
      end

      # Mask columns with null: returns a copy of +column_names+ in which
      # every name whose +mask+ entry is false or nil is replaced by the
      # string 'null'.
      #
      # Raises a RuntimeError when the two arrays differ in size.
      def mask_columns_with_null(column_names, mask)
        if mask.size != column_names.size
          raise "Columns has #{column_names.size} elements, but mask has only #{mask.size}"
        end

        column_names.zip(mask).collect { |name, keep| keep ? name : 'null' }
      end
    end
  end
end