activewarehouse 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76)
  1. data/README +27 -14
  2. data/Rakefile +16 -5
  3. data/doc/references.txt +4 -0
  4. data/generators/bridge/templates/migration.rb +9 -2
  5. data/generators/bridge/templates/unit_test.rb +8 -0
  6. data/generators/date_dimension/USAGE +1 -0
  7. data/generators/date_dimension/date_dimension_generator.rb +16 -0
  8. data/generators/date_dimension/templates/fixture.yml +5 -0
  9. data/generators/date_dimension/templates/migration.rb +31 -0
  10. data/generators/date_dimension/templates/model.rb +3 -0
  11. data/generators/date_dimension/templates/unit_test.rb +8 -0
  12. data/generators/dimension/templates/migration.rb +1 -10
  13. data/generators/dimension_view/dimension_view_generator.rb +2 -2
  14. data/generators/dimension_view/templates/migration.rb +8 -2
  15. data/generators/fact/templates/migration.rb +2 -0
  16. data/generators/time_dimension/USAGE +1 -0
  17. data/generators/time_dimension/templates/fixture.yml +5 -0
  18. data/generators/time_dimension/templates/migration.rb +12 -0
  19. data/generators/time_dimension/templates/model.rb +3 -0
  20. data/generators/time_dimension/templates/unit_test.rb +8 -0
  21. data/generators/time_dimension/time_dimension_generator.rb +14 -0
  22. data/lib/active_warehouse.rb +13 -2
  23. data/lib/active_warehouse/aggregate.rb +54 -253
  24. data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
  25. data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
  26. data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
  27. data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
  28. data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
  29. data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
  30. data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
  31. data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
  32. data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
  33. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
  34. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
  35. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
  36. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
  37. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
  38. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
  39. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
  40. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
  41. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
  42. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
  43. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
  44. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
  45. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
  46. data/lib/active_warehouse/aggregate_field.rb +49 -0
  47. data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
  48. data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
  49. data/lib/active_warehouse/builder.rb +2 -1
  50. data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
  51. data/lib/active_warehouse/builder/generator/generator.rb +13 -0
  52. data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
  53. data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
  54. data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
  55. data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
  56. data/lib/active_warehouse/calculated_field.rb +27 -0
  57. data/lib/active_warehouse/compat/compat.rb +4 -4
  58. data/lib/active_warehouse/cube.rb +126 -225
  59. data/lib/active_warehouse/cube_query_result.rb +69 -0
  60. data/lib/active_warehouse/dimension.rb +64 -29
  61. data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
  62. data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
  63. data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
  64. data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
  65. data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
  66. data/lib/active_warehouse/fact.rb +119 -40
  67. data/lib/active_warehouse/field.rb +74 -0
  68. data/lib/active_warehouse/ordered_hash.rb +34 -0
  69. data/lib/active_warehouse/prejoin_fact.rb +97 -0
  70. data/lib/active_warehouse/report/abstract_report.rb +40 -14
  71. data/lib/active_warehouse/report/chart_report.rb +3 -3
  72. data/lib/active_warehouse/report/table_report.rb +8 -3
  73. data/lib/active_warehouse/version.rb +1 -1
  74. data/lib/active_warehouse/view/report_helper.rb +144 -34
  75. data/tasks/active_warehouse_tasks.rake +28 -10
  76. metadata +107 -30
@@ -0,0 +1,36 @@
1
+ module ActiveWarehouse
2
+ module Aggregate
3
+ module Dwarf
4
# Binary record for one dwarf node, declared with the BinData struct DSL.
class NodeStruct < BinData::Struct
  # Identifier of the node.
  uint32 :id
  # Number of entries held in +cells+.
  uint16 :cells_length
  # The node's cells; the initial element count is taken from +cells_length+.
  array  :cells, :type => :cell_struct, :initial_length => :cells_length
  # The special "all" cell of the node, typed as a cell_struct.
  struct :all_cell, :type => :cell_struct
end
10
# Binary record for one cell of a dwarf node, declared with the BinData
# struct DSL.
class CellStruct < BinData::Struct
  # Length of +key+.
  uint8  :key_len
  # The cell key; its initial length is taken from +key_len+.
  string :key, :initial_length => :key_len
  # May be a value or a pointer.
  uint16 :value
end
15
# Converts an in-memory dwarf node into its NodeStruct representation.
class DwarfWriter
  # Build a NodeStruct from +node+.
  #
  # +node+ must respond to +id+, +cells+ and +all_cell+, and every cell must
  # respond to +key+. Keys are converted with +to_s+ so that non-string keys
  # (for example integer ids) have a length. Cell values are not copied.
  #
  # Returns the populated NodeStruct.
  # NOTE(review): nothing is written to disk here yet -- confirm the
  # intended output target with the caller.
  def write(node)
    n = NodeStruct.new
    n.id = node.id
    n.cells_length = node.cells.length

    node.cells.each { |cell| n.cells << build_cell(cell) }

    # The all cell has to come from the source node, not from the freshly
    # created struct. A node that has not been closed yet has no all cell.
    n.all_cell = build_cell(node.all_cell) if node.all_cell

    n
  end

  private

  # Create a CellStruct carrying the key (and key length) of +cell+.
  def build_cell(cell)
    key = cell.key.to_s
    c = CellStruct.new
    c.key_len = key.length
    c.key = key
    c
  end
end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,369 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ module Aggregate #:nodoc:
3
+ # Implementation of the Dwarf algorithm described in "Dwarf: Shrinking the PetaCube" (Sismanis, Deligiannakis, Roussopoulos and Kotidis, SIGMOD 2002).
4
+ class DwarfAggregate < Aggregate
5
+ include DwarfCommon
6
+
7
+ # Initialize the aggregate
8
# Create the aggregate for +cube_class+. All set-up is delegated to the
# superclass initializer.
def initialize(cube_class)
  super(cube_class)
end
11
+
12
+ # Populate the aggregate
13
# Populate the aggregate: fetch the sorted fact rows and build the dwarf
# cube from them.
def populate
  facts = sorted_facts
  create_dwarf_cube(facts)
end
16
+
17
+ # query
18
# Query the aggregate.
#
# The arguments are passed to +parse_query_args+. Of the resulting options
# only <tt>:filters</tt> is consulted: a hash whose keys have the form
# "dimension_table.column" and whose values are matched for equality. The
# row/column dimension, hierarchy, stage and condition options are not used
# yet, so they are no longer read here.
#
# Returns an array with one entry per aggregate field of the fact class.
# NOTE(review): the entries are always 0 -- the filtered nodes are collected
# but their values are never summed; the query is unfinished.
def query(*args)
  options = parse_query_args(*args)
  filters = options[:filters] || {} # tolerate queries without filters
  connection = cube_class.connection

  # Look up, per dimension, the ids of the rows that satisfy the filters.
  dimension_ids = {}
  dimension_order.each do |d|
    where_clause = []
    filters.each do |key, value|
      dimension, column = key.to_s.split('.')
      next unless d.table_name == dimension
      # Quote through the connection so filter values cannot inject SQL.
      where_clause << "#{dimension}.#{connection.quote_column_name(column)} = #{connection.quote(value)}"
    end

    sql = "SELECT id FROM #{d.table_name}"
    sql += %Q(\nWHERE\n #{where_clause.join(" AND\n ")}) unless where_clause.empty?
    dimension_ids[d] = connection.select_values(sql)
  end

  values = Array.new(cube_class.fact_class.aggregate_fields.length, 0)

  home_nodes = []
  filter_nodes(@root_node, dimension_ids, 0, home_nodes)

  values
end
58
+
59
# Walk the dwarf from +node+, following only cells whose key is listed in
# +dimension_ids+ for the dimension at the current +depth+ (as given by
# +dimension_order+).
#
# At the last dimension the visited node is appended to +filtered_nodes+
# once for every matching cell; above it the walk recurses into the child
# of each matching cell, skipping cells without a child.
def filter_nodes(node, dimension_ids, depth, filtered_nodes)
  dimension = dimension_order[depth]
  allowed_keys = dimension_ids[dimension]
  at_last_dimension = (depth == dimension_order.length - 1)

  node.cells.each do |cell|
    next unless allowed_keys.include?(cell.key)

    if at_last_dimension
      filtered_nodes << node
    elsif cell.child
      filter_nodes(cell.child, dimension_ids, depth + 1, filtered_nodes)
    end
  end
end
73
+
74
+ # Aggregate the node by summing all of the values in the cells
75
+ # TODO: support aggregations other than sum
76
# Aggregate +cells+ by summing their value arrays position by position.
#
# Cells without a value are skipped, and an empty list yields an empty
# array instead of raising.
#
# Returns an array holding the sum for each value position.
# TODO: support aggregations other than sum
def calculate_aggregate(cells)
  totals = []
  cells.each do |cell|
    next unless cell.value
    cell.value.each_with_index do |v, index|
      totals[index] = (totals[index] || 0) + v
    end
  end
  totals
end
85
+
86
+ # Create the dwarf cube with the sorted_facts
87
# Create the dwarf cube from +sorted_facts+.
#
# Each row is either a Hash, which is converted with +create_tuple+, or an
# already built tuple. For every row the nodes that no longer share the
# prefix with the previous row are closed (their all cell is computed),
# then the nodes for the current row are created. After the last row the
# still-open nodes of the final path are closed bottom-up.
#
# Sets @root_node and @last_nodes. Returns nil; nothing is built when
# +sorted_facts+ is empty.
def create_dwarf_cube(sorted_facts)
  last_tuple = nil
  @last_nodes = nil
  sorted_facts.each do |row|
    tuple = row.is_a?(Hash) ? create_tuple(row) : row

    prefix = calculate_prefix(tuple, last_tuple)

    close_nodes(prefix).each do |n|
      if n.leaf?
        n.all_cell = Cell.new('*', calculate_aggregate(n.cells))
      else
        n.all_cell = Cell.new('*')
        n.all_cell.child = suffix_coalesce(n.children)
      end
      n.processed = true
    end

    nodes = create_nodes(tuple, prefix)

    write_nodes(nodes)
    # The first node of the very first tuple is the root of the dwarf.
    @root_node = nodes.first if @last_nodes.nil?
    last_tuple = tuple
    @last_nodes = nodes
  end

  # No rows means there is no open path to close.
  return if @last_nodes.nil?

  # Alg 1, Line 13
  last_leaf_node = @last_nodes.last
  last_leaf_node.all_cell = Cell.new('*', calculate_aggregate(last_leaf_node.cells))

  # Alg 1, Line 14
  # Every node above the leaf, deepest first. The exclusive range keeps the
  # leaf out even when the path consists of a single node.
  @last_nodes[0...-1].reverse.each do |n|
    n.all_cell = Cell.new('*')
    n.all_cell.child = suffix_coalesce(n.children)
  end

  require File.dirname(__FILE__) + '/dwarf_printer'
  puts DwarfPrinter.print_node(@root_node)
end
126
+
127
+ # Coalesce the nodes and return a single node
128
# Coalesce +nodes+ and return a single node.
#
# A single node is returned as is. Otherwise a new node is built that has
# one cell per distinct key found in +nodes+, in sorted key order: for leaf
# nodes the cell carries the aggregate of all cells with that key, for
# inner nodes its child is the coalesced children of those cells. The new
# node's all cell is computed the same way from its own cells.
#
# The leaf flag is taken from the first node. Cells are grouped by key in
# one pass rather than rescanning every node for every key.
def suffix_coalesce(nodes)
  return nodes[0] if nodes.length == 1

  sub_dwarf = Node.new
  sub_dwarf.leaf = nodes.first.leaf

  # key => cells with that key, in node order and then cell order
  cells_by_key = Hash.new { |hash, key| hash[key] = [] }
  nodes.each do |n|
    n.cells.each { |c| cells_by_key[c.key] << c }
  end

  cells_by_key.keys.sort.each do |k|
    to_merge = cells_by_key[k]

    if sub_dwarf.leaf?
      cur_aggr = calculate_aggregate(to_merge) # Alg 2, Line 8
      sub_dwarf.add_cell(Cell.new(k, cur_aggr)) # Alg 2, Line 9
    else
      # Alg 2, Line 11
      cell = Cell.new(k)
      cell.child = suffix_coalesce(to_merge.collect { |c| c.child })
      sub_dwarf.add_cell(cell)
    end
  end

  if sub_dwarf.leaf?
    sub_dwarf.all_cell = Cell.new("*", calculate_aggregate(sub_dwarf.cells))
  else
    cell = Cell.new("*")
    cell.child = suffix_coalesce(sub_dwarf.children)
    sub_dwarf.all_cell = cell
  end

  sub_dwarf
end
166
+
167
+ # Get a list of sorted keys for the cells in the specified nodes
168
# Return the distinct cell keys found in +nodes+, in ascending order.
def sorted_keys(nodes)
  all_keys = nodes.inject([]) do |collected, node|
    collected + node.cells.map { |cell| cell.key }
  end
  all_keys.uniq.sort
end
177
+
178
+ # Accessor for the number of dimensions in the cube.
179
# Number of dimensions in the cube. Unless a value has been assigned it is
# taken from the length of cube_class.dimension_classes and cached.
def number_of_dimensions
  @number_of_dimensions ||= cube_class.dimension_classes.length
end

# Set the number of dimensions explicitly. The writer is spelled out,
# rather than generated by attr_accessor, so the reader above is not
# defined twice.
def number_of_dimensions=(count)
  @number_of_dimensions = count
end
183
+
184
+ # Calculates a common prefix between the two tuples
185
# Calculate the common prefix of two tuples: the leading dimension keys of
# +current_tuple+ that equal those of +last_tuple+ at the same position.
#
# Only the first +number_of_dimensions+ positions are compared; the values
# stored after the dimension keys never become part of the prefix.
#
# Returns an empty array when +last_tuple+ is nil.
def calculate_prefix(current_tuple, last_tuple)
  return [] if last_tuple.nil?

  prefix = []
  number_of_dimensions.times do |i|
    break unless current_tuple[i] == last_tuple[i]
    prefix << current_tuple[i]
  end
  prefix
end
198
+
199
+ # Close all of the last nodes that match the specified prefix and return
200
+ # the list of newly closed nodes
201
# Close the nodes of the previous path that lie below the node which
# receives the next cell, i.e. every entry of @last_nodes after index
# prefix.length, and return them.
#
# Returns an empty array when there is no previous path or when the prefix
# reaches past the end of it.
def close_nodes(prefix)
  return [] unless @last_nodes

  # The slice is nil once the start index passes the end of the array.
  to_close = @last_nodes[(prefix.length + 1)..-1] || []
  to_close.each { |n| n.closed = true }
end
211
+
212
+ # Create the nodes for the current tuple
213
# Create the nodes for +current_tuple+ and return the full path of nodes,
# one per dimension, ending in the leaf.
#
# For the first tuple a new node is created for every dimension. After
# that, the nodes covered by +prefix+ are reused, the node just below the
# prefix receives a new cell for the first differing key, and new nodes
# are created for the remaining dimensions. The last cell of the leaf is
# given the tuple's values (everything after the dimension keys).
#
# When +prefix+ covers every dimension, the tuple repeats the dimension
# keys of the previous one; its values are then added onto the existing
# leaf cell instead of indexing past the end of the previous path.
# NOTE(review): summing matches calculate_aggregate -- revisit when other
# aggregations are supported.
def create_nodes(current_tuple, prefix)
  measures = current_tuple[number_of_dimensions..current_tuple.length - 1]

  if @last_nodes && prefix.length >= number_of_dimensions
    leaf_cell = @last_nodes.last.cells.last
    existing = leaf_cell.value
    leaf_cell.value =
      existing ? existing.zip(measures).collect { |a, b| a + (b || 0) } : measures
    return @last_nodes[0, number_of_dimensions]
  end

  nodes = []
  if @last_nodes.nil?
    first_new_level = 0
  else
    # Reuse the shared part of the previous path ...
    nodes.concat(@last_nodes[0, prefix.length])
    # ... and branch off with a new cell in the first node that differs.
    branch = @last_nodes[prefix.length]
    branch.add_cell(Cell.new(current_tuple[prefix.length]))
    nodes << branch
    first_new_level = prefix.length + 1
  end

  # Each new node hangs off the most recent cell of the node above it.
  first_new_level.upto(number_of_dimensions - 1) do |i|
    parent_cell = nodes.empty? ? nil : nodes.last.cells.last
    nodes << Node.new(current_tuple[i], parent_cell)
  end

  nodes.last.leaf = true
  cell = nodes.last.cells.last
  cell.value = measures unless cell.value

  nodes
end
248
+
249
+ # Write nodes to the filesystem.
250
# Write +nodes+ to the filesystem. Not implemented: the method does nothing
# and returns nil.
def write_nodes(nodes)
  # Sketch kept from the original:
  # open(File.new(cube_class.name + '.dat'), 'w') do |f|
  #
  # end
  nil
end
255
+
256
# A single cell of a dwarf node.
class Cell
  # The cell key, which will always be a dimension id
  attr_accessor :key
  # The child of the cell which will always be a node
  attr_reader :child
  # The value of the cell which will only be non-nil in the cells that appear in nodes in the last dimension
  attr_accessor :value
  # The node that this cell is a member of
  attr_accessor :node

  # Create a cell with the given +key+ and an optional +value+.
  def initialize(key, value=nil)
    @key = key
    @value = value
  end

  # Set the child node and make this cell its parent. Assigning nil simply
  # clears the child.
  def child=(node)
    node.parent = self if node
    @child = node
  end

  # The key as a String; to_s has to return a String even when the key is
  # a numeric id.
  def to_s
    key.to_s
  end
end
280
+
281
# A node of the dwarf: an ordered list of cells plus a special all cell.
class Node
  # A special cell which will hold either a reference to a sub node or the aggregate values for all
  # of the values in the node's cells
  attr_reader :all_cell

  # The parent cell or nil
  attr_accessor :parent

  # Set to true if the node is closed
  attr_accessor :closed

  # Set to true if the node has been processed
  attr_accessor :processed

  # Set to true if this node is a leaf node
  attr_accessor :leaf

  # Reader accessor for the node index, a sequential number identifying order of creation
  attr_reader :index

  @@sequence = 0

  # Initialize the node. When +key+ is given the node starts with one cell
  # for that key; when +parent_cell+ is given the node becomes the child of
  # that cell.
  def initialize(key=nil, parent_cell=nil)
    @closed = false
    @processed = false
    @parent = parent_cell
    @parent.child = self if @parent
    @index = @@sequence += 1
    add_cell(Cell.new(key)) if key
  end

  # Return an array of cells for the node
  def cells
    @cells ||= []
  end

  # The keys of the node's cells, in cell order
  def keys
    cells.collect { |cell| cell.key }
  end

  # True if one of the cells has the given key
  def has_cell_with_key?(key)
    cells.any? { |cell| cell.key == key }
  end

  # The child of the first cell with the given key, or nil when there is no
  # such cell
  def child(key)
    cell = cells.find { |candidate| candidate.key == key }
    cell ? cell.child : nil
  end

  # The child nodes of all cells, leaving out cells that have no child
  def children
    cells.collect { |cell| cell.child }.compact
  end

  def closed?
    closed
  end

  def processed?
    processed
  end

  def leaf?
    leaf
  end

  # Append +cell+ to the node and record the node on the cell
  def add_cell(cell)
    cell.node = self
    cells << cell
  end

  # Set the all cell and record the node on it. Assigning nil clears the
  # all cell.
  def all_cell=(cell)
    @all_cell = cell
    @all_cell.node = self if @all_cell
  end

  # The node index as a String
  def to_s
    index.to_s
  end
end
367
+ end
368
+ end
369
+ end
@@ -0,0 +1,44 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ module Aggregate #:nodoc:
3
+ # Common methods for use inside dwarf implementations
4
# Common methods for use inside dwarf implementations. The including class
# has to provide +cube_class+.
module DwarfCommon
  # Get the dimension order, defaults to sorting from highest cardinality to
  # lowest. The order is computed once and cached; +count+ is called once
  # per dimension class rather than once per comparison.
  def dimension_order
    @dimension_order ||= cube_class.dimension_classes.sort_by { |d| d.count }.reverse
  end

  # Set the dimension order
  def dimension_order=(dimensions)
    @dimension_order = dimensions
  end

  # Get the sorted fact rows for this cube, sorted by the fact table's
  # foreign keys for the dimensions returned from dimension_order.
  # Returns whatever the connection's select_all returns for that query.
  def sorted_facts
    fact_class = cube_class.fact_class

    # Determine the columns to order by (high cardinality first)
    order_by = dimension_order.collect { |d| fact_class.foreign_key_for(d) }.join(",")

    # Get the sorted fact table
    # TODO: determine if querying with select_all will bring the entire result set into memory
    sql = "SELECT * FROM #{fact_class.table_name} ORDER BY #{order_by}"
    cube_class.connection.select_all(sql)
  end

  # Create a tuple from a row: the type-cast foreign key values in
  # dimension_order, followed by the type-cast values of the fact class's
  # aggregate fields.
  def create_tuple(row)
    fact_class = cube_class.fact_class
    columns = fact_class.columns_hash

    key_columns = dimension_order.collect { |d| fact_class.foreign_key_for(d) }
    value_columns = fact_class.aggregate_fields.collect { |f| f.to_s }

    (key_columns + value_columns).collect do |column_name|
      columns[column_name].type_cast(row[column_name])
    end
  end
end
43
+ end
44
+ end