activewarehouse 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/README +27 -14
  2. data/Rakefile +16 -5
  3. data/doc/references.txt +4 -0
  4. data/generators/bridge/templates/migration.rb +9 -2
  5. data/generators/bridge/templates/unit_test.rb +8 -0
  6. data/generators/date_dimension/USAGE +1 -0
  7. data/generators/date_dimension/date_dimension_generator.rb +16 -0
  8. data/generators/date_dimension/templates/fixture.yml +5 -0
  9. data/generators/date_dimension/templates/migration.rb +31 -0
  10. data/generators/date_dimension/templates/model.rb +3 -0
  11. data/generators/date_dimension/templates/unit_test.rb +8 -0
  12. data/generators/dimension/templates/migration.rb +1 -10
  13. data/generators/dimension_view/dimension_view_generator.rb +2 -2
  14. data/generators/dimension_view/templates/migration.rb +8 -2
  15. data/generators/fact/templates/migration.rb +2 -0
  16. data/generators/time_dimension/USAGE +1 -0
  17. data/generators/time_dimension/templates/fixture.yml +5 -0
  18. data/generators/time_dimension/templates/migration.rb +12 -0
  19. data/generators/time_dimension/templates/model.rb +3 -0
  20. data/generators/time_dimension/templates/unit_test.rb +8 -0
  21. data/generators/time_dimension/time_dimension_generator.rb +14 -0
  22. data/lib/active_warehouse.rb +13 -2
  23. data/lib/active_warehouse/aggregate.rb +54 -253
  24. data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
  25. data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
  26. data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
  27. data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
  28. data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
  29. data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
  30. data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
  31. data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
  32. data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
  33. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
  34. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
  35. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
  36. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
  37. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
  38. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
  39. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
  40. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
  41. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
  42. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
  43. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
  44. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
  45. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
  46. data/lib/active_warehouse/aggregate_field.rb +49 -0
  47. data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
  48. data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
  49. data/lib/active_warehouse/builder.rb +2 -1
  50. data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
  51. data/lib/active_warehouse/builder/generator/generator.rb +13 -0
  52. data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
  53. data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
  54. data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
  55. data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
  56. data/lib/active_warehouse/calculated_field.rb +27 -0
  57. data/lib/active_warehouse/compat/compat.rb +4 -4
  58. data/lib/active_warehouse/cube.rb +126 -225
  59. data/lib/active_warehouse/cube_query_result.rb +69 -0
  60. data/lib/active_warehouse/dimension.rb +64 -29
  61. data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
  62. data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
  63. data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
  64. data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
  65. data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
  66. data/lib/active_warehouse/fact.rb +119 -40
  67. data/lib/active_warehouse/field.rb +74 -0
  68. data/lib/active_warehouse/ordered_hash.rb +34 -0
  69. data/lib/active_warehouse/prejoin_fact.rb +97 -0
  70. data/lib/active_warehouse/report/abstract_report.rb +40 -14
  71. data/lib/active_warehouse/report/chart_report.rb +3 -3
  72. data/lib/active_warehouse/report/table_report.rb +8 -3
  73. data/lib/active_warehouse/version.rb +1 -1
  74. data/lib/active_warehouse/view/report_helper.rb +144 -34
  75. data/tasks/active_warehouse_tasks.rake +28 -10
  76. metadata +107 -30
@@ -0,0 +1,36 @@
1
module ActiveWarehouse
  module Aggregate
    module Dwarf
      # Binary layout of one dwarf node: a numeric id, the number of cells,
      # the cells themselves and the node's "all" cell.
      class NodeStruct < BinData::Struct
        uint32 :id
        uint16 :cells_length
        array :cells, :type => :cell_struct, :initial_length => :cells_length
        struct :all_cell, :type => :cell_struct
      end

      # Binary layout of one cell: a length-prefixed key and a 16-bit value slot.
      class CellStruct < BinData::Struct
        uint8 :key_len
        string :key, :initial_length => :key_len
        uint16 :value # may be a value or a pointer
      end

      # Converts an in-memory dwarf node into its binary record form.
      class DwarfWriter
        # Build a NodeStruct describing +node+.
        #
        # +node+ must respond to +id+, +cells+ and +all_cell+; every cell must
        # respond to +key+.
        #
        # Returns the populated NodeStruct. Nothing is written to disk here.
        # NOTE(review): the +value+ slot of each cell is left at its default,
        # exactly as before; how values/pointers are encoded is not visible
        # in this file.
        def write(node)
          n = NodeStruct.new
          n.id = node.id
          n.cells_length = node.cells.length

          node.cells.each do |cell|
            n.cells << cell_struct_for(cell)
          end

          # The all cell must be read from the source node. Reading it from
          # the freshly created struct always produced an empty key.
          n.all_cell = cell_struct_for(node.all_cell) if node.all_cell

          n
        end

        private

        # Build the CellStruct for a single cell. Keys are stringified first
        # so that numeric keys have a well-defined length.
        def cell_struct_for(cell)
          key = cell.key.to_s
          c = CellStruct.new
          c.key_len = key.length
          c.key = key
          c
        end
      end
    end
  end
end
@@ -0,0 +1,369 @@
1
+ module ActiveWarehouse #:nodoc:
2
+ module Aggregate #:nodoc:
3
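+ # Implementation of the Dwarf algorithm (presumably the one described in the paper "Dwarf: Shrinking the PetaCube" by Sismanis et al.; the original comment ends mid-sentence).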
4
+ class DwarfAggregate < Aggregate
5
+ include DwarfCommon
6
+
7
+ # Initialize the aggregate
8
# Set up the aggregate for +cube_class+. All of the work is delegated to
# the superclass initializer.
def initialize(cube_class)
  super(cube_class)
end
11
+
12
+ # Populate the aggregate
13
# Build the dwarf cube from the fact rows returned by sorted_facts.
def populate
  facts = sorted_facts
  create_dwarf_cube(facts)
end
16
+
17
+ # query
18
# Query the aggregate.
#
# The arguments are handed to parse_query_args. From the resulting options
# this method reads the column/row dimension and hierarchy names and
# :filters, a hash of "table_name.column" => value pairs used to restrict
# the dimension ids considered.
#
# Returns an array with one entry per aggregate field of the fact class.
# The entries are currently always 0: the filtered nodes are collected but
# not yet folded into the result.
def query(*args)
  options = parse_query_args(*args)

  # A missing :filters option means "no restriction".
  filters = options[:filters] || {}

  # Resolve the requested dimensions and hierarchies. The results are not
  # used further yet, but the lookups are kept so that whatever checking
  # they perform still happens.
  Dimension.class_for_name(options[:column_dimension_name]).hierarchy(options[:column_hierarchy_name])
  Dimension.class_for_name(options[:row_dimension_name]).hierarchy(options[:row_hierarchy_name])

  connection = cube_class.connection
  dimension_ids = {}

  dimension_order.each do |d|
    where_clause = []
    filters.each do |key, value|
      dimension, column = key.split('.')
      next unless d.table_name == dimension
      # The value is still compared as a string literal, as before, but it
      # and the column name now go through the connection's quoting so a
      # filter cannot inject SQL.
      where_clause << "#{dimension}.#{connection.quote_column_name(column)} = #{connection.quote(value.to_s)}"
    end

    sql = "SELECT id FROM #{d.table_name}"
    sql += %Q(\nWHERE\n #{where_clause.join(" AND\n ")}) if where_clause.length > 0
    dimension_ids[d] = connection.select_values(sql)
  end

  values = Array.new(cube_class.fact_class.aggregate_fields.length, 0)

  home_nodes = []
  filter_nodes(@root_node, dimension_ids, 0, home_nodes)

  values
end
58
+
59
# Walk the cube from +node+ downwards, one dimension per level, following
# only the cells whose key is listed in +dimension_ids+ for the dimension
# at that level.
#
# At the last dimension the visited node is appended to +filtered_nodes+
# once for every matching cell. Above it the walk descends into the
# matching cell's child, if it has one.
def filter_nodes(node, dimension_ids, depth, filtered_nodes)
  current_dimension = dimension_order[depth]
  node.cells.each do |cell|
    next unless dimension_ids[current_dimension].include?(cell.key)

    if depth == dimension_order.length - 1
      filtered_nodes << node
    elsif !cell.child.nil?
      filter_nodes(cell.child, dimension_ids, depth + 1, filtered_nodes)
    end
  end
end
73
+
74
+ # Aggregate the node by summing all of the values in the cells
75
+ # TODO: support aggregations other than sum
76
# Aggregate a list of cells by summing their values position by position.
#
# Each cell's +value+ is an array of numbers. The result is an array of the
# same length holding the column-wise sums. An empty list of cells yields
# an empty array instead of raising.
# TODO: support aggregations other than sum
def calculate_aggregate(cells)
  return [] if cells.empty?

  totals = Array.new(cells.first.value.length, 0)
  cells.each do |cell|
    cell.value.each_with_index do |v, index|
      totals[index] += v
    end
  end
  totals
end
85
+
86
+ # Create the dwarf cube with the sorted_facts
87
# Create the dwarf cube from +sorted_facts+.
#
# Each row is either a Hash (converted with create_tuple) or an already
# built tuple. Rows are processed in order: the nodes that fall outside
# the prefix shared with the previous tuple are closed and given their
# all cell, then the nodes for the current tuple are created.
#
# The root node is stored in @root_node. With no rows at all the method
# returns early and @root_node is nil.
def create_dwarf_cube(sorted_facts)
  last_tuple = nil
  @last_nodes = nil
  # Reset so that a previous population cannot leave a stale root behind.
  @root_node = nil

  sorted_facts.each do |row|
    tuple = row.is_a?(Hash) ? create_tuple(row) : row

    prefix = calculate_prefix(tuple, last_tuple)

    close_nodes(prefix).each do |n|
      if n.leaf?
        n.all_cell = Cell.new('*', calculate_aggregate(n.cells))
      else
        n.all_cell = Cell.new('*')
        n.all_cell.child = suffix_coalesce(n.children)
      end
      n.processed = true
    end

    nodes = create_nodes(tuple, prefix)

    write_nodes(nodes)
    last_tuple = tuple
    # The first tuple's first node is the root of the whole cube.
    @root_node = nodes.first if @last_nodes.nil?
    @last_nodes = nodes
  end

  # Without any fact rows there are no nodes left to close.
  return if @last_nodes.nil?

  # Alg 1, Line 13
  last_leaf_node = @last_nodes.last
  last_leaf_node.all_cell = Cell.new('*', calculate_aggregate(last_leaf_node.cells))

  # Alg 1, Line 14
  @last_nodes[0..@last_nodes.length - 2].reverse.each do |n|
    n.all_cell = Cell.new('*')
    n.all_cell.child = suffix_coalesce(n.children)
  end

  # NOTE(review): this prints the finished cube to stdout on every
  # population; it looks like debugging output.
  require File.dirname(__FILE__) + '/dwarf_printer'
  puts DwarfPrinter.print_node(@root_node)
end
126
+
127
+ # Coalesce the nodes and return a single node
128
# Coalesce +nodes+ into a single node and return it.
#
# A single node is returned unchanged. Otherwise a new node is built whose
# cells cover every key found in the given nodes: for leaf nodes the
# values of same-keyed cells are aggregated, for inner nodes their
# children are coalesced recursively. The new node also gets its all cell.
def suffix_coalesce(nodes)
  return nodes[0] if nodes.length == 1

  merged = Node.new
  merged.leaf = nodes.first.leaf

  sorted_keys(nodes).each do |key|
    # Every cell with this key, in node order and then cell order.
    same_key = nodes.collect { |n| n.cells.select { |c| c.key == key } }.flatten

    if merged.leaf?
      # Alg 2, Lines 8 and 9
      merged.add_cell(Cell.new(key, calculate_aggregate(same_key)))
    else
      # Alg 2, Line 11
      merged_cell = Cell.new(key)
      merged_cell.child = suffix_coalesce(same_key.collect { |c| c.child })
      merged.add_cell(merged_cell)
    end
  end

  if merged.leaf?
    merged.all_cell = Cell.new("*", calculate_aggregate(merged.cells))
  else
    all = Cell.new("*")
    all.child = suffix_coalesce(merged.children)
    merged.all_cell = all
  end

  merged
end
166
+
167
+ # Get a list of sorted keys for the cells in the specified nodes
168
# Collect the keys of all cells in +nodes+ and return them without
# duplicates, in ascending order.
def sorted_keys(nodes)
  all_keys = nodes.inject([]) do |collected, node|
    collected + node.cells.collect { |cell| cell.key }
  end
  all_keys.uniq.sort
end
177
+
178
+ # Accessor for the number of dimensions in the cube.
179
# Only the writer is generated. The reader is defined explicitly below
# because it supplies a lazily computed default; generating a reader as
# well would just be redefined immediately.
attr_writer :number_of_dimensions

# The number of dimensions in the cube. Unless it has been set explicitly
# it defaults to the number of dimension classes of the cube class.
def number_of_dimensions
  @number_of_dimensions ||= cube_class.dimension_classes.length
end
183
+
184
+ # Calculates a common prefix between the two tuples
185
# Calculate the common prefix of two tuples.
#
# Only the first number_of_dimensions entries (the dimension keys) are
# compared; the aggregate values that follow them in a tuple never become
# part of the prefix. Returns an empty array when there is no previous
# tuple.
def calculate_prefix(current_tuple, last_tuple)
  return [] if last_tuple.nil?

  prefix = []
  # Stop at the last dimension index. Going one further would compare the
  # first measure value as if it were a dimension key.
  0.upto(number_of_dimensions - 1) do |i|
    break unless current_tuple[i] == last_tuple[i]
    prefix << current_tuple[i]
  end
  prefix
end
198
+
199
+ # Close all of the last nodes that match the specified prefix and return
200
+ # the list of newly closed nodes
201
# Close the nodes of the previous tuple that lie below the node shared
# through +prefix+ and return the list of newly closed nodes.
#
# Returns an empty array when there is no previous tuple or when the
# prefix leaves nothing to close.
def close_nodes(prefix)
  return [] unless @last_nodes

  # Array#[] returns nil, not [], once the start index is past the end of
  # the array, so fall back to an empty list.
  newly_closed = @last_nodes[prefix.length + 1, @last_nodes.length] || []
  newly_closed.each { |n| n.closed = true }
  newly_closed
end
211
+
212
+ # Create the nodes for the current tuple
213
# Create the nodes for the current tuple and return them, one per
# dimension, ordered from the root down to the leaf.
#
# For the very first tuple a fresh chain of nodes is built. For later
# tuples the nodes covered by +prefix+ are reused, the node just below the
# prefix receives a new cell for the tuple's key at that level, and fresh
# nodes are created for the remaining levels. The last node is marked as a
# leaf and its last cell receives the tuple's aggregate values unless it
# already has a value.
#
# Raises ArgumentError when +prefix+ leaves no node to extend.
def create_nodes(current_tuple, prefix)
  if @last_nodes.nil?
    nodes = []
    first_new_level = 0
  else
    branch_node = @last_nodes[prefix.length]
    if branch_node.nil?
      raise ArgumentError, "no node to extend for prefix #{prefix.inspect} (possibly duplicate dimension keys in the facts)"
    end

    # Nodes covered by the prefix are shared with the previous tuple.
    nodes = @last_nodes[0, prefix.length]
    branch_node.add_cell(Cell.new(current_tuple[prefix.length]))
    nodes << branch_node
    first_new_level = prefix.length + 1
  end

  # Every remaining level gets a new node hanging off the last cell of the
  # node above it.
  first_new_level.upto(number_of_dimensions - 1) do |level|
    parent_cell = nodes.empty? ? nil : nodes.last.cells.last
    nodes << Node.new(current_tuple[level], parent_cell)
  end

  leaf_node = nodes.last
  leaf_node.leaf = true
  leaf_cell = leaf_node.cells.last
  # Everything after the dimension keys in the tuple is aggregate data.
  leaf_cell.value ||= current_tuple[number_of_dimensions..current_tuple.length - 1]

  nodes
end
248
+
249
+ # Write nodes to the filesystem.
250
# Placeholder for persisting +nodes+ to the filesystem. Nothing is written
# yet, so the method simply returns nil. The original sketch was:
#
#   open(File.new(cube_class.name + '.dat'), 'w') do |f|
#
#   end
def write_nodes(nodes)
  nil
end
255
+
256
# A single cell of a dwarf node.
class Cell
  # The cell key, which will always be a dimension id
  attr_accessor :key
  # The child of the cell which will always be a node. Only the reader is
  # generated here because the writer below also links the child back.
  attr_reader :child
  # The value of the cell which will only be non-nil in the cells that appear in nodes in the last dimension
  attr_accessor :value
  # The node that this cell is a member of
  attr_accessor :node

  # Create a cell with the given key and, optionally, a value.
  def initialize(key, value=nil)
    @key = key
    @value = value
  end

  # Set the child node and make this cell the child's parent. Assigning
  # nil clears the child.
  def child=(node)
    node.parent = self unless node.nil?
    @child = node
  end

  # The key as a String. to_s has to return a String, otherwise string
  # interpolation ignores it and falls back to the default object
  # representation.
  def to_s
    key.to_s
  end
end
280
+
281
# A node of the dwarf cube: an ordered list of cells plus the special
# all cell.
class Node
  # A special cell which will hold either a reference to a sub node or the
  # aggregate values for all of the values in the node's cells. The writer
  # is defined further down.
  attr_reader :all_cell

  # The parent cell or nil
  attr_accessor :parent

  # Set to true if the node is closed
  attr_accessor :closed

  # Set to true if the node has been processed
  attr_accessor :processed

  # Set to true if this node is a leaf node
  attr_accessor :leaf

  # The node index, a sequential number identifying order of creation
  attr_reader :index

  @@sequence = 0

  # Initialize the node. When +key+ is given the node starts with one cell
  # carrying that key; when +parent_cell+ is given the node becomes that
  # cell's child.
  def initialize(key=nil, parent_cell=nil)
    @closed = false
    @processed = false
    @parent = parent_cell
    @parent.child = self if @parent
    @index = @@sequence += 1
    add_cell(Cell.new(key)) if key
  end

  # The array of cells of the node
  def cells
    @cells ||= []
  end

  # The keys of all cells, in cell order
  def keys
    cells.map { |cell| cell.key }
  end

  # True if one of the cells has the given key
  def has_cell_with_key?(key)
    cells.any? { |cell| cell.key == key }
  end

  # The child of the first cell with the given key, or nil
  def child(key)
    match = cells.detect { |cell| cell.key == key }
    match ? match.child : nil
  end

  # The children of all cells, skipping cells without a child
  def children
    cells.map { |cell| cell.child }.compact
  end

  def closed?
    @closed
  end

  def processed?
    @processed
  end

  def leaf?
    @leaf
  end

  # Append +cell+ to the node and make the node the cell's owner
  def add_cell(cell)
    cell.node = self
    cells << cell
  end

  # Set the all cell and make the node its owner
  def all_cell=(cell)
    @all_cell = cell
    @all_cell.node = self
  end

  def to_s
    index.to_s
  end
end
367
+ end
368
+ end
369
+ end
@@ -0,0 +1,44 @@
1
module ActiveWarehouse #:nodoc:
  module Aggregate #:nodoc:
    # Common methods for use inside dwarf implementations. The including
    # class must provide +cube_class+.
    module DwarfCommon
      # Get the dimension order, defaults to sorting from highest cardinality to lowest.
      #
      # sort_by asks each dimension for its count exactly once. A sort
      # block would ask again on every comparison.
      def dimension_order
        @dimension_order ||= cube_class.dimension_classes.sort_by { |d| d.count }.reverse
      end

      # Set the dimension order
      def dimension_order=(dimensions)
        @dimension_order = dimensions
      end

      # Get the sorted fact rows for this cube, sorted by dimensions returned from dimension_order.
      def sorted_facts
        fact_class = cube_class.fact_class

        # Order by the fact table's foreign key for each dimension, in
        # dimension order.
        order_by = dimension_order.collect { |d| fact_class.foreign_key_for(d) }.join(",")

        # Get the sorted fact table
        # TODO: determine if querying with select_all will bring the entire result set into memory
        sql = "SELECT * FROM #{fact_class.table_name} ORDER BY #{order_by}"
        cube_class.connection.select_all(sql)
      end

      # Create a tuple from a row.
      #
      # The tuple holds the type-cast foreign key of every dimension (in
      # dimension order) followed by the type-cast value of every aggregate
      # field of the fact class.
      def create_tuple(row)
        fact_class = cube_class.fact_class
        tuple = []
        dimension_order.each do |d|
          column_name = fact_class.foreign_key_for(d)
          tuple << fact_class.columns_hash[column_name].type_cast(row[column_name])
        end
        fact_class.aggregate_fields.each do |f|
          tuple << fact_class.columns_hash[f.to_s].type_cast(row[f.to_s])
        end
        tuple
      end
    end
  end
end