activewarehouse 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/README +27 -14
  2. data/Rakefile +16 -5
  3. data/doc/references.txt +4 -0
  4. data/generators/bridge/templates/migration.rb +9 -2
  5. data/generators/bridge/templates/unit_test.rb +8 -0
  6. data/generators/date_dimension/USAGE +1 -0
  7. data/generators/date_dimension/date_dimension_generator.rb +16 -0
  8. data/generators/date_dimension/templates/fixture.yml +5 -0
  9. data/generators/date_dimension/templates/migration.rb +31 -0
  10. data/generators/date_dimension/templates/model.rb +3 -0
  11. data/generators/date_dimension/templates/unit_test.rb +8 -0
  12. data/generators/dimension/templates/migration.rb +1 -10
  13. data/generators/dimension_view/dimension_view_generator.rb +2 -2
  14. data/generators/dimension_view/templates/migration.rb +8 -2
  15. data/generators/fact/templates/migration.rb +2 -0
  16. data/generators/time_dimension/USAGE +1 -0
  17. data/generators/time_dimension/templates/fixture.yml +5 -0
  18. data/generators/time_dimension/templates/migration.rb +12 -0
  19. data/generators/time_dimension/templates/model.rb +3 -0
  20. data/generators/time_dimension/templates/unit_test.rb +8 -0
  21. data/generators/time_dimension/time_dimension_generator.rb +14 -0
  22. data/lib/active_warehouse.rb +13 -2
  23. data/lib/active_warehouse/aggregate.rb +54 -253
  24. data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
  25. data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
  26. data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
  27. data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
  28. data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
  29. data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
  30. data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
  31. data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
  32. data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
  33. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
  34. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
  35. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
  36. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
  37. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
  38. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
  39. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
  40. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
  41. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
  42. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
  43. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
  44. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
  45. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
  46. data/lib/active_warehouse/aggregate_field.rb +49 -0
  47. data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
  48. data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
  49. data/lib/active_warehouse/builder.rb +2 -1
  50. data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
  51. data/lib/active_warehouse/builder/generator/generator.rb +13 -0
  52. data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
  53. data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
  54. data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
  55. data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
  56. data/lib/active_warehouse/calculated_field.rb +27 -0
  57. data/lib/active_warehouse/compat/compat.rb +4 -4
  58. data/lib/active_warehouse/cube.rb +126 -225
  59. data/lib/active_warehouse/cube_query_result.rb +69 -0
  60. data/lib/active_warehouse/dimension.rb +64 -29
  61. data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
  62. data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
  63. data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
  64. data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
  65. data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
  66. data/lib/active_warehouse/fact.rb +119 -40
  67. data/lib/active_warehouse/field.rb +74 -0
  68. data/lib/active_warehouse/ordered_hash.rb +34 -0
  69. data/lib/active_warehouse/prejoin_fact.rb +97 -0
  70. data/lib/active_warehouse/report/abstract_report.rb +40 -14
  71. data/lib/active_warehouse/report/chart_report.rb +3 -3
  72. data/lib/active_warehouse/report/table_report.rb +8 -3
  73. data/lib/active_warehouse/version.rb +1 -1
  74. data/lib/active_warehouse/view/report_helper.rb +144 -34
  75. data/tasks/active_warehouse_tasks.rake +28 -10
  76. metadata +107 -30
data/README CHANGED
@@ -1,6 +1,8 @@
1
1
  == ActiveWarehouse
2
2
 
3
- The ActiveWarehouse library provides classes and functions which help with building Data Warehouses using Rails. It can be installed either as a plugin or as a Gem.
3
+ The ActiveWarehouse library provides classes and functions which help with
4
+ building Data Warehouses using Rails. It can be installed either as a plugin
5
+ or as a Gem.
4
6
 
5
7
  To install as a plugin just use:
6
8
 
@@ -11,19 +13,19 @@ To get the latest edge version.
11
13
  To install as a Gem, use:
12
14
 
13
15
  gem install activewarehouse
14
-
15
- At this point you can use ActiveWarehouse in any application using:
16
16
 
17
- require_gem 'activewarehouse'
18
- require 'active_warehouse'
19
-
20
- If you want to use the Gem and link to your Rails app then use the gemsonrails project:
17
+ On *nix you will need to run this command as root or better yet, using sudo.
18
+
19
+ Next, you will need to freeze or link the Gem to your Rails app. I prefer using
20
+ the gemsonrails project:
21
21
 
22
22
  gem install gemsonrails
23
23
 
24
24
  And then in your Rails app:
25
25
 
26
26
  rake gems:link GEM=activewarehouse
27
+
28
+ It is possible that freezing the Gem to the Rails app may not work at all times. It is most often best to install as a plugin.
27
29
 
28
30
  == Generators
29
31
 
@@ -52,12 +54,14 @@ ActiveWarehouse comes with several generators
52
54
  script/generate dimension_view OrderDate Date
53
55
  script/generate dimension_view order_date date
54
56
 
55
- Creates an OrderDateDimension class which is represented by a view on top of the DateDimension.
57
+ Creates an OrderDateDimension class which is represented by a view on top
58
+ of the DateDimension.
56
59
 
57
60
  The rules for naming are as follows:
58
61
 
59
62
  Facts:
60
- Fact classes and tables follow the typical Rails rules: classes are singular and tables are pluralized.
63
+ Fact classes and tables follow the typical Rails rules: classes are singular
64
+ and tables are pluralized.
61
65
  Both the class and table name are suffixed by "_fact".
62
66
  Dimensions:
63
67
  Dimension classes and tables are both singular.
@@ -68,19 +72,28 @@ Bridge:
68
72
  Bridge classes and tables are both singular.
69
73
  Both the class name and the table name are suffixed by "_bridge".
70
74
  Dimension View:
71
- Dimension View classes are singular. The underlying data structure is a view on top of an existing dimension.
75
+ Dimension View classes are singular. The underlying data structure is a view
76
+ on top of an existing dimension.
72
77
  Both the class name and the view name are suffixed by "_dimension"
73
78
 
74
79
  == ETL
75
80
 
76
- The ActiveWarehouse plugin does not directly handle Extract-Transform-Load processes, however the ActiveWarehouse ETL gem (installed separately) can help. To install it use:
81
+ The ActiveWarehouse plugin does not directly handle Extract-Transform-Load
82
+ processes, however the ActiveWarehouse ETL gem (installed separately) can help.
83
+ To install it use:
77
84
 
78
85
  gem install activewarehouse-etl
86
+
87
+ Once again you should run this command as root or using sudo.
79
88
 
80
- More information on the ETL process can be found at http://activewarehouse.rubyforge.org/etl
89
+ More information on the ETL process can be found at
90
+ http://activewarehouse.rubyforge.org/etl
81
91
 
82
92
  == Tutorial
83
93
 
84
- A tutorial for ActiveWarehouse is available online at http://anthonyeden.com/2006/12/20/activewarehouse-example-with-rails-svn-logs
94
+ A tutorial for ActiveWarehouse is available online at
95
+ http://anthonyeden.com/2006/12/20/activewarehouse-example-with-rails-svn-logs
96
+ (Note that it is out of date.)
85
97
 
86
- You can also get a demo from the ActiveWarehouse subversion repository. Look in the SVN_ROOT/demo directory.
98
+ You can also get a demo from the ActiveWarehouse subversion repository. Look in
99
+ the SVN_ROOT/demo directory.
data/Rakefile CHANGED
@@ -68,11 +68,13 @@ spec = Gem::Specification.new do |s|
68
68
  ActiveWarehouse extends Rails to provide functionality specific for building data warehouses.
69
69
  EOF
70
70
 
71
- s.add_dependency('rake', '>= 0.7.1')
72
- s.add_dependency('activesupport', '>= 1.3.1.5618')
73
- s.add_dependency('activerecord', '>= 1.14.4.5618')
74
- s.add_dependency('actionpack', '>= 1.12.5.5618')
75
- s.add_dependency('rails_sql_views', '>= 0.1.0')
71
+ s.add_dependency('rake', '>= 0.7.1')
72
+ s.add_dependency('fastercsv', '>= 1.1.0')
73
+ s.add_dependency('activesupport', '>= 1.3.1')
74
+ s.add_dependency('activerecord', '>= 1.14.4')
75
+ s.add_dependency('actionpack', '>= 1.12.5')
76
+ s.add_dependency('rails_sql_views', '>= 0.1.0')
77
+ s.add_dependency('adapter_extensions', '>= 0.1.0')
76
78
 
77
79
  s.rdoc_options << '--exclude' << '.'
78
80
  s.has_rdoc = false
@@ -135,4 +137,13 @@ end
135
137
  desc "Publish the API documentation"
136
138
  task :pdoc => [:rdoc] do
137
139
  Rake::SshDirPublisher.new("aeden@rubyforge.org", "/var/www/gforge-projects/activewarehouse/rdoc", "rdoc").upload
140
+ end
141
+
142
+ desc "Reinstall the gem from a local package copy"
143
+ task :reinstall => [:package] do
144
+ windows = RUBY_PLATFORM =~ /mswin/
145
+ sudo = windows ? '' : 'sudo'
146
+ gem = windows ? 'gem.bat' : 'gem'
147
+ `#{sudo} #{gem} uninstall -x -i #{PKG_NAME}`
148
+ `#{sudo} #{gem} install pkg/#{PKG_NAME}-#{PKG_VERSION}`
138
149
  end
@@ -0,0 +1,4 @@
1
+ The following papers are relevant to building, storing and querying data cubes with large databases.
2
+
3
+ http://research.microsoft.com/research/pubs/view.aspx?msr_tr_id=MSR-TR-95-22
4
+ http://dbpubs.stanford.edu/pub/showDoc.Fulltext?lang=en&doc=1995-34&format=pdf&compression=&name=1995-34.pdf
@@ -1,8 +1,14 @@
1
1
  class <%= migration_name %> < ActiveRecord::Migration
2
2
  def self.up
3
3
  fields = {
4
- # Add bridge attributes here as name => type
5
- # Example: :top_flag => :integer
4
+ # the following are the required bridge table columns for
5
+ # variable depth hierarchies. Do not change them unless you know
6
+ # what you are doing.
7
+ :parent_id => :integer,
8
+ :child_id => :integer,
9
+ :num_levels_from_parent => :integer,
10
+ :is_bottom => :boolean,
11
+ :is_top => :boolean
6
12
  }
7
13
  create_table :<%= table_name %> do |t|
8
14
  fields.each do |name,type|
@@ -12,6 +18,7 @@ class <%= migration_name %> < ActiveRecord::Migration
12
18
  fields.each do |name,type|
13
19
  add_index :<%= table_name %>, name unless type == :text
14
20
  end
21
+ add_index :<%= table_name %>, [:parent_id, :child_id, :num_levels_from_parent], :unique => true
15
22
  end
16
23
 
17
24
  def self.down
@@ -0,0 +1,8 @@
1
+ require File.dirname(__FILE__) + '<%= '/..' * class_nesting_depth %>/../test_helper'
2
+
3
+ class <%= class_name %>Test < Test::Unit::TestCase
4
+ # Replace this with your real tests.
5
+ def test_truth
6
+ assert true
7
+ end
8
+ end
@@ -0,0 +1 @@
1
+ ./script/generate date_dimension
@@ -0,0 +1,16 @@
1
+ class DateDimensionGenerator < DimensionGenerator
2
+ attr_accessor :file_name
3
+ attr_accessor :include_fiscal_year
4
+
5
+ default_options :skip_migration => false
6
+
7
+ def initialize(runtime_args, runtime_options = {})
8
+ super
9
+
10
+ @name = 'date'
11
+ @table_name = "#{@name}_dimension"
12
+ @class_name = "#{@name.camelize}Dimension"
13
+ @file_name = "#{@class_name.tableize.singularize}"
14
+ @include_fiscal_year = true # TODO: accept a runtime option to set this
15
+ end
16
+ end
@@ -0,0 +1,5 @@
1
+ # Read about fixtures at http://ar.rubyonrails.org/classes/Fixtures.html
2
+ first:
3
+ id: 1
4
+ another:
5
+ id: 2
@@ -0,0 +1,31 @@
1
+ class <%= migration_name %> < ActiveRecord::Migration
2
+ def self.up
3
+ create_table :<%= table_name %> do |t|
4
+ t.column :sql_date, :date, :null => false # SQL Date object
5
+ t.column :calendar_year, :string, :null => false # 2005, 2006, 2007, etc.
6
+ t.column :calendar_quarter, :string, :null => false, :limit => 2 # Q1, Q2, Q3 or Q4
7
+ t.column :calendar_quarter_number, :integer, :null => false # 1, 2, 3 or 4
8
+ t.column :calendar_month_name, :string, :null => false, :limit => 9 # January, February, etc.
9
+ t.column :calendar_month_number, :integer, :null => false # 1, 2, 3, ... 12
10
+ t.column :calendar_week, :string, :null => false, :limit => 2 # 1, 2, 3, ... 52
11
+ t.column :calendar_week_number, :integer, :null => false # 1, 2, 3, ... 52
12
+ t.column :day_number_in_calendar_year, :integer, :null => false # 1, 2, 3, ... 365
13
+ t.column :day_number_in_calendar_month, :integer, :null => false # 1, 2, 3, ... 31
14
+ t.column :day_in_week, :string, :null => false, :limit => 9 # Monday, Tuesday, etc.
15
+ <% if include_fiscal_year -%>
16
+ t.column :fiscal_year, :string, :null => false
17
+ t.column :fiscal_quarter, :string, :null => false, :limit => 2
18
+ t.column :fiscal_quarter_number, :integer, :null => false
19
+ t.column :fiscal_month_number, :integer, :null => false
20
+ t.column :fiscal_week, :string, :null => false, :limit => 2
21
+ t.column :fiscal_week_number, :integer, :null => false
22
+ t.column :day_number_in_fiscal_year, :integer, :null => false
23
+ <% end -%>
24
+ end
25
+ # add indexes as required
26
+ end
27
+
28
+ def self.down
29
+ drop_table :<%= table_name %>
30
+ end
31
+ end
@@ -0,0 +1,3 @@
1
+ class <%= class_name %> < ActiveWarehouse::DateDimension
2
+
3
+ end
@@ -0,0 +1,8 @@
1
+ require File.dirname(__FILE__) + '<%= '/..' * class_nesting_depth %>/../test_helper'
2
+
3
+ class <%= class_name %>Test < Test::Unit::TestCase
4
+ # Replace this with your real tests.
5
+ def test_truth
6
+ assert true
7
+ end
8
+ end
@@ -1,16 +1,7 @@
1
1
  class <%= migration_name %> < ActiveRecord::Migration
2
2
  def self.up
3
- fields = {
4
- # Add dimension attributes here as name => type
5
- # Example: :store_name => :string
6
- }
7
3
  create_table :<%= table_name %> do |t|
8
- fields.each do |name,type|
9
- t.column name, type
10
- end
11
- end
12
- fields.each do |name,type|
13
- add_index :<%= table_name %>, name unless type == :text
4
+
14
5
  end
15
6
  end
16
7
 
@@ -21,7 +21,7 @@ class DimensionViewGenerator < Rails::Generator::NamedBase
21
21
  # define the query target class and expose its columns as attributes for the view
22
22
  @query_target_class = @query_target_class_name.constantize
23
23
  @view_attributes = @query_target_class.column_names
24
- @view_query = "select * from #{query_target_table_name}"
24
+ @view_query = "select #{@view_attributes.join(',')} from #{query_target_table_name}"
25
25
  end
26
26
 
27
27
  def manifest
@@ -37,7 +37,7 @@ class DimensionViewGenerator < Rails::Generator::NamedBase
37
37
  # Generate the files
38
38
  m.template 'model.rb', File.join('app/models', class_path, "#{file_name}.rb")
39
39
  m.template 'unit_test.rb', File.join('test/unit', class_path, "#{file_name}_test.rb")
40
- m.template 'fixture.yml', File.join('test/fixtures', class_path, "#{table_name}.yml")
40
+ #m.template 'fixture.yml', File.join('test/fixtures', class_path, "#{table_name}.yml")
41
41
 
42
42
  # Generate the migration unless :skip_migration option is specified
43
43
  unless options[:skip_migration]
@@ -1,7 +1,13 @@
1
1
  class <%= migration_name %> < ActiveRecord::Migration
2
2
  def self.up
3
- create_view "<%= view_name %>", <%= view_query do |t|
4
- <%= view_attributes %>
3
+ create_view "<%= view_name %>", "<%= view_query %>" do |t|
4
+ <%- view_attributes.each do |view_attribute| -%>
5
+ <%- if view_attribute == 'id' -%>
6
+ t.column :id
7
+ <%- else -%>
8
+ t.column :<%= name %>_<%= view_attribute %>
9
+ <%- end -%>
10
+ <%- end -%>
5
11
  end
6
12
  end
7
13
 
@@ -3,6 +3,8 @@ class <%= migration_name %> < ActiveRecord::Migration
3
3
  create_table :<%= table_name %> do |t|
4
4
 
5
5
  end
6
+ # you should add indexes for each foreign key, but don't add
7
+ # the foreign key itself unless you really know what you are doing.
6
8
  end
7
9
 
8
10
  def self.down
@@ -0,0 +1 @@
1
+ ./script/generate time_dimension
@@ -0,0 +1,5 @@
1
+ # Read about fixtures at http://ar.rubyonrails.org/classes/Fixtures.html
2
+ first:
3
+ id: 1
4
+ another:
5
+ id: 2
@@ -0,0 +1,12 @@
1
+ class <%= migration_name %> < ActiveRecord::Migration
2
+ def self.up
3
+ create_table :<%= table_name %> do |t|
4
+ t.column :hour_of_day, :integer, :null => false
5
+ t.column :minute_of_hour, :integer, :null => false
6
+ end
7
+ end
8
+
9
+ def self.down
10
+ drop_table :<%= table_name %>
11
+ end
12
+ end
@@ -0,0 +1,3 @@
1
+ class <%= class_name %> < ActiveWarehouse::Dimension
2
+
3
+ end
@@ -0,0 +1,8 @@
1
+ require File.dirname(__FILE__) + '<%= '/..' * class_nesting_depth %>/../test_helper'
2
+
3
+ class <%= class_name %>Test < Test::Unit::TestCase
4
+ # Replace this with your real tests.
5
+ def test_truth
6
+ assert true
7
+ end
8
+ end
@@ -0,0 +1,14 @@
1
+ class TimeDimensionGenerator < DimensionGenerator
2
+ attr_accessor :file_name
3
+
4
+ default_options :skip_migration => false
5
+
6
+ def initialize(runtime_args, runtime_options = {})
7
+ super
8
+
9
+ @name = 'date'
10
+ @table_name = "#{@name}_dimension"
11
+ @class_name = "#{@name.camelize}Dimension"
12
+ @file_name = "#{@class_name.tableize.singularize}"
13
+ end
14
+ end
@@ -35,7 +35,7 @@ end
35
35
 
36
36
  unless defined?(ActiveSupport)
37
37
  begin
38
- $:.unshift(File.dirname(__FILE__) + "/../../activesupport/lib")
38
+ $:.unshift(File.dirname(__FILE__) + "/../../activesupport/lib")
39
39
  require 'active_support'
40
40
  rescue LoadError
41
41
  gem 'activesupport'
@@ -62,18 +62,29 @@ unless defined?(ActionView)
62
62
  end
63
63
  end
64
64
 
65
+ require 'fastercsv'
66
+ require 'fileutils'
67
+ require 'adapter_extensions'
68
+
65
69
  # Require 1.1.6 compatibility code if necessary
66
70
  require 'active_record/version'
67
71
  if ActiveRecord::VERSION::MAJOR < 1 || ActiveRecord::VERSION::MINOR < 15
68
72
  require 'active_warehouse/compat/compat'
69
73
  end
70
74
 
75
+ require 'active_warehouse/ordered_hash'
76
+ require 'active_warehouse/field'
77
+ require 'active_warehouse/aggregate_field'
78
+ require 'active_warehouse/calculated_field'
71
79
  require 'active_warehouse/version'
72
80
  require 'active_warehouse/core_ext'
73
- require 'active_warehouse/aggregate'
81
+ require 'active_warehouse/prejoin_fact'
74
82
  require 'active_warehouse/fact'
83
+ require 'active_warehouse/bridge'
75
84
  require 'active_warehouse/dimension'
76
85
  require 'active_warehouse/cube'
86
+ require 'active_warehouse/cube_query_result'
87
+ require 'active_warehouse/aggregate'
77
88
  require 'active_warehouse/report'
78
89
  require 'active_warehouse/view'
79
90
  require 'active_warehouse/builder'
@@ -1,260 +1,61 @@
1
- module ActiveWarehouse #:nodoc:
2
- # An aggregate within a cube used to store calculated values. Each aggregate will contain values for a dimension pair,
3
- # down each of the dimension hierarchies
4
- class Aggregate < ActiveRecord::Base
5
- class << self
6
- attr_accessor :name, :cube, :dimension1, :dimension2, :dimension1_hierarchy_name, :dimension2_hierarchy_name
7
-
8
- # Get the table name for the aggregate
9
- def table_name
10
- name = self.name.demodulize.underscore
11
- set_table_name(name)
12
- name
13
- end
14
-
15
- # Returns the aggregate ID
16
- def aggregate_id
17
- table_name =~ /(\d+)$/
18
- $1.to_i
19
- end
20
-
21
- # Returns the AggregateMetaData instance associated with this aggregate
22
- def meta_data
23
- AggregateMetaData.find(aggregate_id)
24
- end
25
-
26
- # Return true if the aggregate needs to be rebuilt
27
- def needs_rebuild?(last_build=nil)
28
- return true if meta_data.populated_at.nil?
29
- return true if last_build && (meta_data.populated_at < last_build)
30
- return false
31
- end
32
-
33
- # Return a key for the aggregate
34
- def key(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
35
- AggregateKey.new(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
36
- end
37
-
38
- # Create the aggregate table if required. Set force option to true to force creation of the table
39
- # if it already exists
40
- def create_storage_table(force=false)
41
- connection.drop_table(table_name) if force and table_exists?
42
- if !table_exists?
43
- connection.create_table(table_name, :id => false) do |t|
44
- t.column :dimension1_path, :string
45
- t.column :dimension1_stage, :integer
46
- t.column :dimension2_path, :string
47
- t.column :dimension2_stage, :integer
48
- cube.fact_class.aggregate_fields.each do |field|
49
- #options = cube.fact_class.aggregate_field_options[field]
50
- col = cube.fact_class.columns_hash[field.to_s]
51
- t.column field, col.type if col
52
- end
53
- end
54
- connection.add_index(table_name, :dimension1_path)
55
- connection.add_index(table_name, :dimension1_stage)
56
- connection.add_index(table_name, :dimension2_path)
57
- connection.add_index(table_name, :dimension2_stage)
58
- end
59
- end
60
-
61
- # Populate the aggregate table
62
- def populate
63
- # create the storage table if necessary
64
- create_storage_table
65
-
66
- #puts "Populating aggregate table #{table_name}"
67
- # clear out the current data
68
- #connection.execute("TRUNCATE TABLE #{table_name}") #TODO: make this generic to support all databases
69
- delete_all
70
-
71
- $first = false
72
-
73
- # aggregate the data for the two dimensions
74
- fact_class = cube.fact_class
75
- dim1 = Dimension.class_name(dimension1).constantize
76
- dim2 = Dimension.class_name(dimension2).constantize
77
- dim1_stage_path = []
78
- dim1.hierarchy(meta_data.dimension1_hierarchy.to_sym).each_with_index do |dim1_stage_name, dim1_stage_level|
79
- dim1_stage_path << dim1_stage_name
80
- dim2_stage_path = []
81
- dim2.hierarchy(meta_data.dimension2_hierarchy.to_sym).each_with_index do |dim2_stage_name, dim2_stage_level|
82
- dim2_stage_path << dim2_stage_name
83
-
84
- stmt, fields = build_query(fact_class, dim1, dim1_stage_path, dim2, dim2_stage_path)
85
-
86
- puts "\nSTMT: #{stmt}" if $first
87
-
88
- # Get the facts and aggregate them
89
- # TODO: replace with select_all
90
- fact_class.connection.select_all(stmt).each do |row|
91
- require 'pp'
92
- pp row if $first
93
- dim1_value = []
94
- dim1_stage_path.each do |v|
95
- dim1_value << row["#{v}"]
96
- end
97
- dim2_value = []
98
- dim2_stage_path.each do |v|
99
- dim2_value << row["#{v}"]
100
- end
101
-
102
- agg_instance = new
103
- agg_instance.dimension1_path = dim1_value.join(':')
104
- agg_instance.dimension1_stage = dim1_stage_level
105
- agg_instance.dimension2_path = dim2_value.join(':')
106
- agg_instance.dimension2_stage = dim2_stage_level
107
-
108
- puts "DIM1_PATH: #{agg_instance.dimension1_path}" if $first
109
- puts "DIM2_PATH: #{agg_instance.dimension2_path}" if $first
110
-
111
-
112
- pp fields if $first
113
- fields.each do |field|
114
- # do the average here
115
- puts "setting field #{field}, value is #{row[field.to_s]}" if $first
116
- agg_instance.send("#{field}=".to_sym, row[field.to_s])
117
- end
118
- agg_instance.save!
119
-
120
- meta_data.update_attribute(:populated_at, Time.now)
121
-
122
- $first = false
123
- end
124
- end
125
- end
126
- end
127
-
128
- # Build the aggregation query for the given dimensions and stage paths
129
- def build_query(fact_class, dim1, dim1_stage_path, dim2, dim2_stage_path)
130
- dim1_group = dim1_stage_path.collect { |p| "d1.#{p}"}.join(", ")
131
- dim2_group = dim2_stage_path.collect { |p| "d2.#{p}"}.join(", ")
132
-
133
- # Set up the find options
134
- fact_find_options = {}
135
- fact_find_options[:group] = "#{dim1_group}, #{dim2_group}"
136
- fact_find_options[:joins] = "join #{dim1.table_name} d1 on f.#{dim1.foreign_key} = d1.id"
137
- fact_find_options[:joins] << " join #{dim2.table_name} d2 on f.#{dim2.foreign_key} = d2.id"
138
-
139
- # Build the 'select' part of the query
140
- # denominator = nil
141
- fields = []
142
- fact_select = ["#{dim1_group}, #{dim2_group}"]
143
- fact_class.aggregate_fields.each do |field_name|
144
- options = fact_class.aggregate_field_options[field_name]
145
- fields << field_name
146
-
147
- options[:type] ||= :sum
148
- case options[:type]
149
- when :sum
150
- fact_select << " sum(f.#{field_name}) as #{field_name}"
151
- when Hash
152
- if options[:type][dim1.sym] == :average && options[:type][dim2.sym] == :average
153
- # I believe this is a special case, but I'm not sure how yet. If both dimensions are defined
154
- # averages then perhaps that value cannot be calculated at all. TODO: research
155
- else
156
- fact_select << " sum(f.#{field_name}) as #{field_name}"
157
- end
158
- else
159
- raise "Unsupported aggregate type: #{options[:type]}"
160
- end
161
- end
162
- fact_find_options[:select] = fact_select.join(',')
1
+ # Source file which defines the ActiveWarehouse::Aggregate module and imports
2
+ # the aggregate implementations.
163
3
 
164
- # put the SQL statement together
165
- stmt = "select #{fact_find_options[:select]} from "
166
- stmt << "#{fact_class.table_name} f #{fact_find_options[:joins]} "
167
- stmt << "group by #{fact_find_options[:group]} "
168
-
169
- return stmt.strip, fields
170
- end
171
-
172
- end
173
-
174
- public
175
- # Clone and reset at the same time
176
- def clone_and_reset
177
- o = clone
178
- o.reset
179
- o
180
- end
181
-
182
- def non_data_fields
183
- ['dimension1_path','dimension1_stage','dimension2_path','dimension2_stage']
184
- end
185
-
186
- def data_fields
187
- fields = []
188
- self.class.columns.each do |column|
189
- unless non_data_fields.include?(column.name)
190
- fields << column.name
191
- end
192
- end
193
- fields
194
- end
195
-
196
- # Reset the aggregate
197
- def reset
198
- self.class.columns.each do |column|
199
- unless non_data_fields.include?(column.name)
200
- value = column.number? ? 0 : 'None'
201
- send("#{column.name}=".to_sym, value)
202
- end
203
- end
204
- end
205
- end
206
-
207
- # ActiveRecord object which stores meta data about the aggregate
208
- class AggregateMetaData < ActiveRecord::Base
209
- # Build the underlying table. Set force to true to force the build of the table
210
- def self.build_table(force=false)
211
- connection.drop_table(table_name) if force and table_exists?
212
- if !table_exists?
213
- connection.create_table(table_name) do |t|
214
- t.column :cube_name, :string
215
- t.column :dimension1, :string
216
- t.column :dimension1_hierarchy, :string
217
- t.column :dimension2, :string
218
- t.column :dimension2_hierarchy, :string
219
- t.column :created_at, :datetime
220
- t.column :populated_at, :datetime
4
+ module ActiveWarehouse #:nodoc:
5
+ # This module contains classes which handle aggregation of cube data using
6
+ # various algorithms
7
+ module Aggregate
8
+ # Base class for aggregate implementations
9
+ class Aggregate
10
+
11
+ # Reader for the cube class
12
+ attr_reader :cube_class
13
+
14
+ # Initialize the aggregate for the given cube class
15
+ def initialize(cube_class)
16
+ @cube_class = cube_class
17
+ end
18
+
19
+ protected
20
+ # Get the connection to use for SQL execution
21
+ def connection
22
+ cube_class.connection
23
+ end
24
+
25
+ # Convenience accessor to get the cube's fact class. Delegates to the
26
+ # cube class.
27
+ def fact_class
28
+ cube_class.fact_class
29
+ end
30
+
31
+ # Parse the query args and return an options hash.
32
+ def parse_query_args(*args)
33
+ options = {}
34
+ if args.length == 1
35
+ options = args[0]
36
+ elsif args.length >= 4
37
+ options[:column_dimension_name] = args[0]
38
+ options[:column_hierarchy_name] = args[1]
39
+ options[:row_dimension_name] = args[2]
40
+ options[:row_hierarchy_name] = args[3]
41
+ options[:conditions] = args[4] if args.length >= 5
42
+ options[:cstage] = args[5] if args.length >= 6
43
+ options[:rstage] = args[6] if args.length >= 7
44
+ options[:filters] = args[7] if args.length >= 8
45
+ options.merge!(args[8]) if args.length >= 9
46
+ else
47
+ raise ArgumentError, "The query method accepts either 1 Hash (new style) or 4 to 8 arguments (old style)"
221
48
  end
222
- connection.add_index table_name, :cube_name
49
+ options
223
50
  end
224
51
  end
225
- def key
226
- Aggregate.key(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
227
- end
228
- end
229
-
230
- # Key for aggregate caching
231
- class AggregateKey
232
- attr_reader :dimension1, :dimension1_hierarchy, :dimension2, :dimension2_hierarchy
233
-
234
- def initialize(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
235
- @dimension1 = dimension1
236
- @dimension1_hierarchy = dimension1_hierarchy
237
- @dimension2 = dimension2
238
- @dimension2_hierarchy = dimension2_hierarchy
239
- end
240
-
241
- def ==(o)
242
- o.instance_of?(self.class) and (o.to_s == to_s or o.to_s = to_rs)
243
- end
244
-
245
- def hash
246
- to_s.hash
247
- end
248
-
249
- def to_s
250
- "#{@dimension1}.#{@dimension1_hierarchy}.#{@dimension2}.#{@dimension2_hierarchy}"
251
- end
252
-
253
- # Return the "reversed" version of this key String representation
254
- def to_rs
255
- "#{@dimension2}.#{@dimension2_hierarchy}.#{@dimension1}.#{@dimension1_hierarchy}"
256
- end
257
52
  end
258
53
  end
259
54
 
260
- ActiveWarehouse::AggregateMetaData.build_table
55
+ require 'active_warehouse/aggregate/no_aggregate'
56
+ require 'active_warehouse/aggregate/rolap_common'
57
+ require 'active_warehouse/aggregate/pipelined_rolap_aggregate'
58
+ require 'active_warehouse/aggregate/rolap_aggregate'
59
+ require 'active_warehouse/aggregate/dwarf_common'
60
+ require 'active_warehouse/aggregate/dwarf_aggregate'
61
+ require 'active_warehouse/aggregate/pid_aggregate'