activewarehouse 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/README +27 -14
  2. data/Rakefile +16 -5
  3. data/doc/references.txt +4 -0
  4. data/generators/bridge/templates/migration.rb +9 -2
  5. data/generators/bridge/templates/unit_test.rb +8 -0
  6. data/generators/date_dimension/USAGE +1 -0
  7. data/generators/date_dimension/date_dimension_generator.rb +16 -0
  8. data/generators/date_dimension/templates/fixture.yml +5 -0
  9. data/generators/date_dimension/templates/migration.rb +31 -0
  10. data/generators/date_dimension/templates/model.rb +3 -0
  11. data/generators/date_dimension/templates/unit_test.rb +8 -0
  12. data/generators/dimension/templates/migration.rb +1 -10
  13. data/generators/dimension_view/dimension_view_generator.rb +2 -2
  14. data/generators/dimension_view/templates/migration.rb +8 -2
  15. data/generators/fact/templates/migration.rb +2 -0
  16. data/generators/time_dimension/USAGE +1 -0
  17. data/generators/time_dimension/templates/fixture.yml +5 -0
  18. data/generators/time_dimension/templates/migration.rb +12 -0
  19. data/generators/time_dimension/templates/model.rb +3 -0
  20. data/generators/time_dimension/templates/unit_test.rb +8 -0
  21. data/generators/time_dimension/time_dimension_generator.rb +14 -0
  22. data/lib/active_warehouse.rb +13 -2
  23. data/lib/active_warehouse/aggregate.rb +54 -253
  24. data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
  25. data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
  26. data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
  27. data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
  28. data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
  29. data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
  30. data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
  31. data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
  32. data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
  33. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
  34. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
  35. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
  36. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
  37. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
  38. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
  39. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
  40. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
  41. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
  42. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
  43. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
  44. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
  45. data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
  46. data/lib/active_warehouse/aggregate_field.rb +49 -0
  47. data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
  48. data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
  49. data/lib/active_warehouse/builder.rb +2 -1
  50. data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
  51. data/lib/active_warehouse/builder/generator/generator.rb +13 -0
  52. data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
  53. data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
  54. data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
  55. data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
  56. data/lib/active_warehouse/calculated_field.rb +27 -0
  57. data/lib/active_warehouse/compat/compat.rb +4 -4
  58. data/lib/active_warehouse/cube.rb +126 -225
  59. data/lib/active_warehouse/cube_query_result.rb +69 -0
  60. data/lib/active_warehouse/dimension.rb +64 -29
  61. data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
  62. data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
  63. data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
  64. data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
  65. data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
  66. data/lib/active_warehouse/fact.rb +119 -40
  67. data/lib/active_warehouse/field.rb +74 -0
  68. data/lib/active_warehouse/ordered_hash.rb +34 -0
  69. data/lib/active_warehouse/prejoin_fact.rb +97 -0
  70. data/lib/active_warehouse/report/abstract_report.rb +40 -14
  71. data/lib/active_warehouse/report/chart_report.rb +3 -3
  72. data/lib/active_warehouse/report/table_report.rb +8 -3
  73. data/lib/active_warehouse/version.rb +1 -1
  74. data/lib/active_warehouse/view/report_helper.rb +144 -34
  75. data/tasks/active_warehouse_tasks.rake +28 -10
  76. metadata +107 -30
data/README CHANGED
@@ -1,6 +1,8 @@
1
1
  == ActiveWarehouse
2
2
 
3
- The ActiveWarehouse library provides classes and functions which help with building Data Warehouses using Rails. It can be installed either as a plugin or as a Gem.
3
+ The ActiveWarehouse library provides classes and functions which help with
4
+ building Data Warehouses using Rails. It can be installed either as a plugin
5
+ or as a Gem.
4
6
 
5
7
  To install as a plugin just use:
6
8
 
@@ -11,19 +13,19 @@ To get the latest edge version.
11
13
  To install as a Gem, use:
12
14
 
13
15
  gem install activewarehouse
14
-
15
- At this point you can use ActiveWarehouse in any application using:
16
16
 
17
- require_gem 'activewarehouse'
18
- require 'active_warehouse'
19
-
20
- If you want to use the Gem and link to your Rails app then use the gemsonrails project:
17
+ On *nix you will need to run this command as root or, better yet, using sudo.
18
+
19
+ Next, you will need to freeze or link the Gem to your Rails app. I prefer using
20
+ the gemsonrails project:
21
21
 
22
22
  gem install gemsonrails
23
23
 
24
24
  And then in your Rails app:
25
25
 
26
26
  rake gems:link GEM=activewarehouse
27
+
28
+ Freezing the Gem to the Rails app may not always work; installing as a plugin is usually the most reliable option.
27
29
 
28
30
  == Generators
29
31
 
@@ -52,12 +54,14 @@ ActiveWarehouse comes with several generators
52
54
  script/generate dimension_view OrderDate Date
53
55
  script/generate dimension_view order_date date
54
56
 
55
- Creates an OrderDateDimension class which is represented by a view on top of the DateDimension.
57
+ Creates an OrderDateDimension class which is represented by a view on top
58
+ of the DateDimension.
56
59
 
57
60
  The rules for naming are as follows:
58
61
 
59
62
  Facts:
60
- Fact classes and tables follow the typical Rails rules: classes are singular and tables are pluralized.
63
+ Fact classes and tables follow the typical Rails rules: classes are singular
64
+ and tables are pluralized.
61
65
  Both the class and table name are suffixed by "_fact".
62
66
  Dimensions:
63
67
  Dimension classes and tables are both singular.
@@ -68,19 +72,28 @@ Bridge:
68
72
  Bridge classes and tables are both singular.
69
73
  Both the class name and the table name are suffixed by "_bridge".
70
74
  Dimension View:
71
- Dimension View classes are singular. The underlying data structure is a view on top of an existing dimension.
75
+ Dimension View classes are singular. The underlying data structure is a view
76
+ on top of an existing dimension.
72
77
  Both the class name and the view name are suffixed by "_dimension"
73
78
 
74
79
  == ETL
75
80
 
76
- The ActiveWarehouse plugin does not directly handle Extract-Transform-Load processes, however the ActiveWarehouse ETL gem (installed separately) can help. To install it use:
81
+ The ActiveWarehouse plugin does not directly handle Extract-Transform-Load
82
+ processes; however, the ActiveWarehouse ETL gem (installed separately) can help.
83
+ To install it use:
77
84
 
78
85
  gem install activewarehouse-etl
86
+
87
+ Once again you should run this command as root or using sudo.
79
88
 
80
- More information on the ETL process can be found at http://activewarehouse.rubyforge.org/etl
89
+ More information on the ETL process can be found at
90
+ http://activewarehouse.rubyforge.org/etl
81
91
 
82
92
  == Tutorial
83
93
 
84
- A tutorial for ActiveWarehouse is available online at http://anthonyeden.com/2006/12/20/activewarehouse-example-with-rails-svn-logs
94
+ A tutorial for ActiveWarehouse is available online at
95
+ http://anthonyeden.com/2006/12/20/activewarehouse-example-with-rails-svn-logs
96
+ (Note that it is out of date.)
85
97
 
86
- You can also get a demo from the ActiveWarehouse subversion repository. Look in the SVN_ROOT/demo directory.
98
+ You can also get a demo from the ActiveWarehouse subversion repository. Look in
99
+ the SVN_ROOT/demo directory.
data/Rakefile CHANGED
@@ -68,11 +68,13 @@ spec = Gem::Specification.new do |s|
68
68
  ActiveWarehouse extends Rails to provide functionality specific for building data warehouses.
69
69
  EOF
70
70
 
71
- s.add_dependency('rake', '>= 0.7.1')
72
- s.add_dependency('activesupport', '>= 1.3.1.5618')
73
- s.add_dependency('activerecord', '>= 1.14.4.5618')
74
- s.add_dependency('actionpack', '>= 1.12.5.5618')
75
- s.add_dependency('rails_sql_views', '>= 0.1.0')
71
+ s.add_dependency('rake', '>= 0.7.1')
72
+ s.add_dependency('fastercsv', '>= 1.1.0')
73
+ s.add_dependency('activesupport', '>= 1.3.1')
74
+ s.add_dependency('activerecord', '>= 1.14.4')
75
+ s.add_dependency('actionpack', '>= 1.12.5')
76
+ s.add_dependency('rails_sql_views', '>= 0.1.0')
77
+ s.add_dependency('adapter_extensions', '>= 0.1.0')
76
78
 
77
79
  s.rdoc_options << '--exclude' << '.'
78
80
  s.has_rdoc = false
@@ -135,4 +137,13 @@ end
135
137
  desc "Publish the API documentation"
136
138
  task :pdoc => [:rdoc] do
137
139
  Rake::SshDirPublisher.new("aeden@rubyforge.org", "/var/www/gforge-projects/activewarehouse/rdoc", "rdoc").upload
140
+ end
141
+
142
+ desc "Reinstall the gem from a local package copy"
143
+ task :reinstall => [:package] do
144
+ windows = RUBY_PLATFORM =~ /mswin/
145
+ sudo = windows ? '' : 'sudo'
146
+ gem = windows ? 'gem.bat' : 'gem'
147
+ `#{sudo} #{gem} uninstall -x -i #{PKG_NAME}`
148
+ `#{sudo} #{gem} install pkg/#{PKG_NAME}-#{PKG_VERSION}`
138
149
  end
@@ -0,0 +1,4 @@
1
+ The following papers are relevant to building, storing and querying data cubes with large databases.
2
+
3
+ http://research.microsoft.com/research/pubs/view.aspx?msr_tr_id=MSR-TR-95-22
4
+ http://dbpubs.stanford.edu/pub/showDoc.Fulltext?lang=en&doc=1995-34&format=pdf&compression=&name=1995-34.pdf
@@ -1,8 +1,14 @@
1
1
  class <%= migration_name %> < ActiveRecord::Migration
2
2
  def self.up
3
3
  fields = {
4
- # Add bridge attributes here as name => type
5
- # Example: :top_flag => :integer
4
+ # the following are the required bridge table columns for
5
+ # variable depth hierarchies. Do not change them unless you know
6
+ # what you are doing.
7
+ :parent_id => :integer,
8
+ :child_id => :integer,
9
+ :num_levels_from_parent => :integer,
10
+ :is_bottom => :boolean,
11
+ :is_top => :boolean
6
12
  }
7
13
  create_table :<%= table_name %> do |t|
8
14
  fields.each do |name,type|
@@ -12,6 +18,7 @@ class <%= migration_name %> < ActiveRecord::Migration
12
18
  fields.each do |name,type|
13
19
  add_index :<%= table_name %>, name unless type == :text
14
20
  end
21
+ add_index :<%= table_name %>, [:parent_id, :child_id, :num_levels_from_parent], :unique => true
15
22
  end
16
23
 
17
24
  def self.down
@@ -0,0 +1,8 @@
1
+ require File.dirname(__FILE__) + '<%= '/..' * class_nesting_depth %>/../test_helper'
2
+
3
+ class <%= class_name %>Test < Test::Unit::TestCase
4
+ # Replace this with your real tests.
5
+ def test_truth
6
+ assert true
7
+ end
8
+ end
@@ -0,0 +1 @@
1
+ ./script/generate date_dimension
@@ -0,0 +1,16 @@
1
+ class DateDimensionGenerator < DimensionGenerator
2
+ attr_accessor :file_name
3
+ attr_accessor :include_fiscal_year
4
+
5
+ default_options :skip_migration => false
6
+
7
+ def initialize(runtime_args, runtime_options = {})
8
+ super
9
+
10
+ @name = 'date'
11
+ @table_name = "#{@name}_dimension"
12
+ @class_name = "#{@name.camelize}Dimension"
13
+ @file_name = "#{@class_name.tableize.singularize}"
14
+ @include_fiscal_year = true # TODO: accept a runtime option to set this
15
+ end
16
+ end
@@ -0,0 +1,5 @@
1
+ # Read about fixtures at http://ar.rubyonrails.org/classes/Fixtures.html
2
+ first:
3
+ id: 1
4
+ another:
5
+ id: 2
@@ -0,0 +1,31 @@
1
+ class <%= migration_name %> < ActiveRecord::Migration
2
+ def self.up
3
+ create_table :<%= table_name %> do |t|
4
+ t.column :sql_date, :date, :null => false # SQL Date object
5
+ t.column :calendar_year, :string, :null => false # 2005, 2006, 2007, etc.
6
+ t.column :calendar_quarter, :string, :null => false, :limit => 2 # Q1, Q2, Q3 or Q4
7
+ t.column :calendar_quarter_number, :integer, :null => false # 1, 2, 3 or 4
8
+ t.column :calendar_month_name, :string, :null => false, :limit => 9 # January, February, etc.
9
+ t.column :calendar_month_number, :integer, :null => false # 1, 2, 3, ... 12
10
+ t.column :calendar_week, :string, :null => false, :limit => 2 # 1, 2, 3, ... 52
11
+ t.column :calendar_week_number, :integer, :null => false # 1, 2, 3, ... 52
12
+ t.column :day_number_in_calendar_year, :integer, :null => false # 1, 2, 3, ... 365
13
+ t.column :day_number_in_calendar_month, :integer, :null => false # 1, 2, 3, ... 31
14
+ t.column :day_in_week, :string, :null => false, :limit => 9 # Monday, Tuesday, etc.
15
+ <% if include_fiscal_year -%>
16
+ t.column :fiscal_year, :string, :null => false
17
+ t.column :fiscal_quarter, :string, :null => false, :limit => 2
18
+ t.column :fiscal_quarter_number, :integer, :null => false
19
+ t.column :fiscal_month_number, :integer, :null => false
20
+ t.column :fiscal_week, :string, :null => false, :limit => 2
21
+ t.column :fiscal_week_number, :integer, :null => false
22
+ t.column :day_number_in_fiscal_year, :integer, :null => false
23
+ <% end -%>
24
+ end
25
+ # add indexes as required
26
+ end
27
+
28
+ def self.down
29
+ drop_table :<%= table_name %>
30
+ end
31
+ end
@@ -0,0 +1,3 @@
1
+ class <%= class_name %> < ActiveWarehouse::DateDimension
2
+
3
+ end
@@ -0,0 +1,8 @@
1
+ require File.dirname(__FILE__) + '<%= '/..' * class_nesting_depth %>/../test_helper'
2
+
3
+ class <%= class_name %>Test < Test::Unit::TestCase
4
+ # Replace this with your real tests.
5
+ def test_truth
6
+ assert true
7
+ end
8
+ end
@@ -1,16 +1,7 @@
1
1
  class <%= migration_name %> < ActiveRecord::Migration
2
2
  def self.up
3
- fields = {
4
- # Add dimension attributes here as name => type
5
- # Example: :store_name => :string
6
- }
7
3
  create_table :<%= table_name %> do |t|
8
- fields.each do |name,type|
9
- t.column name, type
10
- end
11
- end
12
- fields.each do |name,type|
13
- add_index :<%= table_name %>, name unless type == :text
4
+
14
5
  end
15
6
  end
16
7
 
@@ -21,7 +21,7 @@ class DimensionViewGenerator < Rails::Generator::NamedBase
21
21
  # define the query target class and expose its columns as attributes for the view
22
22
  @query_target_class = @query_target_class_name.constantize
23
23
  @view_attributes = @query_target_class.column_names
24
- @view_query = "select * from #{query_target_table_name}"
24
+ @view_query = "select #{@view_attributes.join(',')} from #{query_target_table_name}"
25
25
  end
26
26
 
27
27
  def manifest
@@ -37,7 +37,7 @@ class DimensionViewGenerator < Rails::Generator::NamedBase
37
37
  # Generate the files
38
38
  m.template 'model.rb', File.join('app/models', class_path, "#{file_name}.rb")
39
39
  m.template 'unit_test.rb', File.join('test/unit', class_path, "#{file_name}_test.rb")
40
- m.template 'fixture.yml', File.join('test/fixtures', class_path, "#{table_name}.yml")
40
+ #m.template 'fixture.yml', File.join('test/fixtures', class_path, "#{table_name}.yml")
41
41
 
42
42
  # Generate the migration unless :skip_migration option is specified
43
43
  unless options[:skip_migration]
@@ -1,7 +1,13 @@
1
1
  class <%= migration_name %> < ActiveRecord::Migration
2
2
  def self.up
3
- create_view "<%= view_name %>", <%= view_query do |t|
4
- <%= view_attributes %>
3
+ create_view "<%= view_name %>", "<%= view_query %>" do |t|
4
+ <%- view_attributes.each do |view_attribute| -%>
5
+ <%- if view_attribute == 'id' -%>
6
+ t.column :id
7
+ <%- else -%>
8
+ t.column :<%= name %>_<%= view_attribute %>
9
+ <%- end -%>
10
+ <%- end -%>
5
11
  end
6
12
  end
7
13
 
@@ -3,6 +3,8 @@ class <%= migration_name %> < ActiveRecord::Migration
3
3
  create_table :<%= table_name %> do |t|
4
4
 
5
5
  end
6
+ # you should add indexes for each foreign key, but don't add
7
+ # the foreign key itself unless you really know what you are doing.
6
8
  end
7
9
 
8
10
  def self.down
@@ -0,0 +1 @@
1
+ ./script/generate time_dimension
@@ -0,0 +1,5 @@
1
+ # Read about fixtures at http://ar.rubyonrails.org/classes/Fixtures.html
2
+ first:
3
+ id: 1
4
+ another:
5
+ id: 2
@@ -0,0 +1,12 @@
1
+ class <%= migration_name %> < ActiveRecord::Migration
2
+ def self.up
3
+ create_table :<%= table_name %> do |t|
4
+ t.column :hour_of_day, :integer, :null => false
5
+ t.column :minute_of_hour, :integer, :null => false
6
+ end
7
+ end
8
+
9
+ def self.down
10
+ drop_table :<%= table_name %>
11
+ end
12
+ end
@@ -0,0 +1,3 @@
1
+ class <%= class_name %> < ActiveWarehouse::Dimension
2
+
3
+ end
@@ -0,0 +1,8 @@
1
+ require File.dirname(__FILE__) + '<%= '/..' * class_nesting_depth %>/../test_helper'
2
+
3
+ class <%= class_name %>Test < Test::Unit::TestCase
4
+ # Replace this with your real tests.
5
+ def test_truth
6
+ assert true
7
+ end
8
+ end
@@ -0,0 +1,14 @@
1
+ class TimeDimensionGenerator < DimensionGenerator
2
+ attr_accessor :file_name
3
+
4
+ default_options :skip_migration => false
5
+
6
+ def initialize(runtime_args, runtime_options = {})
7
+ super
8
+
9
+ @name = 'date'
10
+ @table_name = "#{@name}_dimension"
11
+ @class_name = "#{@name.camelize}Dimension"
12
+ @file_name = "#{@class_name.tableize.singularize}"
13
+ end
14
+ end
@@ -35,7 +35,7 @@ end
35
35
 
36
36
  unless defined?(ActiveSupport)
37
37
  begin
38
- $:.unshift(File.dirname(__FILE__) + "/../../activesupport/lib")
38
+ $:.unshift(File.dirname(__FILE__) + "/../../activesupport/lib")
39
39
  require 'active_support'
40
40
  rescue LoadError
41
41
  gem 'activesupport'
@@ -62,18 +62,29 @@ unless defined?(ActionView)
62
62
  end
63
63
  end
64
64
 
65
+ require 'fastercsv'
66
+ require 'fileutils'
67
+ require 'adapter_extensions'
68
+
65
69
  # Require 1.1.6 compatibility code if necessary
66
70
  require 'active_record/version'
67
71
  if ActiveRecord::VERSION::MAJOR < 1 || ActiveRecord::VERSION::MINOR < 15
68
72
  require 'active_warehouse/compat/compat'
69
73
  end
70
74
 
75
+ require 'active_warehouse/ordered_hash'
76
+ require 'active_warehouse/field'
77
+ require 'active_warehouse/aggregate_field'
78
+ require 'active_warehouse/calculated_field'
71
79
  require 'active_warehouse/version'
72
80
  require 'active_warehouse/core_ext'
73
- require 'active_warehouse/aggregate'
81
+ require 'active_warehouse/prejoin_fact'
74
82
  require 'active_warehouse/fact'
83
+ require 'active_warehouse/bridge'
75
84
  require 'active_warehouse/dimension'
76
85
  require 'active_warehouse/cube'
86
+ require 'active_warehouse/cube_query_result'
87
+ require 'active_warehouse/aggregate'
77
88
  require 'active_warehouse/report'
78
89
  require 'active_warehouse/view'
79
90
  require 'active_warehouse/builder'
@@ -1,260 +1,61 @@
1
- module ActiveWarehouse #:nodoc:
2
- # An aggreate within a cube used to store calculated values. Each aggregate will contain values for a dimension pair,
3
- # down each of the dimension hierarchies
4
- class Aggregate < ActiveRecord::Base
5
- class << self
6
- attr_accessor :name, :cube, :dimension1, :dimension2, :dimension1_hierarchy_name, :dimension2_hierarchy_name
7
-
8
- # Get the table name for the aggregate
9
- def table_name
10
- name = self.name.demodulize.underscore
11
- set_table_name(name)
12
- name
13
- end
14
-
15
- # Returns the aggregate ID
16
- def aggregate_id
17
- table_name =~ /(\d+)$/
18
- $1.to_i
19
- end
20
-
21
- # Returns the AggregateMetaData instance associated with this aggregate
22
- def meta_data
23
- AggregateMetaData.find(aggregate_id)
24
- end
25
-
26
- # Return true if the aggregate needs to be rebuilt
27
- def needs_rebuild?(last_build=nil)
28
- return true if meta_data.populated_at.nil?
29
- return true if last_build && (meta_data.populated_at < last_build)
30
- return false
31
- end
32
-
33
- # Return a key for the aggregate
34
- def key(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
35
- AggregateKey.new(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
36
- end
37
-
38
- # Create the aggregate table if required. Set force option to true to force creation of the table
39
- # if it already exists
40
- def create_storage_table(force=false)
41
- connection.drop_table(table_name) if force and table_exists?
42
- if !table_exists?
43
- connection.create_table(table_name, :id => false) do |t|
44
- t.column :dimension1_path, :string
45
- t.column :dimension1_stage, :integer
46
- t.column :dimension2_path, :string
47
- t.column :dimension2_stage, :integer
48
- cube.fact_class.aggregate_fields.each do |field|
49
- #options = cube.fact_class.aggregate_field_options[field]
50
- col = cube.fact_class.columns_hash[field.to_s]
51
- t.column field, col.type if col
52
- end
53
- end
54
- connection.add_index(table_name, :dimension1_path)
55
- connection.add_index(table_name, :dimension1_stage)
56
- connection.add_index(table_name, :dimension2_path)
57
- connection.add_index(table_name, :dimension2_stage)
58
- end
59
- end
60
-
61
- # Populate the aggregate table
62
- def populate
63
- # create the storage table if necessary
64
- create_storage_table
65
-
66
- #puts "Populating aggregate table #{table_name}"
67
- # clear out the current data
68
- #connection.execute("TRUNCATE TABLE #{table_name}") #TODO: make this generic to support all databases
69
- delete_all
70
-
71
- $first = false
72
-
73
- # aggregate the data for the two dimensions
74
- fact_class = cube.fact_class
75
- dim1 = Dimension.class_name(dimension1).constantize
76
- dim2 = Dimension.class_name(dimension2).constantize
77
- dim1_stage_path = []
78
- dim1.hierarchy(meta_data.dimension1_hierarchy.to_sym).each_with_index do |dim1_stage_name, dim1_stage_level|
79
- dim1_stage_path << dim1_stage_name
80
- dim2_stage_path = []
81
- dim2.hierarchy(meta_data.dimension2_hierarchy.to_sym).each_with_index do |dim2_stage_name, dim2_stage_level|
82
- dim2_stage_path << dim2_stage_name
83
-
84
- stmt, fields = build_query(fact_class, dim1, dim1_stage_path, dim2, dim2_stage_path)
85
-
86
- puts "\nSTMT: #{stmt}" if $first
87
-
88
- # Get the facts and aggregate them
89
- # TODO: replace with select_all
90
- fact_class.connection.select_all(stmt).each do |row|
91
- require 'pp'
92
- pp row if $first
93
- dim1_value = []
94
- dim1_stage_path.each do |v|
95
- dim1_value << row["#{v}"]
96
- end
97
- dim2_value = []
98
- dim2_stage_path.each do |v|
99
- dim2_value << row["#{v}"]
100
- end
101
-
102
- agg_instance = new
103
- agg_instance.dimension1_path = dim1_value.join(':')
104
- agg_instance.dimension1_stage = dim1_stage_level
105
- agg_instance.dimension2_path = dim2_value.join(':')
106
- agg_instance.dimension2_stage = dim2_stage_level
107
-
108
- puts "DIM1_PATH: #{agg_instance.dimension1_path}" if $first
109
- puts "DIM2_PATH: #{agg_instance.dimension2_path}" if $first
110
-
111
-
112
- pp fields if $first
113
- fields.each do |field|
114
- # do the average here
115
- puts "setting field #{field}, value is #{row[field.to_s]}" if $first
116
- agg_instance.send("#{field}=".to_sym, row[field.to_s])
117
- end
118
- agg_instance.save!
119
-
120
- meta_data.update_attribute(:populated_at, Time.now)
121
-
122
- $first = false
123
- end
124
- end
125
- end
126
- end
127
-
128
- # Build the aggregation query for the given dimensions and stage paths
129
- def build_query(fact_class, dim1, dim1_stage_path, dim2, dim2_stage_path)
130
- dim1_group = dim1_stage_path.collect { |p| "d1.#{p}"}.join(", ")
131
- dim2_group = dim2_stage_path.collect { |p| "d2.#{p}"}.join(", ")
132
-
133
- # Set up the find options
134
- fact_find_options = {}
135
- fact_find_options[:group] = "#{dim1_group}, #{dim2_group}"
136
- fact_find_options[:joins] = "join #{dim1.table_name} d1 on f.#{dim1.foreign_key} = d1.id"
137
- fact_find_options[:joins] << " join #{dim2.table_name} d2 on f.#{dim2.foreign_key} = d2.id"
138
-
139
- # Build the 'select' part of the query
140
- # denominator = nil
141
- fields = []
142
- fact_select = ["#{dim1_group}, #{dim2_group}"]
143
- fact_class.aggregate_fields.each do |field_name|
144
- options = fact_class.aggregate_field_options[field_name]
145
- fields << field_name
146
-
147
- options[:type] ||= :sum
148
- case options[:type]
149
- when :sum
150
- fact_select << " sum(f.#{field_name}) as #{field_name}"
151
- when Hash
152
- if options[:type][dim1.sym] == :average && options[:type][dim2.sym] == :average
153
- # I believe this is a special case, but I'm not sure how yet. If both dimensions are defined
154
- # averages then perhaps that value cannot be calculated at all. TODO: research
155
- else
156
- fact_select << " sum(f.#{field_name}) as #{field_name}"
157
- end
158
- else
159
- raise "Unsupported aggregate type: #{options[:type]}"
160
- end
161
- end
162
- fact_find_options[:select] = fact_select.join(',')
1
+ # Source file which defines the ActiveWarehouse::Aggregate module and imports
2
+ # the aggregate implementations.
163
3
 
164
- # put the SQL statement together
165
- stmt = "select #{fact_find_options[:select]} from "
166
- stmt << "#{fact_class.table_name} f #{fact_find_options[:joins]} "
167
- stmt << "group by #{fact_find_options[:group]} "
168
-
169
- return stmt.strip, fields
170
- end
171
-
172
- end
173
-
174
- public
175
- # Clone and reset at the same time
176
- def clone_and_reset
177
- o = clone
178
- o.reset
179
- o
180
- end
181
-
182
- def non_data_fields
183
- ['dimension1_path','dimension1_stage','dimension2_path','dimension2_stage']
184
- end
185
-
186
- def data_fields
187
- fields = []
188
- self.class.columns.each do |column|
189
- unless non_data_fields.include?(column.name)
190
- fields << column.name
191
- end
192
- end
193
- fields
194
- end
195
-
196
- # Reset the aggregate
197
- def reset
198
- self.class.columns.each do |column|
199
- unless non_data_fields.include?(column.name)
200
- value = column.number? ? 0 : 'None'
201
- send("#{column.name}=".to_sym, value)
202
- end
203
- end
204
- end
205
- end
206
-
207
- # ActiveRecord object which stores meta data about the aggregate
208
- class AggregateMetaData < ActiveRecord::Base
209
- # Build the underlying table. Set force to true to force the build of the table
210
- def self.build_table(force=false)
211
- connection.drop_table(table_name) if force and table_exists?
212
- if !table_exists?
213
- connection.create_table(table_name) do |t|
214
- t.column :cube_name, :string
215
- t.column :dimension1, :string
216
- t.column :dimension1_hierarchy, :string
217
- t.column :dimension2, :string
218
- t.column :dimension2_hierarchy, :string
219
- t.column :created_at, :datetime
220
- t.column :populated_at, :datetime
4
+ module ActiveWarehouse #:nodoc:
5
+ # This module contains classes which handle aggregation of cube data using
6
+ # various algorithms
7
+ module Aggregate
8
+ # Base class for aggregate implementations
9
+ class Aggregate
10
+
11
+ # Reader for the cube class
12
+ attr_reader :cube_class
13
+
14
+ # Initialize the aggregate for the given cube class
15
+ def initialize(cube_class)
16
+ @cube_class = cube_class
17
+ end
18
+
19
+ protected
20
+ # Get the connection to use for SQL execution
21
+ def connection
22
+ cube_class.connection
23
+ end
24
+
25
+ # Convenience accessor to get the cube's fact class. Delegates to the
26
+ # cube class.
27
+ def fact_class
28
+ cube_class.fact_class
29
+ end
30
+
31
+ # Parse the query args and return an options hash.
32
+ def parse_query_args(*args)
33
+ options = {}
34
+ if args.length == 1
35
+ options = args[0]
36
+ elsif args.length >= 4
37
+ options[:column_dimension_name] = args[0]
38
+ options[:column_hierarchy_name] = args[1]
39
+ options[:row_dimension_name] = args[2]
40
+ options[:row_hierarchy_name] = args[3]
41
+ options[:conditions] = args[4] if args.length >= 5
42
+ options[:cstage] = args[5] if args.length >= 6
43
+ options[:rstage] = args[6] if args.length >= 7
44
+ options[:filters] = args[7] if args.length >= 8
45
+ options.merge!(args[8]) if args.length >= 9
46
+ else
47
+ raise ArgumentError, "The query method accepts either 1 Hash (new style) or 4 to 8 arguments (old style)"
221
48
  end
222
- connection.add_index table_name, :cube_name
49
+ options
223
50
  end
224
51
  end
225
- def key
226
- Aggregate.key(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
227
- end
228
- end
229
-
230
- # Key for aggregate caching
231
- class AggregateKey
232
- attr_reader :dimension1, :dimension1_hierarchy, :dimension2, :dimension2_hierarchy
233
-
234
- def initialize(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
235
- @dimension1 = dimension1
236
- @dimension1_hierarchy = dimension1_hierarchy
237
- @dimension2 = dimension2
238
- @dimension2_hierarchy = dimension2_hierarchy
239
- end
240
-
241
- def ==(o)
242
- o.instance_of?(self.class) and (o.to_s == to_s or o.to_s = to_rs)
243
- end
244
-
245
- def hash
246
- to_s.hash
247
- end
248
-
249
- def to_s
250
- "#{@dimension1}.#{@dimension1_hierarchy}.#{@dimension2}.#{@dimension2_hierarchy}"
251
- end
252
-
253
- # Return the "reveresed" version of this key String representation
254
- def to_rs
255
- "#{@dimension2}.#{@dimension2_hierarchy}.#{@dimension1}.#{@dimension1_hierarchy}"
256
- end
257
52
  end
258
53
  end
259
54
 
260
- ActiveWarehouse::AggregateMetaData.build_table
55
+ require 'active_warehouse/aggregate/no_aggregate'
56
+ require 'active_warehouse/aggregate/rolap_common'
57
+ require 'active_warehouse/aggregate/pipelined_rolap_aggregate'
58
+ require 'active_warehouse/aggregate/rolap_aggregate'
59
+ require 'active_warehouse/aggregate/dwarf_common'
60
+ require 'active_warehouse/aggregate/dwarf_aggregate'
61
+ require 'active_warehouse/aggregate/pid_aggregate'