activewarehouse 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -14
- data/Rakefile +16 -5
- data/doc/references.txt +4 -0
- data/generators/bridge/templates/migration.rb +9 -2
- data/generators/bridge/templates/unit_test.rb +8 -0
- data/generators/date_dimension/USAGE +1 -0
- data/generators/date_dimension/date_dimension_generator.rb +16 -0
- data/generators/date_dimension/templates/fixture.yml +5 -0
- data/generators/date_dimension/templates/migration.rb +31 -0
- data/generators/date_dimension/templates/model.rb +3 -0
- data/generators/date_dimension/templates/unit_test.rb +8 -0
- data/generators/dimension/templates/migration.rb +1 -10
- data/generators/dimension_view/dimension_view_generator.rb +2 -2
- data/generators/dimension_view/templates/migration.rb +8 -2
- data/generators/fact/templates/migration.rb +2 -0
- data/generators/time_dimension/USAGE +1 -0
- data/generators/time_dimension/templates/fixture.yml +5 -0
- data/generators/time_dimension/templates/migration.rb +12 -0
- data/generators/time_dimension/templates/model.rb +3 -0
- data/generators/time_dimension/templates/unit_test.rb +8 -0
- data/generators/time_dimension/time_dimension_generator.rb +14 -0
- data/lib/active_warehouse.rb +13 -2
- data/lib/active_warehouse/aggregate.rb +54 -253
- data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
- data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
- data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
- data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
- data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
- data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
- data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
- data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
- data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
- data/lib/active_warehouse/aggregate_field.rb +49 -0
- data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
- data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
- data/lib/active_warehouse/builder.rb +2 -1
- data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
- data/lib/active_warehouse/builder/generator/generator.rb +13 -0
- data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
- data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
- data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
- data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
- data/lib/active_warehouse/calculated_field.rb +27 -0
- data/lib/active_warehouse/compat/compat.rb +4 -4
- data/lib/active_warehouse/cube.rb +126 -225
- data/lib/active_warehouse/cube_query_result.rb +69 -0
- data/lib/active_warehouse/dimension.rb +64 -29
- data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
- data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
- data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
- data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
- data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
- data/lib/active_warehouse/fact.rb +119 -40
- data/lib/active_warehouse/field.rb +74 -0
- data/lib/active_warehouse/ordered_hash.rb +34 -0
- data/lib/active_warehouse/prejoin_fact.rb +97 -0
- data/lib/active_warehouse/report/abstract_report.rb +40 -14
- data/lib/active_warehouse/report/chart_report.rb +3 -3
- data/lib/active_warehouse/report/table_report.rb +8 -3
- data/lib/active_warehouse/version.rb +1 -1
- data/lib/active_warehouse/view/report_helper.rb +144 -34
- data/tasks/active_warehouse_tasks.rake +28 -10
- metadata +107 -30
data/README
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
== ActiveWarehouse
|
2
2
|
|
3
|
-
The ActiveWarehouse library provides classes and functions which help with
|
3
|
+
The ActiveWarehouse library provides classes and functions which help with
|
4
|
+
building Data Warehouses using Rails. It can be installed either as a plugin
|
5
|
+
or as a Gem.
|
4
6
|
|
5
7
|
To install as a plugin just use:
|
6
8
|
|
@@ -11,19 +13,19 @@ To get the latest edge version.
|
|
11
13
|
To install as a Gem, use:
|
12
14
|
|
13
15
|
gem install activewarehouse
|
14
|
-
|
15
|
-
At this point you can use ActiveWarehouse in any application using:
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
17
|
+
On *nix you will need to run this command as root or better yet, using sudo.
|
18
|
+
|
19
|
+
Next, you will need to freeze or link the Gem to your Rails app. I prefer using
|
20
|
+
the gemsonrails project:
|
21
21
|
|
22
22
|
gem install gemsonrails
|
23
23
|
|
24
24
|
And then in your Rails app:
|
25
25
|
|
26
26
|
rake gems:link GEM=activewarehouse
|
27
|
+
|
28
|
+
It is possible that freezing the Gem to the Rails app may not work at all times. It is most often best to install as a plugin.
|
27
29
|
|
28
30
|
== Generators
|
29
31
|
|
@@ -52,12 +54,14 @@ ActiveWarehouse comes with several generators
|
|
52
54
|
script/generate dimension_view OrderDate Date
|
53
55
|
script/generate dimension_view order_date date
|
54
56
|
|
55
|
-
Creates an OrderDateDimension class which is represented by a view on top
|
57
|
+
Creates an OrderDateDimension class which is represented by a view on top
|
58
|
+
of the DateDimension.
|
56
59
|
|
57
60
|
The rules for naming are as follows:
|
58
61
|
|
59
62
|
Facts:
|
60
|
-
Fact classes and tables follow the typical Rails rules: classes are singular
|
63
|
+
Fact classes and tables follow the typical Rails rules: classes are singular
|
64
|
+
and tables are pluralized.
|
61
65
|
Both the class and table name are suffixed by "_fact".
|
62
66
|
Dimensions:
|
63
67
|
Dimension classes and tables are both singular.
|
@@ -68,19 +72,28 @@ Bridge:
|
|
68
72
|
Bridge classes and tables are both singular.
|
69
73
|
Both the class name and the table name are suffixed by "_bridge".
|
70
74
|
Dimension View:
|
71
|
-
Dimension View classes are singular. The underlying data structure is a view
|
75
|
+
Dimension View classes are singular. The underlying data structure is a view
|
76
|
+
on top of an existing dimension.
|
72
77
|
Both the class name and the view name are suffixed by "_dimension"
|
73
78
|
|
74
79
|
== ETL
|
75
80
|
|
76
|
-
The ActiveWarehouse plugin does not directly handle Extract-Transform-Load
|
81
|
+
The ActiveWarehouse plugin does not directly handle Extract-Transform-Load
|
82
|
+
processes; however, the ActiveWarehouse ETL gem (installed separately) can help.
|
83
|
+
To install it use:
|
77
84
|
|
78
85
|
gem install activewarehouse-etl
|
86
|
+
|
87
|
+
Once again you should run this command as root or using sudo.
|
79
88
|
|
80
|
-
More information on the ETL process can be found at
|
89
|
+
More information on the ETL process can be found at
|
90
|
+
http://activewarehouse.rubyforge.org/etl
|
81
91
|
|
82
92
|
== Tutorial
|
83
93
|
|
84
|
-
A tutorial for ActiveWarehouse is available online at
|
94
|
+
A tutorial for ActiveWarehouse is available online at
|
95
|
+
http://anthonyeden.com/2006/12/20/activewarehouse-example-with-rails-svn-logs
|
96
|
+
(Note that it is out of date.)
|
85
97
|
|
86
|
-
You can also get a demo from the ActiveWarehouse subversion repository. Look in
|
98
|
+
You can also get a demo from the ActiveWarehouse subversion repository. Look in
|
99
|
+
the SVN_ROOT/demo directory.
|
data/Rakefile
CHANGED
@@ -68,11 +68,13 @@ spec = Gem::Specification.new do |s|
|
|
68
68
|
ActiveWarehouse extends Rails to provide functionality specific for building data warehouses.
|
69
69
|
EOF
|
70
70
|
|
71
|
-
s.add_dependency('rake',
|
72
|
-
s.add_dependency('
|
73
|
-
s.add_dependency('
|
74
|
-
s.add_dependency('
|
75
|
-
s.add_dependency('
|
71
|
+
s.add_dependency('rake', '>= 0.7.1')
|
72
|
+
s.add_dependency('fastercsv', '>= 1.1.0')
|
73
|
+
s.add_dependency('activesupport', '>= 1.3.1')
|
74
|
+
s.add_dependency('activerecord', '>= 1.14.4')
|
75
|
+
s.add_dependency('actionpack', '>= 1.12.5')
|
76
|
+
s.add_dependency('rails_sql_views', '>= 0.1.0')
|
77
|
+
s.add_dependency('adapter_extensions', '>= 0.1.0')
|
76
78
|
|
77
79
|
s.rdoc_options << '--exclude' << '.'
|
78
80
|
s.has_rdoc = false
|
@@ -135,4 +137,13 @@ end
|
|
135
137
|
desc "Publish the API documentation"
|
136
138
|
task :pdoc => [:rdoc] do
|
137
139
|
Rake::SshDirPublisher.new("aeden@rubyforge.org", "/var/www/gforge-projects/activewarehouse/rdoc", "rdoc").upload
|
140
|
+
end
|
141
|
+
|
142
|
+
desc "Reinstall the gem from a local package copy"
|
143
|
+
task :reinstall => [:package] do
|
144
|
+
windows = RUBY_PLATFORM =~ /mswin/
|
145
|
+
sudo = windows ? '' : 'sudo'
|
146
|
+
gem = windows ? 'gem.bat' : 'gem'
|
147
|
+
`#{sudo} #{gem} uninstall -x -i #{PKG_NAME}`
|
148
|
+
`#{sudo} #{gem} install pkg/#{PKG_NAME}-#{PKG_VERSION}`
|
138
149
|
end
|
data/doc/references.txt
ADDED
@@ -0,0 +1,4 @@
|
|
1
|
+
The following papers are relevant to building, storing and querying data cubes with large databases.
|
2
|
+
|
3
|
+
http://research.microsoft.com/research/pubs/view.aspx?msr_tr_id=MSR-TR-95-22
|
4
|
+
http://dbpubs.stanford.edu/pub/showDoc.Fulltext?lang=en&doc=1995-34&format=pdf&compression=&name=1995-34.pdf
|
@@ -1,8 +1,14 @@
|
|
1
1
|
class <%= migration_name %> < ActiveRecord::Migration
|
2
2
|
def self.up
|
3
3
|
fields = {
|
4
|
-
#
|
5
|
-
#
|
4
|
+
# the following are the required bridge table columns for
|
5
|
+
# variable depth hierarchies. Do not change them unless you know
|
6
|
+
# what you are doing.
|
7
|
+
:parent_id => :integer,
|
8
|
+
:child_id => :integer,
|
9
|
+
:num_levels_from_parent => :integer,
|
10
|
+
:is_bottom => :boolean,
|
11
|
+
:is_top => :boolean
|
6
12
|
}
|
7
13
|
create_table :<%= table_name %> do |t|
|
8
14
|
fields.each do |name,type|
|
@@ -12,6 +18,7 @@ class <%= migration_name %> < ActiveRecord::Migration
|
|
12
18
|
fields.each do |name,type|
|
13
19
|
add_index :<%= table_name %>, name unless type == :text
|
14
20
|
end
|
21
|
+
add_index :<%= table_name %>, [:parent_id, :child_id, :num_levels_from_parent], :unique => true
|
15
22
|
end
|
16
23
|
|
17
24
|
def self.down
|
@@ -0,0 +1 @@
|
|
1
|
+
./script/generate date_dimension
|
@@ -0,0 +1,16 @@
|
|
1
|
+
class DateDimensionGenerator < DimensionGenerator
|
2
|
+
attr_accessor :file_name
|
3
|
+
attr_accessor :include_fiscal_year
|
4
|
+
|
5
|
+
default_options :skip_migration => false
|
6
|
+
|
7
|
+
def initialize(runtime_args, runtime_options = {})
|
8
|
+
super
|
9
|
+
|
10
|
+
@name = 'date'
|
11
|
+
@table_name = "#{@name}_dimension"
|
12
|
+
@class_name = "#{@name.camelize}Dimension"
|
13
|
+
@file_name = "#{@class_name.tableize.singularize}"
|
14
|
+
@include_fiscal_year = true # TODO: accept a runtime option to set this
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
class <%= migration_name %> < ActiveRecord::Migration
|
2
|
+
def self.up
|
3
|
+
create_table :<%= table_name %> do |t|
|
4
|
+
t.column :sql_date, :date, :null => false # SQL Date object
|
5
|
+
t.column :calendar_year, :string, :null => false # 2005, 2006, 2007, etc.
|
6
|
+
t.column :calendar_quarter, :string, :null => false, :limit => 2 # Q1, Q2, Q3 or Q4
|
7
|
+
t.column :calendar_quarter_number, :integer, :null => false # 1, 2, 3 or 4
|
8
|
+
t.column :calendar_month_name, :string, :null => false, :limit => 9 # January, February, etc.
|
9
|
+
t.column :calendar_month_number, :integer, :null => false # 1, 2, 3, ... 12
|
10
|
+
t.column :calendar_week, :string, :null => false, :limit => 2 # 1, 2, 3, ... 52
|
11
|
+
t.column :calendar_week_number, :integer, :null => false # 1, 2, 3, ... 52
|
12
|
+
t.column :day_number_in_calendar_year, :integer, :null => false # 1, 2, 3, ... 365
|
13
|
+
t.column :day_number_in_calendar_month, :integer, :null => false # 1, 2, 3, ... 31
|
14
|
+
t.column :day_in_week, :string, :null => false, :limit => 9 # Monday, Tuesday, etc.
|
15
|
+
<% if include_fiscal_year -%>
|
16
|
+
t.column :fiscal_year, :string, :null => false
|
17
|
+
t.column :fiscal_quarter, :string, :null => false, :limit => 2
|
18
|
+
t.column :fiscal_quarter_number, :integer, :null => false
|
19
|
+
t.column :fiscal_month_number, :integer, :null => false
|
20
|
+
t.column :fiscal_week, :string, :null => false, :limit => 2
|
21
|
+
t.column :fiscal_week_number, :integer, :null => false
|
22
|
+
t.column :day_number_in_fiscal_year, :integer, :null => false
|
23
|
+
<% end -%>
|
24
|
+
end
|
25
|
+
# add indexes as required
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.down
|
29
|
+
drop_table :<%= table_name %>
|
30
|
+
end
|
31
|
+
end
|
@@ -1,16 +1,7 @@
|
|
1
1
|
class <%= migration_name %> < ActiveRecord::Migration
|
2
2
|
def self.up
|
3
|
-
fields = {
|
4
|
-
# Add dimension attributes here as name => type
|
5
|
-
# Example: :store_name => :string
|
6
|
-
}
|
7
3
|
create_table :<%= table_name %> do |t|
|
8
|
-
|
9
|
-
t.column name, type
|
10
|
-
end
|
11
|
-
end
|
12
|
-
fields.each do |name,type|
|
13
|
-
add_index :<%= table_name %>, name unless type == :text
|
4
|
+
|
14
5
|
end
|
15
6
|
end
|
16
7
|
|
@@ -21,7 +21,7 @@ class DimensionViewGenerator < Rails::Generator::NamedBase
|
|
21
21
|
# define the query target class and expose its columns as attributes for the view
|
22
22
|
@query_target_class = @query_target_class_name.constantize
|
23
23
|
@view_attributes = @query_target_class.column_names
|
24
|
-
@view_query = "select
|
24
|
+
@view_query = "select #{@view_attributes.join(',')} from #{query_target_table_name}"
|
25
25
|
end
|
26
26
|
|
27
27
|
def manifest
|
@@ -37,7 +37,7 @@ class DimensionViewGenerator < Rails::Generator::NamedBase
|
|
37
37
|
# Generate the files
|
38
38
|
m.template 'model.rb', File.join('app/models', class_path, "#{file_name}.rb")
|
39
39
|
m.template 'unit_test.rb', File.join('test/unit', class_path, "#{file_name}_test.rb")
|
40
|
-
m.template 'fixture.yml', File.join('test/fixtures', class_path, "#{table_name}.yml")
|
40
|
+
#m.template 'fixture.yml', File.join('test/fixtures', class_path, "#{table_name}.yml")
|
41
41
|
|
42
42
|
# Generate the migration unless :skip_migration option is specified
|
43
43
|
unless options[:skip_migration]
|
@@ -1,7 +1,13 @@
|
|
1
1
|
class <%= migration_name %> < ActiveRecord::Migration
|
2
2
|
def self.up
|
3
|
-
create_view "<%= view_name %>", <%= view_query do |t|
|
4
|
-
|
3
|
+
create_view "<%= view_name %>", "<%= view_query %>" do |t|
|
4
|
+
<%- view_attributes.each do |view_attribute| -%>
|
5
|
+
<%- if view_attribute == 'id' -%>
|
6
|
+
t.column :id
|
7
|
+
<%- else -%>
|
8
|
+
t.column :<%= name %>_<%= view_attribute %>
|
9
|
+
<%- end -%>
|
10
|
+
<%- end -%>
|
5
11
|
end
|
6
12
|
end
|
7
13
|
|
@@ -0,0 +1 @@
|
|
1
|
+
./script/generate time_dimension
|
@@ -0,0 +1,12 @@
|
|
1
|
+
class <%= migration_name %> < ActiveRecord::Migration
|
2
|
+
def self.up
|
3
|
+
create_table :<%= table_name %> do |t|
|
4
|
+
t.column :hour_of_day, :integer, :null => false
|
5
|
+
t.column :minute_of_hour, :integer, :null => false
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.down
|
10
|
+
drop_table :<%= table_name %>
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
class TimeDimensionGenerator < DimensionGenerator
|
2
|
+
attr_accessor :file_name
|
3
|
+
|
4
|
+
default_options :skip_migration => false
|
5
|
+
|
6
|
+
def initialize(runtime_args, runtime_options = {})
|
7
|
+
super
|
8
|
+
|
9
|
+
@name = 'date'
|
10
|
+
@table_name = "#{@name}_dimension"
|
11
|
+
@class_name = "#{@name.camelize}Dimension"
|
12
|
+
@file_name = "#{@class_name.tableize.singularize}"
|
13
|
+
end
|
14
|
+
end
|
data/lib/active_warehouse.rb
CHANGED
@@ -35,7 +35,7 @@ end
|
|
35
35
|
|
36
36
|
unless defined?(ActiveSupport)
|
37
37
|
begin
|
38
|
-
$:.unshift(File.dirname(__FILE__) + "/../../activesupport/lib")
|
38
|
+
$:.unshift(File.dirname(__FILE__) + "/../../activesupport/lib")
|
39
39
|
require 'active_support'
|
40
40
|
rescue LoadError
|
41
41
|
gem 'activesupport'
|
@@ -62,18 +62,29 @@ unless defined?(ActionView)
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
|
+
require 'fastercsv'
|
66
|
+
require 'fileutils'
|
67
|
+
require 'adapter_extensions'
|
68
|
+
|
65
69
|
# Require 1.1.6 compatibility code if necessary
|
66
70
|
require 'active_record/version'
|
67
71
|
if ActiveRecord::VERSION::MAJOR < 1 || ActiveRecord::VERSION::MINOR < 15
|
68
72
|
require 'active_warehouse/compat/compat'
|
69
73
|
end
|
70
74
|
|
75
|
+
require 'active_warehouse/ordered_hash'
|
76
|
+
require 'active_warehouse/field'
|
77
|
+
require 'active_warehouse/aggregate_field'
|
78
|
+
require 'active_warehouse/calculated_field'
|
71
79
|
require 'active_warehouse/version'
|
72
80
|
require 'active_warehouse/core_ext'
|
73
|
-
require 'active_warehouse/
|
81
|
+
require 'active_warehouse/prejoin_fact'
|
74
82
|
require 'active_warehouse/fact'
|
83
|
+
require 'active_warehouse/bridge'
|
75
84
|
require 'active_warehouse/dimension'
|
76
85
|
require 'active_warehouse/cube'
|
86
|
+
require 'active_warehouse/cube_query_result'
|
87
|
+
require 'active_warehouse/aggregate'
|
77
88
|
require 'active_warehouse/report'
|
78
89
|
require 'active_warehouse/view'
|
79
90
|
require 'active_warehouse/builder'
|
@@ -1,260 +1,61 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# down each of the dimension hierarchies
|
4
|
-
class Aggregate < ActiveRecord::Base
|
5
|
-
class << self
|
6
|
-
attr_accessor :name, :cube, :dimension1, :dimension2, :dimension1_hierarchy_name, :dimension2_hierarchy_name
|
7
|
-
|
8
|
-
# Get the table name for the aggregate
|
9
|
-
def table_name
|
10
|
-
name = self.name.demodulize.underscore
|
11
|
-
set_table_name(name)
|
12
|
-
name
|
13
|
-
end
|
14
|
-
|
15
|
-
# Returns the aggregate ID
|
16
|
-
def aggregate_id
|
17
|
-
table_name =~ /(\d+)$/
|
18
|
-
$1.to_i
|
19
|
-
end
|
20
|
-
|
21
|
-
# Returns the AggregateMetaData instance associated with this aggregate
|
22
|
-
def meta_data
|
23
|
-
AggregateMetaData.find(aggregate_id)
|
24
|
-
end
|
25
|
-
|
26
|
-
# Return true if the aggregate needs to be rebuilt
|
27
|
-
def needs_rebuild?(last_build=nil)
|
28
|
-
return true if meta_data.populated_at.nil?
|
29
|
-
return true if last_build && (meta_data.populated_at < last_build)
|
30
|
-
return false
|
31
|
-
end
|
32
|
-
|
33
|
-
# Return a key for the aggregate
|
34
|
-
def key(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
|
35
|
-
AggregateKey.new(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
|
36
|
-
end
|
37
|
-
|
38
|
-
# Create the aggregate table if required. Set force option to true to force creation of the table
|
39
|
-
# if it already exists
|
40
|
-
def create_storage_table(force=false)
|
41
|
-
connection.drop_table(table_name) if force and table_exists?
|
42
|
-
if !table_exists?
|
43
|
-
connection.create_table(table_name, :id => false) do |t|
|
44
|
-
t.column :dimension1_path, :string
|
45
|
-
t.column :dimension1_stage, :integer
|
46
|
-
t.column :dimension2_path, :string
|
47
|
-
t.column :dimension2_stage, :integer
|
48
|
-
cube.fact_class.aggregate_fields.each do |field|
|
49
|
-
#options = cube.fact_class.aggregate_field_options[field]
|
50
|
-
col = cube.fact_class.columns_hash[field.to_s]
|
51
|
-
t.column field, col.type if col
|
52
|
-
end
|
53
|
-
end
|
54
|
-
connection.add_index(table_name, :dimension1_path)
|
55
|
-
connection.add_index(table_name, :dimension1_stage)
|
56
|
-
connection.add_index(table_name, :dimension2_path)
|
57
|
-
connection.add_index(table_name, :dimension2_stage)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# Populate the aggregate table
|
62
|
-
def populate
|
63
|
-
# create the storage table if necessary
|
64
|
-
create_storage_table
|
65
|
-
|
66
|
-
#puts "Populating aggregate table #{table_name}"
|
67
|
-
# clear out the current data
|
68
|
-
#connection.execute("TRUNCATE TABLE #{table_name}") #TODO: make this generic to support all databases
|
69
|
-
delete_all
|
70
|
-
|
71
|
-
$first = false
|
72
|
-
|
73
|
-
# aggregate the data for the two dimensions
|
74
|
-
fact_class = cube.fact_class
|
75
|
-
dim1 = Dimension.class_name(dimension1).constantize
|
76
|
-
dim2 = Dimension.class_name(dimension2).constantize
|
77
|
-
dim1_stage_path = []
|
78
|
-
dim1.hierarchy(meta_data.dimension1_hierarchy.to_sym).each_with_index do |dim1_stage_name, dim1_stage_level|
|
79
|
-
dim1_stage_path << dim1_stage_name
|
80
|
-
dim2_stage_path = []
|
81
|
-
dim2.hierarchy(meta_data.dimension2_hierarchy.to_sym).each_with_index do |dim2_stage_name, dim2_stage_level|
|
82
|
-
dim2_stage_path << dim2_stage_name
|
83
|
-
|
84
|
-
stmt, fields = build_query(fact_class, dim1, dim1_stage_path, dim2, dim2_stage_path)
|
85
|
-
|
86
|
-
puts "\nSTMT: #{stmt}" if $first
|
87
|
-
|
88
|
-
# Get the facts and aggregate them
|
89
|
-
# TODO: replace with select_all
|
90
|
-
fact_class.connection.select_all(stmt).each do |row|
|
91
|
-
require 'pp'
|
92
|
-
pp row if $first
|
93
|
-
dim1_value = []
|
94
|
-
dim1_stage_path.each do |v|
|
95
|
-
dim1_value << row["#{v}"]
|
96
|
-
end
|
97
|
-
dim2_value = []
|
98
|
-
dim2_stage_path.each do |v|
|
99
|
-
dim2_value << row["#{v}"]
|
100
|
-
end
|
101
|
-
|
102
|
-
agg_instance = new
|
103
|
-
agg_instance.dimension1_path = dim1_value.join(':')
|
104
|
-
agg_instance.dimension1_stage = dim1_stage_level
|
105
|
-
agg_instance.dimension2_path = dim2_value.join(':')
|
106
|
-
agg_instance.dimension2_stage = dim2_stage_level
|
107
|
-
|
108
|
-
puts "DIM1_PATH: #{agg_instance.dimension1_path}" if $first
|
109
|
-
puts "DIM2_PATH: #{agg_instance.dimension2_path}" if $first
|
110
|
-
|
111
|
-
|
112
|
-
pp fields if $first
|
113
|
-
fields.each do |field|
|
114
|
-
# do the average here
|
115
|
-
puts "setting field #{field}, value is #{row[field.to_s]}" if $first
|
116
|
-
agg_instance.send("#{field}=".to_sym, row[field.to_s])
|
117
|
-
end
|
118
|
-
agg_instance.save!
|
119
|
-
|
120
|
-
meta_data.update_attribute(:populated_at, Time.now)
|
121
|
-
|
122
|
-
$first = false
|
123
|
-
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
# Build the aggregation query for the given dimensions and stage paths
|
129
|
-
def build_query(fact_class, dim1, dim1_stage_path, dim2, dim2_stage_path)
|
130
|
-
dim1_group = dim1_stage_path.collect { |p| "d1.#{p}"}.join(", ")
|
131
|
-
dim2_group = dim2_stage_path.collect { |p| "d2.#{p}"}.join(", ")
|
132
|
-
|
133
|
-
# Set up the find options
|
134
|
-
fact_find_options = {}
|
135
|
-
fact_find_options[:group] = "#{dim1_group}, #{dim2_group}"
|
136
|
-
fact_find_options[:joins] = "join #{dim1.table_name} d1 on f.#{dim1.foreign_key} = d1.id"
|
137
|
-
fact_find_options[:joins] << " join #{dim2.table_name} d2 on f.#{dim2.foreign_key} = d2.id"
|
138
|
-
|
139
|
-
# Build the 'select' part of the query
|
140
|
-
# denominator = nil
|
141
|
-
fields = []
|
142
|
-
fact_select = ["#{dim1_group}, #{dim2_group}"]
|
143
|
-
fact_class.aggregate_fields.each do |field_name|
|
144
|
-
options = fact_class.aggregate_field_options[field_name]
|
145
|
-
fields << field_name
|
146
|
-
|
147
|
-
options[:type] ||= :sum
|
148
|
-
case options[:type]
|
149
|
-
when :sum
|
150
|
-
fact_select << " sum(f.#{field_name}) as #{field_name}"
|
151
|
-
when Hash
|
152
|
-
if options[:type][dim1.sym] == :average && options[:type][dim2.sym] == :average
|
153
|
-
# I believe this is a special case, but I'm not sure how yet. If both dimensions are defined
|
154
|
-
# averages then perhaps that value cannot be calculated at all. TODO: research
|
155
|
-
else
|
156
|
-
fact_select << " sum(f.#{field_name}) as #{field_name}"
|
157
|
-
end
|
158
|
-
else
|
159
|
-
raise "Unsupported aggregate type: #{options[:type]}"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
fact_find_options[:select] = fact_select.join(',')
|
1
|
+
# Source file which defines the ActiveWarehouse::Aggregate module and imports
|
2
|
+
# the aggregate implementations.
|
163
3
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
class AggregateMetaData < ActiveRecord::Base
|
209
|
-
# Build the underlying table. Set force to true to force the build of the table
|
210
|
-
def self.build_table(force=false)
|
211
|
-
connection.drop_table(table_name) if force and table_exists?
|
212
|
-
if !table_exists?
|
213
|
-
connection.create_table(table_name) do |t|
|
214
|
-
t.column :cube_name, :string
|
215
|
-
t.column :dimension1, :string
|
216
|
-
t.column :dimension1_hierarchy, :string
|
217
|
-
t.column :dimension2, :string
|
218
|
-
t.column :dimension2_hierarchy, :string
|
219
|
-
t.column :created_at, :datetime
|
220
|
-
t.column :populated_at, :datetime
|
4
|
+
module ActiveWarehouse #:nodoc:
|
5
|
+
# This module contains classes which handle aggregation of cube data using
|
6
|
+
# various algorithms
|
7
|
+
module Aggregate
|
8
|
+
# Base class for aggregate implementations
|
9
|
+
class Aggregate
|
10
|
+
|
11
|
+
# Reader for the cube class
|
12
|
+
attr_reader :cube_class
|
13
|
+
|
14
|
+
# Initialize the aggregate for the given cube class
|
15
|
+
def initialize(cube_class)
|
16
|
+
@cube_class = cube_class
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# Get the connection to use for SQL execution
|
21
|
+
def connection
|
22
|
+
cube_class.connection
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convenience accessor to get the cube's fact class. Delegates to the
|
26
|
+
# cube class.
|
27
|
+
def fact_class
|
28
|
+
cube_class.fact_class
|
29
|
+
end
|
30
|
+
|
31
|
+
# Parse the query args and return an options hash.
|
32
|
+
def parse_query_args(*args)
|
33
|
+
options = {}
|
34
|
+
if args.length == 1
|
35
|
+
options = args[0]
|
36
|
+
elsif args.length >= 4
|
37
|
+
options[:column_dimension_name] = args[0]
|
38
|
+
options[:column_hierarchy_name] = args[1]
|
39
|
+
options[:row_dimension_name] = args[2]
|
40
|
+
options[:row_hierarchy_name] = args[3]
|
41
|
+
options[:conditions] = args[4] if args.length >= 5
|
42
|
+
options[:cstage] = args[5] if args.length >= 6
|
43
|
+
options[:rstage] = args[6] if args.length >= 7
|
44
|
+
options[:filters] = args[7] if args.length >= 8
|
45
|
+
options.merge!(args[8]) if args.length >= 9
|
46
|
+
else
|
47
|
+
raise ArgumentError, "The query method accepts either 1 Hash (new style) or 4 to 8 arguments (old style)"
|
221
48
|
end
|
222
|
-
|
49
|
+
options
|
223
50
|
end
|
224
51
|
end
|
225
|
-
def key
|
226
|
-
Aggregate.key(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
# Key for aggregate caching
|
231
|
-
class AggregateKey
|
232
|
-
attr_reader :dimension1, :dimension1_hierarchy, :dimension2, :dimension2_hierarchy
|
233
|
-
|
234
|
-
def initialize(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
|
235
|
-
@dimension1 = dimension1
|
236
|
-
@dimension1_hierarchy = dimension1_hierarchy
|
237
|
-
@dimension2 = dimension2
|
238
|
-
@dimension2_hierarchy = dimension2_hierarchy
|
239
|
-
end
|
240
|
-
|
241
|
-
def ==(o)
|
242
|
-
o.instance_of?(self.class) and (o.to_s == to_s or o.to_s = to_rs)
|
243
|
-
end
|
244
|
-
|
245
|
-
def hash
|
246
|
-
to_s.hash
|
247
|
-
end
|
248
|
-
|
249
|
-
def to_s
|
250
|
-
"#{@dimension1}.#{@dimension1_hierarchy}.#{@dimension2}.#{@dimension2_hierarchy}"
|
251
|
-
end
|
252
|
-
|
253
|
-
# Return the "reveresed" version of this key String representation
|
254
|
-
def to_rs
|
255
|
-
"#{@dimension2}.#{@dimension2_hierarchy}.#{@dimension1}.#{@dimension1_hierarchy}"
|
256
|
-
end
|
257
52
|
end
|
258
53
|
end
|
259
54
|
|
260
|
-
|
55
|
+
require 'active_warehouse/aggregate/no_aggregate'
|
56
|
+
require 'active_warehouse/aggregate/rolap_common'
|
57
|
+
require 'active_warehouse/aggregate/pipelined_rolap_aggregate'
|
58
|
+
require 'active_warehouse/aggregate/rolap_aggregate'
|
59
|
+
require 'active_warehouse/aggregate/dwarf_common'
|
60
|
+
require 'active_warehouse/aggregate/dwarf_aggregate'
|
61
|
+
require 'active_warehouse/aggregate/pid_aggregate'
|