activewarehouse 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -14
- data/Rakefile +16 -5
- data/doc/references.txt +4 -0
- data/generators/bridge/templates/migration.rb +9 -2
- data/generators/bridge/templates/unit_test.rb +8 -0
- data/generators/date_dimension/USAGE +1 -0
- data/generators/date_dimension/date_dimension_generator.rb +16 -0
- data/generators/date_dimension/templates/fixture.yml +5 -0
- data/generators/date_dimension/templates/migration.rb +31 -0
- data/generators/date_dimension/templates/model.rb +3 -0
- data/generators/date_dimension/templates/unit_test.rb +8 -0
- data/generators/dimension/templates/migration.rb +1 -10
- data/generators/dimension_view/dimension_view_generator.rb +2 -2
- data/generators/dimension_view/templates/migration.rb +8 -2
- data/generators/fact/templates/migration.rb +2 -0
- data/generators/time_dimension/USAGE +1 -0
- data/generators/time_dimension/templates/fixture.yml +5 -0
- data/generators/time_dimension/templates/migration.rb +12 -0
- data/generators/time_dimension/templates/model.rb +3 -0
- data/generators/time_dimension/templates/unit_test.rb +8 -0
- data/generators/time_dimension/time_dimension_generator.rb +14 -0
- data/lib/active_warehouse.rb +13 -2
- data/lib/active_warehouse/aggregate.rb +54 -253
- data/lib/active_warehouse/aggregate/dwarf/node.rb +36 -0
- data/lib/active_warehouse/aggregate/dwarf_aggregate.rb +369 -0
- data/lib/active_warehouse/aggregate/dwarf_common.rb +44 -0
- data/lib/active_warehouse/aggregate/dwarf_printer.rb +34 -0
- data/lib/active_warehouse/aggregate/no_aggregate.rb +194 -0
- data/lib/active_warehouse/aggregate/pid_aggregate.rb +29 -0
- data/lib/active_warehouse/aggregate/pipelined_rolap_aggregate.rb +129 -0
- data/lib/active_warehouse/aggregate/rolap_aggregate.rb +181 -0
- data/lib/active_warehouse/aggregate/rolap_common.rb +89 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_1.sql +12 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_10.sql +7166 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_11.sql +14334 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_12.sql +28670 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_13.sql +57342 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_2.sql +26 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_3.sql +54 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_4.sql +110 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_5.sql +222 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_6.sql +446 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_7.sql +894 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_8.sql +1790 -0
- data/lib/active_warehouse/aggregate/templates/pipelined_rollup_9.sql +3582 -0
- data/lib/active_warehouse/aggregate_field.rb +49 -0
- data/lib/active_warehouse/{dimension/bridge.rb → bridge.rb} +7 -3
- data/lib/active_warehouse/bridge/hierarchy_bridge.rb +46 -0
- data/lib/active_warehouse/builder.rb +2 -1
- data/lib/active_warehouse/builder/date_dimension_builder.rb +5 -2
- data/lib/active_warehouse/builder/generator/generator.rb +13 -0
- data/lib/active_warehouse/builder/generator/name_generator.rb +20 -0
- data/lib/active_warehouse/builder/generator/paragraph_generator.rb +11 -0
- data/lib/active_warehouse/builder/random_data_builder.rb +21 -11
- data/lib/active_warehouse/builder/test_data_builder.rb +54 -0
- data/lib/active_warehouse/calculated_field.rb +27 -0
- data/lib/active_warehouse/compat/compat.rb +4 -4
- data/lib/active_warehouse/cube.rb +126 -225
- data/lib/active_warehouse/cube_query_result.rb +69 -0
- data/lib/active_warehouse/dimension.rb +64 -29
- data/lib/active_warehouse/dimension/date_dimension.rb +15 -0
- data/lib/active_warehouse/dimension/dimension_reflection.rb +21 -0
- data/lib/active_warehouse/dimension/dimension_view.rb +17 -2
- data/lib/active_warehouse/dimension/hierarchical_dimension.rb +43 -5
- data/lib/active_warehouse/dimension/slowly_changing_dimension.rb +22 -12
- data/lib/active_warehouse/fact.rb +119 -40
- data/lib/active_warehouse/field.rb +74 -0
- data/lib/active_warehouse/ordered_hash.rb +34 -0
- data/lib/active_warehouse/prejoin_fact.rb +97 -0
- data/lib/active_warehouse/report/abstract_report.rb +40 -14
- data/lib/active_warehouse/report/chart_report.rb +3 -3
- data/lib/active_warehouse/report/table_report.rb +8 -3
- data/lib/active_warehouse/version.rb +1 -1
- data/lib/active_warehouse/view/report_helper.rb +144 -34
- data/tasks/active_warehouse_tasks.rake +28 -10
- metadata +107 -30
data/README
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
== ActiveWarehouse
|
2
2
|
|
3
|
-
The ActiveWarehouse library provides classes and functions which help with
|
3
|
+
The ActiveWarehouse library provides classes and functions which help with
|
4
|
+
building Data Warehouses using Rails. It can be installed either as a plugin
|
5
|
+
or as a Gem.
|
4
6
|
|
5
7
|
To install as a plugin just use:
|
6
8
|
|
@@ -11,19 +13,19 @@ To get the latest edge version.
|
|
11
13
|
To install as a Gem, use:
|
12
14
|
|
13
15
|
gem install activewarehouse
|
14
|
-
|
15
|
-
At this point you can use ActiveWarehouse in any application using:
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
17
|
+
On *nix you will need to run this command as root or better yet, using sudo.
|
18
|
+
|
19
|
+
Next, you will need to freeze or link the Gem to your Rails app. I prefer using
|
20
|
+
the gemsonrails project:
|
21
21
|
|
22
22
|
gem install gemsonrails
|
23
23
|
|
24
24
|
And then in your Rails app:
|
25
25
|
|
26
26
|
rake gems:link GEM=activewarehouse
|
27
|
+
|
28
|
+
Freezing the Gem to the Rails app may not always work. It is usually best to install as a plugin.
|
27
29
|
|
28
30
|
== Generators
|
29
31
|
|
@@ -52,12 +54,14 @@ ActiveWarehouse comes with several generators
|
|
52
54
|
script/generate dimension_view OrderDate Date
|
53
55
|
script/generate dimension_view order_date date
|
54
56
|
|
55
|
-
Creates an OrderDateDimension class which is represented by a view on top
|
57
|
+
Creates an OrderDateDimension class which is represented by a view on top
|
58
|
+
of the DateDimension.
|
56
59
|
|
57
60
|
The rules for naming are as follows:
|
58
61
|
|
59
62
|
Facts:
|
60
|
-
Fact classes and tables follow the typical Rails rules: classes are singular
|
63
|
+
Fact classes and tables follow the typical Rails rules: classes are singular
|
64
|
+
and tables are pluralized.
|
61
65
|
Both the class and table name are suffixed by "_fact".
|
62
66
|
Dimensions:
|
63
67
|
Dimension classes and tables are both singular.
|
@@ -68,19 +72,28 @@ Bridge:
|
|
68
72
|
Bridge classes and tables are both singular.
|
69
73
|
Both the class name and the table name are suffixed by "_bridge".
|
70
74
|
Dimension View:
|
71
|
-
Dimension View classes are singular. The underlying data structure is a view
|
75
|
+
Dimension View classes are singular. The underlying data structure is a view
|
76
|
+
on top of an existing dimension.
|
72
77
|
Both the class name and the view name are suffixed by "_dimension"
|
73
78
|
|
74
79
|
== ETL
|
75
80
|
|
76
|
-
The ActiveWarehouse plugin does not directly handle Extract-Transform-Load
|
81
|
+
The ActiveWarehouse plugin does not directly handle Extract-Transform-Load
|
82
|
+
processes; however, the ActiveWarehouse ETL gem (installed separately) can help.
|
83
|
+
To install it use:
|
77
84
|
|
78
85
|
gem install activewarehouse-etl
|
86
|
+
|
87
|
+
Once again you should run this command as root or using sudo.
|
79
88
|
|
80
|
-
More information on the ETL process can be found at
|
89
|
+
More information on the ETL process can be found at
|
90
|
+
http://activewarehouse.rubyforge.org/etl
|
81
91
|
|
82
92
|
== Tutorial
|
83
93
|
|
84
|
-
A tutorial for ActiveWarehouse is available online at
|
94
|
+
A tutorial for ActiveWarehouse is available online at
|
95
|
+
http://anthonyeden.com/2006/12/20/activewarehouse-example-with-rails-svn-logs
|
96
|
+
(Note that it is out of date.)
|
85
97
|
|
86
|
-
You can also get a demo from the ActiveWarehouse subversion repository. Look in
|
98
|
+
You can also get a demo from the ActiveWarehouse subversion repository. Look in
|
99
|
+
the SVN_ROOT/demo directory.
|
data/Rakefile
CHANGED
@@ -68,11 +68,13 @@ spec = Gem::Specification.new do |s|
|
|
68
68
|
ActiveWarehouse extends Rails to provide functionality specific for building data warehouses.
|
69
69
|
EOF
|
70
70
|
|
71
|
-
s.add_dependency('rake',
|
72
|
-
s.add_dependency('
|
73
|
-
s.add_dependency('
|
74
|
-
s.add_dependency('
|
75
|
-
s.add_dependency('
|
71
|
+
s.add_dependency('rake', '>= 0.7.1')
|
72
|
+
s.add_dependency('fastercsv', '>= 1.1.0')
|
73
|
+
s.add_dependency('activesupport', '>= 1.3.1')
|
74
|
+
s.add_dependency('activerecord', '>= 1.14.4')
|
75
|
+
s.add_dependency('actionpack', '>= 1.12.5')
|
76
|
+
s.add_dependency('rails_sql_views', '>= 0.1.0')
|
77
|
+
s.add_dependency('adapter_extensions', '>= 0.1.0')
|
76
78
|
|
77
79
|
s.rdoc_options << '--exclude' << '.'
|
78
80
|
s.has_rdoc = false
|
@@ -135,4 +137,13 @@ end
|
|
135
137
|
desc "Publish the API documentation"
|
136
138
|
task :pdoc => [:rdoc] do
|
137
139
|
Rake::SshDirPublisher.new("aeden@rubyforge.org", "/var/www/gforge-projects/activewarehouse/rdoc", "rdoc").upload
|
140
|
+
end
|
141
|
+
|
142
|
+
desc "Reinstall the gem from a local package copy"
|
143
|
+
task :reinstall => [:package] do
|
144
|
+
windows = RUBY_PLATFORM =~ /mswin/
|
145
|
+
sudo = windows ? '' : 'sudo'
|
146
|
+
gem = windows ? 'gem.bat' : 'gem'
|
147
|
+
`#{sudo} #{gem} uninstall -x -i #{PKG_NAME}`
|
148
|
+
`#{sudo} #{gem} install pkg/#{PKG_NAME}-#{PKG_VERSION}`
|
138
149
|
end
|
data/doc/references.txt
ADDED
@@ -0,0 +1,4 @@
|
|
1
|
+
The following papers are relevant to building, storing and querying data cubes with large databases.
|
2
|
+
|
3
|
+
http://research.microsoft.com/research/pubs/view.aspx?msr_tr_id=MSR-TR-95-22
|
4
|
+
http://dbpubs.stanford.edu/pub/showDoc.Fulltext?lang=en&doc=1995-34&format=pdf&compression=&name=1995-34.pdf
|
@@ -1,8 +1,14 @@
|
|
1
1
|
class <%= migration_name %> < ActiveRecord::Migration
|
2
2
|
def self.up
|
3
3
|
fields = {
|
4
|
-
#
|
5
|
-
#
|
4
|
+
# the following are the required bridge table columns for
|
5
|
+
# variable depth hierarchies. Do not change them unless you know
|
6
|
+
# what you are doing.
|
7
|
+
:parent_id => :integer,
|
8
|
+
:child_id => :integer,
|
9
|
+
:num_levels_from_parent => :integer,
|
10
|
+
:is_bottom => :boolean,
|
11
|
+
:is_top => :boolean
|
6
12
|
}
|
7
13
|
create_table :<%= table_name %> do |t|
|
8
14
|
fields.each do |name,type|
|
@@ -12,6 +18,7 @@ class <%= migration_name %> < ActiveRecord::Migration
|
|
12
18
|
fields.each do |name,type|
|
13
19
|
add_index :<%= table_name %>, name unless type == :text
|
14
20
|
end
|
21
|
+
add_index :<%= table_name %>, [:parent_id, :child_id, :num_levels_from_parent], :unique => true
|
15
22
|
end
|
16
23
|
|
17
24
|
def self.down
|
@@ -0,0 +1 @@
|
|
1
|
+
./script/generate date_dimension
|
@@ -0,0 +1,16 @@
|
|
1
|
+
class DateDimensionGenerator < DimensionGenerator
|
2
|
+
attr_accessor :file_name
|
3
|
+
attr_accessor :include_fiscal_year
|
4
|
+
|
5
|
+
default_options :skip_migration => false
|
6
|
+
|
7
|
+
def initialize(runtime_args, runtime_options = {})
|
8
|
+
super
|
9
|
+
|
10
|
+
@name = 'date'
|
11
|
+
@table_name = "#{@name}_dimension"
|
12
|
+
@class_name = "#{@name.camelize}Dimension"
|
13
|
+
@file_name = "#{@class_name.tableize.singularize}"
|
14
|
+
@include_fiscal_year = true # TODO: accept a runtime option to set this
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
class <%= migration_name %> < ActiveRecord::Migration
|
2
|
+
def self.up
|
3
|
+
create_table :<%= table_name %> do |t|
|
4
|
+
t.column :sql_date, :date, :null => false # SQL Date object
|
5
|
+
t.column :calendar_year, :string, :null => false # 2005, 2006, 2007, etc.
|
6
|
+
t.column :calendar_quarter, :string, :null => false, :limit => 2 # Q1, Q2, Q3 or Q4
|
7
|
+
t.column :calendar_quarter_number, :integer, :null => false # 1, 2, 3 or 4
|
8
|
+
t.column :calendar_month_name, :string, :null => false, :limit => 9 # January, February, etc.
|
9
|
+
t.column :calendar_month_number, :integer, :null => false # 1, 2, 3, ... 12
|
10
|
+
t.column :calendar_week, :string, :null => false, :limit => 2 # 1, 2, 3, ... 52
|
11
|
+
t.column :calendar_week_number, :integer, :null => false # 1, 2, 3, ... 52
|
12
|
+
t.column :day_number_in_calendar_year, :integer, :null => false # 1, 2, 3, ... 365
|
13
|
+
t.column :day_number_in_calendar_month, :integer, :null => false # 1, 2, 3, ... 31
|
14
|
+
t.column :day_in_week, :string, :null => false, :limit => 9 # Monday, Tuesday, etc.
|
15
|
+
<% if include_fiscal_year -%>
|
16
|
+
t.column :fiscal_year, :string, :null => false
|
17
|
+
t.column :fiscal_quarter, :string, :null => false, :limit => 2
|
18
|
+
t.column :fiscal_quarter_number, :integer, :null => false
|
19
|
+
t.column :fiscal_month_number, :integer, :null => false
|
20
|
+
t.column :fiscal_week, :string, :null => false, :limit => 2
|
21
|
+
t.column :fiscal_week_number, :integer, :null => false
|
22
|
+
t.column :day_number_in_fiscal_year, :integer, :null => false
|
23
|
+
<% end -%>
|
24
|
+
end
|
25
|
+
# add indexes as required
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.down
|
29
|
+
drop_table :<%= table_name %>
|
30
|
+
end
|
31
|
+
end
|
@@ -1,16 +1,7 @@
|
|
1
1
|
class <%= migration_name %> < ActiveRecord::Migration
|
2
2
|
def self.up
|
3
|
-
fields = {
|
4
|
-
# Add dimension attributes here as name => type
|
5
|
-
# Example: :store_name => :string
|
6
|
-
}
|
7
3
|
create_table :<%= table_name %> do |t|
|
8
|
-
|
9
|
-
t.column name, type
|
10
|
-
end
|
11
|
-
end
|
12
|
-
fields.each do |name,type|
|
13
|
-
add_index :<%= table_name %>, name unless type == :text
|
4
|
+
|
14
5
|
end
|
15
6
|
end
|
16
7
|
|
@@ -21,7 +21,7 @@ class DimensionViewGenerator < Rails::Generator::NamedBase
|
|
21
21
|
# define the query target class and expose its columns as attributes for the view
|
22
22
|
@query_target_class = @query_target_class_name.constantize
|
23
23
|
@view_attributes = @query_target_class.column_names
|
24
|
-
@view_query = "select
|
24
|
+
@view_query = "select #{@view_attributes.join(',')} from #{query_target_table_name}"
|
25
25
|
end
|
26
26
|
|
27
27
|
def manifest
|
@@ -37,7 +37,7 @@ class DimensionViewGenerator < Rails::Generator::NamedBase
|
|
37
37
|
# Generate the files
|
38
38
|
m.template 'model.rb', File.join('app/models', class_path, "#{file_name}.rb")
|
39
39
|
m.template 'unit_test.rb', File.join('test/unit', class_path, "#{file_name}_test.rb")
|
40
|
-
m.template 'fixture.yml', File.join('test/fixtures', class_path, "#{table_name}.yml")
|
40
|
+
#m.template 'fixture.yml', File.join('test/fixtures', class_path, "#{table_name}.yml")
|
41
41
|
|
42
42
|
# Generate the migration unless :skip_migration option is specified
|
43
43
|
unless options[:skip_migration]
|
@@ -1,7 +1,13 @@
|
|
1
1
|
class <%= migration_name %> < ActiveRecord::Migration
|
2
2
|
def self.up
|
3
|
-
create_view "<%= view_name %>", <%= view_query do |t|
|
4
|
-
|
3
|
+
create_view "<%= view_name %>", "<%= view_query %>" do |t|
|
4
|
+
<%- view_attributes.each do |view_attribute| -%>
|
5
|
+
<%- if view_attribute == 'id' -%>
|
6
|
+
t.column :id
|
7
|
+
<%- else -%>
|
8
|
+
t.column :<%= name %>_<%= view_attribute %>
|
9
|
+
<%- end -%>
|
10
|
+
<%- end -%>
|
5
11
|
end
|
6
12
|
end
|
7
13
|
|
@@ -0,0 +1 @@
|
|
1
|
+
./script/generate time_dimension
|
@@ -0,0 +1,12 @@
|
|
1
|
+
class <%= migration_name %> < ActiveRecord::Migration
|
2
|
+
def self.up
|
3
|
+
create_table :<%= table_name %> do |t|
|
4
|
+
t.column :hour_of_day, :integer, :null => false
|
5
|
+
t.column :minute_of_hour, :integer, :null => false
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.down
|
10
|
+
drop_table :<%= table_name %>
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
class TimeDimensionGenerator < DimensionGenerator
|
2
|
+
attr_accessor :file_name
|
3
|
+
|
4
|
+
default_options :skip_migration => false
|
5
|
+
|
6
|
+
def initialize(runtime_args, runtime_options = {})
|
7
|
+
super
|
8
|
+
|
9
|
+
@name = 'date'
|
10
|
+
@table_name = "#{@name}_dimension"
|
11
|
+
@class_name = "#{@name.camelize}Dimension"
|
12
|
+
@file_name = "#{@class_name.tableize.singularize}"
|
13
|
+
end
|
14
|
+
end
|
data/lib/active_warehouse.rb
CHANGED
@@ -35,7 +35,7 @@ end
|
|
35
35
|
|
36
36
|
unless defined?(ActiveSupport)
|
37
37
|
begin
|
38
|
-
$:.unshift(File.dirname(__FILE__) + "/../../activesupport/lib")
|
38
|
+
$:.unshift(File.dirname(__FILE__) + "/../../activesupport/lib")
|
39
39
|
require 'active_support'
|
40
40
|
rescue LoadError
|
41
41
|
gem 'activesupport'
|
@@ -62,18 +62,29 @@ unless defined?(ActionView)
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
|
+
require 'fastercsv'
|
66
|
+
require 'fileutils'
|
67
|
+
require 'adapter_extensions'
|
68
|
+
|
65
69
|
# Require 1.1.6 compatibility code if necessary
|
66
70
|
require 'active_record/version'
|
67
71
|
if ActiveRecord::VERSION::MAJOR < 1 || ActiveRecord::VERSION::MINOR < 15
|
68
72
|
require 'active_warehouse/compat/compat'
|
69
73
|
end
|
70
74
|
|
75
|
+
require 'active_warehouse/ordered_hash'
|
76
|
+
require 'active_warehouse/field'
|
77
|
+
require 'active_warehouse/aggregate_field'
|
78
|
+
require 'active_warehouse/calculated_field'
|
71
79
|
require 'active_warehouse/version'
|
72
80
|
require 'active_warehouse/core_ext'
|
73
|
-
require 'active_warehouse/
|
81
|
+
require 'active_warehouse/prejoin_fact'
|
74
82
|
require 'active_warehouse/fact'
|
83
|
+
require 'active_warehouse/bridge'
|
75
84
|
require 'active_warehouse/dimension'
|
76
85
|
require 'active_warehouse/cube'
|
86
|
+
require 'active_warehouse/cube_query_result'
|
87
|
+
require 'active_warehouse/aggregate'
|
77
88
|
require 'active_warehouse/report'
|
78
89
|
require 'active_warehouse/view'
|
79
90
|
require 'active_warehouse/builder'
|
@@ -1,260 +1,61 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# down each of the dimension hierarchies
|
4
|
-
class Aggregate < ActiveRecord::Base
|
5
|
-
class << self
|
6
|
-
attr_accessor :name, :cube, :dimension1, :dimension2, :dimension1_hierarchy_name, :dimension2_hierarchy_name
|
7
|
-
|
8
|
-
# Get the table name for the aggregate
|
9
|
-
def table_name
|
10
|
-
name = self.name.demodulize.underscore
|
11
|
-
set_table_name(name)
|
12
|
-
name
|
13
|
-
end
|
14
|
-
|
15
|
-
# Returns the aggregate ID
|
16
|
-
def aggregate_id
|
17
|
-
table_name =~ /(\d+)$/
|
18
|
-
$1.to_i
|
19
|
-
end
|
20
|
-
|
21
|
-
# Returns the AggregateMetaData instance associated with this aggregate
|
22
|
-
def meta_data
|
23
|
-
AggregateMetaData.find(aggregate_id)
|
24
|
-
end
|
25
|
-
|
26
|
-
# Return true if the aggregate needs to be rebuilt
|
27
|
-
def needs_rebuild?(last_build=nil)
|
28
|
-
return true if meta_data.populated_at.nil?
|
29
|
-
return true if last_build && (meta_data.populated_at < last_build)
|
30
|
-
return false
|
31
|
-
end
|
32
|
-
|
33
|
-
# Return a key for the aggregate
|
34
|
-
def key(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
|
35
|
-
AggregateKey.new(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
|
36
|
-
end
|
37
|
-
|
38
|
-
# Create the aggregate table if required. Set force option to true to force creation of the table
|
39
|
-
# if it already exists
|
40
|
-
def create_storage_table(force=false)
|
41
|
-
connection.drop_table(table_name) if force and table_exists?
|
42
|
-
if !table_exists?
|
43
|
-
connection.create_table(table_name, :id => false) do |t|
|
44
|
-
t.column :dimension1_path, :string
|
45
|
-
t.column :dimension1_stage, :integer
|
46
|
-
t.column :dimension2_path, :string
|
47
|
-
t.column :dimension2_stage, :integer
|
48
|
-
cube.fact_class.aggregate_fields.each do |field|
|
49
|
-
#options = cube.fact_class.aggregate_field_options[field]
|
50
|
-
col = cube.fact_class.columns_hash[field.to_s]
|
51
|
-
t.column field, col.type if col
|
52
|
-
end
|
53
|
-
end
|
54
|
-
connection.add_index(table_name, :dimension1_path)
|
55
|
-
connection.add_index(table_name, :dimension1_stage)
|
56
|
-
connection.add_index(table_name, :dimension2_path)
|
57
|
-
connection.add_index(table_name, :dimension2_stage)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# Populate the aggregate table
|
62
|
-
def populate
|
63
|
-
# create the storage table if necessary
|
64
|
-
create_storage_table
|
65
|
-
|
66
|
-
#puts "Populating aggregate table #{table_name}"
|
67
|
-
# clear out the current data
|
68
|
-
#connection.execute("TRUNCATE TABLE #{table_name}") #TODO: make this generic to support all databases
|
69
|
-
delete_all
|
70
|
-
|
71
|
-
$first = false
|
72
|
-
|
73
|
-
# aggregate the data for the two dimensions
|
74
|
-
fact_class = cube.fact_class
|
75
|
-
dim1 = Dimension.class_name(dimension1).constantize
|
76
|
-
dim2 = Dimension.class_name(dimension2).constantize
|
77
|
-
dim1_stage_path = []
|
78
|
-
dim1.hierarchy(meta_data.dimension1_hierarchy.to_sym).each_with_index do |dim1_stage_name, dim1_stage_level|
|
79
|
-
dim1_stage_path << dim1_stage_name
|
80
|
-
dim2_stage_path = []
|
81
|
-
dim2.hierarchy(meta_data.dimension2_hierarchy.to_sym).each_with_index do |dim2_stage_name, dim2_stage_level|
|
82
|
-
dim2_stage_path << dim2_stage_name
|
83
|
-
|
84
|
-
stmt, fields = build_query(fact_class, dim1, dim1_stage_path, dim2, dim2_stage_path)
|
85
|
-
|
86
|
-
puts "\nSTMT: #{stmt}" if $first
|
87
|
-
|
88
|
-
# Get the facts and aggregate them
|
89
|
-
# TODO: replace with select_all
|
90
|
-
fact_class.connection.select_all(stmt).each do |row|
|
91
|
-
require 'pp'
|
92
|
-
pp row if $first
|
93
|
-
dim1_value = []
|
94
|
-
dim1_stage_path.each do |v|
|
95
|
-
dim1_value << row["#{v}"]
|
96
|
-
end
|
97
|
-
dim2_value = []
|
98
|
-
dim2_stage_path.each do |v|
|
99
|
-
dim2_value << row["#{v}"]
|
100
|
-
end
|
101
|
-
|
102
|
-
agg_instance = new
|
103
|
-
agg_instance.dimension1_path = dim1_value.join(':')
|
104
|
-
agg_instance.dimension1_stage = dim1_stage_level
|
105
|
-
agg_instance.dimension2_path = dim2_value.join(':')
|
106
|
-
agg_instance.dimension2_stage = dim2_stage_level
|
107
|
-
|
108
|
-
puts "DIM1_PATH: #{agg_instance.dimension1_path}" if $first
|
109
|
-
puts "DIM2_PATH: #{agg_instance.dimension2_path}" if $first
|
110
|
-
|
111
|
-
|
112
|
-
pp fields if $first
|
113
|
-
fields.each do |field|
|
114
|
-
# do the average here
|
115
|
-
puts "setting field #{field}, value is #{row[field.to_s]}" if $first
|
116
|
-
agg_instance.send("#{field}=".to_sym, row[field.to_s])
|
117
|
-
end
|
118
|
-
agg_instance.save!
|
119
|
-
|
120
|
-
meta_data.update_attribute(:populated_at, Time.now)
|
121
|
-
|
122
|
-
$first = false
|
123
|
-
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
# Build the aggregation query for the given dimensions and stage paths
|
129
|
-
def build_query(fact_class, dim1, dim1_stage_path, dim2, dim2_stage_path)
|
130
|
-
dim1_group = dim1_stage_path.collect { |p| "d1.#{p}"}.join(", ")
|
131
|
-
dim2_group = dim2_stage_path.collect { |p| "d2.#{p}"}.join(", ")
|
132
|
-
|
133
|
-
# Set up the find options
|
134
|
-
fact_find_options = {}
|
135
|
-
fact_find_options[:group] = "#{dim1_group}, #{dim2_group}"
|
136
|
-
fact_find_options[:joins] = "join #{dim1.table_name} d1 on f.#{dim1.foreign_key} = d1.id"
|
137
|
-
fact_find_options[:joins] << " join #{dim2.table_name} d2 on f.#{dim2.foreign_key} = d2.id"
|
138
|
-
|
139
|
-
# Build the 'select' part of the query
|
140
|
-
# denominator = nil
|
141
|
-
fields = []
|
142
|
-
fact_select = ["#{dim1_group}, #{dim2_group}"]
|
143
|
-
fact_class.aggregate_fields.each do |field_name|
|
144
|
-
options = fact_class.aggregate_field_options[field_name]
|
145
|
-
fields << field_name
|
146
|
-
|
147
|
-
options[:type] ||= :sum
|
148
|
-
case options[:type]
|
149
|
-
when :sum
|
150
|
-
fact_select << " sum(f.#{field_name}) as #{field_name}"
|
151
|
-
when Hash
|
152
|
-
if options[:type][dim1.sym] == :average && options[:type][dim2.sym] == :average
|
153
|
-
# I believe this is a special case, but I'm not sure how yet. If both dimensions are defined
|
154
|
-
# averages then perhaps that value cannot be calculated at all. TODO: research
|
155
|
-
else
|
156
|
-
fact_select << " sum(f.#{field_name}) as #{field_name}"
|
157
|
-
end
|
158
|
-
else
|
159
|
-
raise "Unsupported aggregate type: #{options[:type]}"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
fact_find_options[:select] = fact_select.join(',')
|
1
|
+
# Source file which defines the ActiveWarehouse::Aggregate module and imports
|
2
|
+
# the aggregate implementations.
|
163
3
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
class AggregateMetaData < ActiveRecord::Base
|
209
|
-
# Build the underlying table. Set force to true to force the build of the table
|
210
|
-
def self.build_table(force=false)
|
211
|
-
connection.drop_table(table_name) if force and table_exists?
|
212
|
-
if !table_exists?
|
213
|
-
connection.create_table(table_name) do |t|
|
214
|
-
t.column :cube_name, :string
|
215
|
-
t.column :dimension1, :string
|
216
|
-
t.column :dimension1_hierarchy, :string
|
217
|
-
t.column :dimension2, :string
|
218
|
-
t.column :dimension2_hierarchy, :string
|
219
|
-
t.column :created_at, :datetime
|
220
|
-
t.column :populated_at, :datetime
|
4
|
+
module ActiveWarehouse #:nodoc:
|
5
|
+
# This module contains classes which handle aggregation of cube data using
|
6
|
+
# various algorithms
|
7
|
+
module Aggregate
|
8
|
+
# Base class for aggregate implementations
|
9
|
+
class Aggregate
|
10
|
+
|
11
|
+
# Reader for the cube class
|
12
|
+
attr_reader :cube_class
|
13
|
+
|
14
|
+
# Initialize the aggregate for the given cube class
|
15
|
+
def initialize(cube_class)
|
16
|
+
@cube_class = cube_class
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# Get the connection to use for SQL execution
|
21
|
+
def connection
|
22
|
+
cube_class.connection
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convenience accessor to get the cube's fact class. Delegates to the
|
26
|
+
# cube class.
|
27
|
+
def fact_class
|
28
|
+
cube_class.fact_class
|
29
|
+
end
|
30
|
+
|
31
|
+
# Parse the query args and return an options hash.
|
32
|
+
def parse_query_args(*args)
|
33
|
+
options = {}
|
34
|
+
if args.length == 1
|
35
|
+
options = args[0]
|
36
|
+
elsif args.length >= 4
|
37
|
+
options[:column_dimension_name] = args[0]
|
38
|
+
options[:column_hierarchy_name] = args[1]
|
39
|
+
options[:row_dimension_name] = args[2]
|
40
|
+
options[:row_hierarchy_name] = args[3]
|
41
|
+
options[:conditions] = args[4] if args.length >= 5
|
42
|
+
options[:cstage] = args[5] if args.length >= 6
|
43
|
+
options[:rstage] = args[6] if args.length >= 7
|
44
|
+
options[:filters] = args[7] if args.length >= 8
|
45
|
+
options.merge!(args[8]) if args.length >= 9
|
46
|
+
else
|
47
|
+
raise ArgumentError, "The query method accepts either 1 Hash (new style) or 4 to 8 arguments (old style)"
|
221
48
|
end
|
222
|
-
|
49
|
+
options
|
223
50
|
end
|
224
51
|
end
|
225
|
-
def key
|
226
|
-
Aggregate.key(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
# Key for aggregate caching
|
231
|
-
class AggregateKey
|
232
|
-
attr_reader :dimension1, :dimension1_hierarchy, :dimension2, :dimension2_hierarchy
|
233
|
-
|
234
|
-
def initialize(dimension1, dimension1_hierarchy, dimension2, dimension2_hierarchy)
|
235
|
-
@dimension1 = dimension1
|
236
|
-
@dimension1_hierarchy = dimension1_hierarchy
|
237
|
-
@dimension2 = dimension2
|
238
|
-
@dimension2_hierarchy = dimension2_hierarchy
|
239
|
-
end
|
240
|
-
|
241
|
-
def ==(o)
|
242
|
-
o.instance_of?(self.class) and (o.to_s == to_s or o.to_s = to_rs)
|
243
|
-
end
|
244
|
-
|
245
|
-
def hash
|
246
|
-
to_s.hash
|
247
|
-
end
|
248
|
-
|
249
|
-
def to_s
|
250
|
-
"#{@dimension1}.#{@dimension1_hierarchy}.#{@dimension2}.#{@dimension2_hierarchy}"
|
251
|
-
end
|
252
|
-
|
253
|
-
# Return the "reversed" version of this key String representation
|
254
|
-
def to_rs
|
255
|
-
"#{@dimension2}.#{@dimension2_hierarchy}.#{@dimension1}.#{@dimension1_hierarchy}"
|
256
|
-
end
|
257
52
|
end
|
258
53
|
end
|
259
54
|
|
260
|
-
|
55
|
+
require 'active_warehouse/aggregate/no_aggregate'
|
56
|
+
require 'active_warehouse/aggregate/rolap_common'
|
57
|
+
require 'active_warehouse/aggregate/pipelined_rolap_aggregate'
|
58
|
+
require 'active_warehouse/aggregate/rolap_aggregate'
|
59
|
+
require 'active_warehouse/aggregate/dwarf_common'
|
60
|
+
require 'active_warehouse/aggregate/dwarf_aggregate'
|
61
|
+
require 'active_warehouse/aggregate/pid_aggregate'
|