chicagowarehouse 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Gemfile +1 -2
- data/Rakefile +1 -1
- data/chicagowarehouse.gemspec +9 -11
- data/lib/chicago.rb +2 -1
- data/lib/chicago/database/concrete_schema_strategies.rb +153 -0
- data/lib/chicago/database/index_generator.rb +35 -0
- data/lib/chicago/database/migration_file_writer.rb +14 -17
- data/lib/chicago/database/schema_generator.rb +11 -41
- data/lib/chicago/rake_tasks.rb +26 -6
- data/spec/database/{db_type_converter_spec.rb → concrete_schema_strategies.rb} +15 -8
- data/spec/database/migration_file_writer_spec.rb +6 -10
- data/spec/database/schema_generator_spec.rb +3 -3
- metadata +7 -21
- data/lib/chicago/database/type_converters.rb +0 -107
checksums.yaml
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
!binary "U0hBMQ==":
|
|
3
3
|
metadata.gz: !binary |-
|
|
4
|
-
|
|
4
|
+
NmI5NTE0ZmI4MTJhZDc3MjE2YTkzMTM5NWZhY2M5MDIxMTBjODI1Yg==
|
|
5
5
|
data.tar.gz: !binary |-
|
|
6
|
-
|
|
6
|
+
MGM2YjI2ZWFkNzhlMzI3NzM0MmY0YjgyMGQ5MjdhYzYxNGQ3ZWM1MQ==
|
|
7
7
|
SHA512:
|
|
8
8
|
metadata.gz: !binary |-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
Y2MwMDQ2M2M4NTdiMzE0MzY0MjAwOWU4YmYwYzI5ZjI0NDdhYTU2ZTU3NGQw
|
|
10
|
+
OThiZjZiZjFmYWYyOGZmYzdkY2EyZjZiYzRjYjE2OWI5Y2ZlNmZiZDNjOWQy
|
|
11
|
+
NDIwZTZhNGMwYjAxNWJkMTJjZmQ3NDAzNTdkMWVjZDlmMjgxMWQ=
|
|
12
12
|
data.tar.gz: !binary |-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
NzQ1YTA5NjM4NWVlMmU4YmJhMTM3YjhlMzQ5MzgwMWY4YmEwZmU2MTliYzFk
|
|
14
|
+
OWRmNDAwMDQwNWY1NWFkMzVkODlhYjZiM2U1MGNjZGI0OWQ3ZTViMWQ4ZDg1
|
|
15
|
+
NDYyMzBjZDA5NzU4NDBkOGE1NTQ2OGM1NGJlZDQ0ZTNkMzJkZGM=
|
data/Gemfile
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
source "http://rubygems.org"
|
|
2
2
|
|
|
3
3
|
gem "sequel", ">= 4.7"
|
|
4
|
-
gem "sequel_migration_builder", ">= 0.
|
|
4
|
+
gem "sequel_migration_builder", ">= 0.4.0"
|
|
5
5
|
gem "chronic"
|
|
6
6
|
|
|
7
7
|
group :development do
|
|
@@ -10,7 +10,6 @@ group :development do
|
|
|
10
10
|
gem "rspec", "~> 2.0"
|
|
11
11
|
gem "bundler"
|
|
12
12
|
gem "jeweler"
|
|
13
|
-
gem "rcov", :platform => :ruby_18
|
|
14
13
|
gem "simplecov", :platform => :ruby_19
|
|
15
14
|
gem "flog"
|
|
16
15
|
gem "timecop"
|
data/Rakefile
CHANGED
|
@@ -14,7 +14,7 @@ require 'rake'
|
|
|
14
14
|
require 'jeweler'
|
|
15
15
|
Jeweler::Tasks.new do |gem|
|
|
16
16
|
gem.name = "chicagowarehouse"
|
|
17
|
-
gem.version = "0.5.1"
|
|
17
|
+
gem.version = "0.6.0"
|
|
18
18
|
gem.summary = "Ruby Data Warehousing"
|
|
19
19
|
gem.description = "Simple Data Warehouse toolkit for ruby"
|
|
20
20
|
gem.author = "Roland Swingler"
|
data/chicagowarehouse.gemspec
CHANGED
|
@@ -2,16 +2,16 @@
|
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
|
-
# stub: chicagowarehouse 0.5.1 ruby lib
|
|
5
|
+
# stub: chicagowarehouse 0.6.0 ruby lib
|
|
6
6
|
|
|
7
7
|
Gem::Specification.new do |s|
|
|
8
8
|
s.name = "chicagowarehouse"
|
|
9
|
-
s.version = "0.5.1"
|
|
9
|
+
s.version = "0.6.0"
|
|
10
10
|
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
12
12
|
s.require_paths = ["lib"]
|
|
13
13
|
s.authors = ["Roland Swingler"]
|
|
14
|
-
s.date = "2014-
|
|
14
|
+
s.date = "2014-09-01"
|
|
15
15
|
s.description = "Simple Data Warehouse toolkit for ruby"
|
|
16
16
|
s.email = "roland.swingler@gmail.com"
|
|
17
17
|
s.extra_rdoc_files = [
|
|
@@ -30,12 +30,13 @@ Gem::Specification.new do |s|
|
|
|
30
30
|
"lib/chicago/core_ext/hash.rb",
|
|
31
31
|
"lib/chicago/core_ext/sequel/dataset.rb",
|
|
32
32
|
"lib/chicago/data/month.rb",
|
|
33
|
+
"lib/chicago/database/concrete_schema_strategies.rb",
|
|
33
34
|
"lib/chicago/database/constants.rb",
|
|
34
35
|
"lib/chicago/database/dataset_builder.rb",
|
|
35
36
|
"lib/chicago/database/filter.rb",
|
|
37
|
+
"lib/chicago/database/index_generator.rb",
|
|
36
38
|
"lib/chicago/database/migration_file_writer.rb",
|
|
37
39
|
"lib/chicago/database/schema_generator.rb",
|
|
38
|
-
"lib/chicago/database/type_converters.rb",
|
|
39
40
|
"lib/chicago/database/value_parser.rb",
|
|
40
41
|
"lib/chicago/errors.rb",
|
|
41
42
|
"lib/chicago/query.rb",
|
|
@@ -57,7 +58,7 @@ Gem::Specification.new do |s|
|
|
|
57
58
|
"lib/chicago/schema/table.rb",
|
|
58
59
|
"lib/chicago/star_schema.rb",
|
|
59
60
|
"spec/data/month_spec.rb",
|
|
60
|
-
"spec/database/
|
|
61
|
+
"spec/database/concrete_schema_strategies.rb",
|
|
61
62
|
"spec/database/migration_file_writer_spec.rb",
|
|
62
63
|
"spec/database/schema_generator_spec.rb",
|
|
63
64
|
"spec/db_connections.yml.dist",
|
|
@@ -90,41 +91,38 @@ Gem::Specification.new do |s|
|
|
|
90
91
|
|
|
91
92
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
92
93
|
s.add_runtime_dependency(%q<sequel>, [">= 4.7"])
|
|
93
|
-
s.add_runtime_dependency(%q<sequel_migration_builder>, [">= 0.
|
|
94
|
+
s.add_runtime_dependency(%q<sequel_migration_builder>, [">= 0.4.0"])
|
|
94
95
|
s.add_runtime_dependency(%q<chronic>, [">= 0"])
|
|
95
96
|
s.add_development_dependency(%q<mysql2>, [">= 0"])
|
|
96
97
|
s.add_development_dependency(%q<yard>, [">= 0"])
|
|
97
98
|
s.add_development_dependency(%q<rspec>, ["~> 2.0"])
|
|
98
99
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
|
99
100
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
|
100
|
-
s.add_development_dependency(%q<rcov>, [">= 0"])
|
|
101
101
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
|
102
102
|
s.add_development_dependency(%q<flog>, [">= 0"])
|
|
103
103
|
s.add_development_dependency(%q<timecop>, [">= 0"])
|
|
104
104
|
else
|
|
105
105
|
s.add_dependency(%q<sequel>, [">= 4.7"])
|
|
106
|
-
s.add_dependency(%q<sequel_migration_builder>, [">= 0.
|
|
106
|
+
s.add_dependency(%q<sequel_migration_builder>, [">= 0.4.0"])
|
|
107
107
|
s.add_dependency(%q<chronic>, [">= 0"])
|
|
108
108
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
|
109
109
|
s.add_dependency(%q<yard>, [">= 0"])
|
|
110
110
|
s.add_dependency(%q<rspec>, ["~> 2.0"])
|
|
111
111
|
s.add_dependency(%q<bundler>, [">= 0"])
|
|
112
112
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
|
113
|
-
s.add_dependency(%q<rcov>, [">= 0"])
|
|
114
113
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
|
115
114
|
s.add_dependency(%q<flog>, [">= 0"])
|
|
116
115
|
s.add_dependency(%q<timecop>, [">= 0"])
|
|
117
116
|
end
|
|
118
117
|
else
|
|
119
118
|
s.add_dependency(%q<sequel>, [">= 4.7"])
|
|
120
|
-
s.add_dependency(%q<sequel_migration_builder>, [">= 0.
|
|
119
|
+
s.add_dependency(%q<sequel_migration_builder>, [">= 0.4.0"])
|
|
121
120
|
s.add_dependency(%q<chronic>, [">= 0"])
|
|
122
121
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
|
123
122
|
s.add_dependency(%q<yard>, [">= 0"])
|
|
124
123
|
s.add_dependency(%q<rspec>, ["~> 2.0"])
|
|
125
124
|
s.add_dependency(%q<bundler>, [">= 0"])
|
|
126
125
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
|
127
|
-
s.add_dependency(%q<rcov>, [">= 0"])
|
|
128
126
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
|
129
127
|
s.add_dependency(%q<flog>, [">= 0"])
|
|
130
128
|
s.add_dependency(%q<timecop>, [">= 0"])
|
data/lib/chicago.rb
CHANGED
|
@@ -12,7 +12,8 @@ require 'chicago/data/month'
|
|
|
12
12
|
|
|
13
13
|
require 'chicago/star_schema'
|
|
14
14
|
require 'chicago/database/constants'
|
|
15
|
-
require 'chicago/database/type_converters'
|
|
15
|
+
require 'chicago/database/index_generator'
|
|
16
|
+
require 'chicago/database/concrete_schema_strategies'
|
|
16
17
|
require 'chicago/database/migration_file_writer'
|
|
17
18
|
require 'chicago/database/schema_generator'
|
|
18
19
|
require 'chicago/query'
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
module Chicago
|
|
2
|
+
module Database
|
|
3
|
+
# Generic database strategy.
|
|
4
|
+
#
|
|
5
|
+
# This supplements Sequel's type conversion strategy rather than
|
|
6
|
+
# replaces it, so +:boolean+ will still return +:boolean+ rather
|
|
7
|
+
# than +tinyint(1)+ in the case of mysql.
|
|
8
|
+
class ConcreteSchemaStrategy
|
|
9
|
+
# Factory method that returns an appropriate type conversion
|
|
10
|
+
# stratgey for the given database.
|
|
11
|
+
#
|
|
12
|
+
# If a database-specific strategy cannot be found, returns a
|
|
13
|
+
# generic strategy.
|
|
14
|
+
#
|
|
15
|
+
# @return [ConcreteSchemaStrategy]
|
|
16
|
+
def self.for_db(db)
|
|
17
|
+
if db.database_type == :mysql
|
|
18
|
+
MysqlStrategy.new
|
|
19
|
+
elsif db.database_type == :postgres && db.opts[:adapter] == "redshift"
|
|
20
|
+
RedshiftStrategy.new
|
|
21
|
+
else
|
|
22
|
+
self.new
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def migration_options
|
|
27
|
+
{}
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def column_hash(column)
|
|
31
|
+
hsh = column.to_hash.merge(:column_type => db_type(column))
|
|
32
|
+
hsh.delete(:elements) if hsh.has_key?(:elements)
|
|
33
|
+
hsh
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Returns the indexes for the given table.
|
|
37
|
+
def indexes(table)
|
|
38
|
+
IndexGenerator.new(table).indexes
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Returns a db type given a column definition
|
|
42
|
+
#
|
|
43
|
+
# @return [Symbol]
|
|
44
|
+
def db_type(column)
|
|
45
|
+
case column.column_type
|
|
46
|
+
when :integer then integer_type(column.min, column.max)
|
|
47
|
+
when :string then string_type(column.min, column.max)
|
|
48
|
+
when :money then :decimal
|
|
49
|
+
when :percent then :decimal
|
|
50
|
+
else
|
|
51
|
+
column.column_type
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Returns sequel table options for a dimension or fact table.
|
|
56
|
+
#
|
|
57
|
+
# None by default, but database-specific subclasses may
|
|
58
|
+
# override this.
|
|
59
|
+
#
|
|
60
|
+
# @return [Hash]
|
|
61
|
+
def table_options
|
|
62
|
+
{}
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Returns a database type for a string column.
|
|
66
|
+
#
|
|
67
|
+
# @return [Symbol]
|
|
68
|
+
def string_type(min, max)
|
|
69
|
+
min && max && min == max ? :char : :varchar
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Returns a database integer column type, big enough to fit
|
|
73
|
+
# values between min and max, or integer if a specific type
|
|
74
|
+
# cannot be found.
|
|
75
|
+
#
|
|
76
|
+
# @return [Symbol]
|
|
77
|
+
# @raise an ArgumentError if min or max is too large for a
|
|
78
|
+
# single database column.
|
|
79
|
+
def integer_type(min, max)
|
|
80
|
+
if min && max && in_numeric_range?(min, max, SMALL_INT_MAX)
|
|
81
|
+
:smallint
|
|
82
|
+
else
|
|
83
|
+
:integer
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
protected
|
|
88
|
+
|
|
89
|
+
def in_numeric_range?(min, max, unsigned_limit)
|
|
90
|
+
signed_limit = (unsigned_limit + 1) / 2
|
|
91
|
+
(min >= -signed_limit && max <= signed_limit - 1) || (min >= 0 && max <= unsigned_limit)
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# Redshift-specific database schema strategy
|
|
96
|
+
class RedshiftStrategy < ConcreteSchemaStrategy
|
|
97
|
+
def migration_options
|
|
98
|
+
{:separate_alter_table_statements => true, :immutable_columns => true}
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def column_hash(column)
|
|
102
|
+
hsh = super(column)
|
|
103
|
+
|
|
104
|
+
if column.column_type == :string && hsh[:size]
|
|
105
|
+
# Redshift column sizes are in bytes, not characters, so
|
|
106
|
+
# increase to 4 bytes per-char for UTF-8 reasons.
|
|
107
|
+
hsh[:size] *= 4
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
hsh
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Redshift does not support indexes, so do not output any.
|
|
114
|
+
def indexes(table)
|
|
115
|
+
[]
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# MySql-specific database schema strategy
|
|
120
|
+
class MysqlStrategy < ConcreteSchemaStrategy
|
|
121
|
+
def column_hash(column)
|
|
122
|
+
column.to_hash.merge :column_type => db_type(column)
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def db_type(column)
|
|
126
|
+
return :enum if column.elements && column.elements.size < 65_536
|
|
127
|
+
super(column)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Returns table options for a dimension or fact table.
|
|
131
|
+
#
|
|
132
|
+
# Dimension tables are defined as MyISAM tables in MySQL.
|
|
133
|
+
def table_options
|
|
134
|
+
{:engine => "myisam"}
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def integer_type(min, max)
|
|
138
|
+
return :integer unless min && max
|
|
139
|
+
|
|
140
|
+
case
|
|
141
|
+
when in_numeric_range?(min, max, TINY_INT_MAX) then :tinyint
|
|
142
|
+
when in_numeric_range?(min, max, SMALL_INT_MAX) then :smallint
|
|
143
|
+
when in_numeric_range?(min, max, MEDIUM_INT_MAX) then :mediumint
|
|
144
|
+
when in_numeric_range?(min, max, INT_MAX) then :integer
|
|
145
|
+
when in_numeric_range?(min, max, BIG_INT_MAX) then :bigint
|
|
146
|
+
else
|
|
147
|
+
raise ArgumentError.new("#{min} is too small or #{max} is too large for a single column")
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
module Chicago
|
|
2
|
+
module Database
|
|
3
|
+
class IndexGenerator
|
|
4
|
+
def initialize(table)
|
|
5
|
+
@table = table
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def indexes
|
|
9
|
+
indexes = @table.columns.select(&:indexed?).inject({}) do |hsh, d|
|
|
10
|
+
hsh.merge("#{d.name}_idx".to_sym => {
|
|
11
|
+
:columns => d.database_name,
|
|
12
|
+
:unique => d.unique?})
|
|
13
|
+
end
|
|
14
|
+
indexes.merge!(natural_key_index) if @table.natural_key
|
|
15
|
+
indexes.merge!(:_inserted_at_idx => {:columns => :_inserted_at, :unique => false})
|
|
16
|
+
indexes
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def natural_key_index
|
|
20
|
+
{
|
|
21
|
+
"#{@table.natural_key.first}_idx".to_sym => {
|
|
22
|
+
:columns => natural_key_index_columns,
|
|
23
|
+
:unique => true
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def natural_key_index_columns
|
|
29
|
+
@table.natural_key.map do |name|
|
|
30
|
+
@table[name].database_name rescue raise MissingDefinitionError.new("Column #{name} is not defined in #{@table.name}")
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -4,30 +4,27 @@ module Chicago
|
|
|
4
4
|
module Database
|
|
5
5
|
# Writes Sequel migrations for the star schema
|
|
6
6
|
class MigrationFileWriter
|
|
7
|
-
# Creates a new migration file writer, given a Sequel::Database
|
|
8
|
-
# connection and a directory. If the directory does not exist, an
|
|
9
|
-
# error will be raised.
|
|
10
|
-
def initialize(db, migration_directory)
|
|
11
|
-
@db = db
|
|
12
|
-
@migration_directory = migration_directory
|
|
13
|
-
end
|
|
14
|
-
|
|
15
7
|
# Writes the migration file necessary for all defined facts and
|
|
16
8
|
# dimensions.
|
|
17
|
-
def write_migration_file(schema)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
tables = SchemaGenerator.new(type_converter).traverse(schema)
|
|
9
|
+
def write_migration_file(db, schema, directory, generate_key_tables=true)
|
|
10
|
+
schema_strategy = ConcreteSchemaStrategy.for_db(db)
|
|
11
|
+
tables = SchemaGenerator.new(schema_strategy, generate_key_tables).traverse(schema)
|
|
21
12
|
|
|
22
|
-
File.open(migration_file, "w") do |fh|
|
|
23
|
-
fh.write Sequel::MigrationBuilder.new(
|
|
13
|
+
File.open(migration_file(directory), "w") do |fh|
|
|
14
|
+
fh.write Sequel::MigrationBuilder.new(db, schema_strategy.migration_options).
|
|
15
|
+
generate_migration(tables)
|
|
24
16
|
end
|
|
25
17
|
end
|
|
26
18
|
|
|
27
19
|
# Returns the path the migration file has been written to.
|
|
28
|
-
def migration_file
|
|
29
|
-
|
|
30
|
-
|
|
20
|
+
def migration_file(directory)
|
|
21
|
+
File.join(directory, migration_file_name)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
private
|
|
25
|
+
|
|
26
|
+
def migration_file_name
|
|
27
|
+
@migration_file_name ||= "#{Time.now.strftime("%Y%m%d%H%M%S")}_auto_migration.rb"
|
|
31
28
|
end
|
|
32
29
|
end
|
|
33
30
|
end
|
|
@@ -5,10 +5,15 @@ module Chicago
|
|
|
5
5
|
class SchemaGenerator
|
|
6
6
|
attr_writer :type_converter
|
|
7
7
|
|
|
8
|
-
def initialize(type_converter)
|
|
8
|
+
def initialize(type_converter, generate_key_tables=true)
|
|
9
9
|
@type_converter = type_converter
|
|
10
|
+
@generate_key_tables = generate_key_tables
|
|
10
11
|
end
|
|
11
12
|
|
|
13
|
+
def generate_key_tables?
|
|
14
|
+
@generate_key_tables
|
|
15
|
+
end
|
|
16
|
+
|
|
12
17
|
def traverse(schema)
|
|
13
18
|
schema.tables.inject({}) {|hsh,t| hsh.merge(t.visit(self)) }
|
|
14
19
|
end
|
|
@@ -18,12 +23,13 @@ module Chicago
|
|
|
18
23
|
end
|
|
19
24
|
|
|
20
25
|
def visit_dimension(dimension)
|
|
21
|
-
{dimension.table_name => basic_table(dimension)}
|
|
22
|
-
|
|
26
|
+
hash = {dimension.table_name => basic_table(dimension)}
|
|
27
|
+
hash.merge!(key_table(dimension)) if generate_key_tables?
|
|
28
|
+
hash
|
|
23
29
|
end
|
|
24
30
|
|
|
25
31
|
def visit_column(column)
|
|
26
|
-
|
|
32
|
+
@type_converter.column_hash(column)
|
|
27
33
|
end
|
|
28
34
|
|
|
29
35
|
alias :visit_measure :visit_column
|
|
@@ -35,7 +41,7 @@ module Chicago
|
|
|
35
41
|
t = {
|
|
36
42
|
:primary_key => [:id],
|
|
37
43
|
:table_options => @type_converter.table_options,
|
|
38
|
-
:indexes => indexes(table),
|
|
44
|
+
:indexes => @type_converter.indexes(table),
|
|
39
45
|
:columns => [{
|
|
40
46
|
:name => :id,
|
|
41
47
|
:column_type => :integer,
|
|
@@ -75,42 +81,6 @@ module Chicago
|
|
|
75
81
|
}
|
|
76
82
|
}
|
|
77
83
|
end
|
|
78
|
-
|
|
79
|
-
def indexes(table)
|
|
80
|
-
IndexGenerator.new(table).indexes
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
class IndexGenerator
|
|
85
|
-
def initialize(table)
|
|
86
|
-
@table = table
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
def indexes
|
|
90
|
-
indexes = @table.columns.select(&:indexed?).inject({}) do |hsh, d|
|
|
91
|
-
hsh.merge("#{d.name}_idx".to_sym => {
|
|
92
|
-
:columns => d.database_name,
|
|
93
|
-
:unique => d.unique?})
|
|
94
|
-
end
|
|
95
|
-
indexes.merge!(natural_key_index) if @table.natural_key
|
|
96
|
-
indexes.merge!(:_inserted_at_idx => {:columns => :_inserted_at, :unique => false})
|
|
97
|
-
indexes
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
def natural_key_index
|
|
101
|
-
{
|
|
102
|
-
"#{@table.natural_key.first}_idx".to_sym => {
|
|
103
|
-
:columns => natural_key_index_columns,
|
|
104
|
-
:unique => true
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
def natural_key_index_columns
|
|
110
|
-
@table.natural_key.map do |name|
|
|
111
|
-
@table[name].database_name rescue raise MissingDefinitionError.new("Column #{name} is not defined in #{@table.name}")
|
|
112
|
-
end
|
|
113
|
-
end
|
|
114
84
|
end
|
|
115
85
|
end
|
|
116
86
|
end
|
data/lib/chicago/rake_tasks.rb
CHANGED
|
@@ -20,10 +20,12 @@ module Chicago
|
|
|
20
20
|
#
|
|
21
21
|
# @api public
|
|
22
22
|
class RakeTasks < Rake::TaskLib
|
|
23
|
-
def initialize(
|
|
24
|
-
@migration_dir = "migrations"
|
|
25
|
-
@db = db
|
|
23
|
+
def initialize(schema, options)
|
|
26
24
|
@schema = schema
|
|
25
|
+
@base_migration_dir = options[:migration_directory] ||= "migrations"
|
|
26
|
+
@staging_db = options[:staging_db] or raise ArgumentError.new("staging_db option must be provided.")
|
|
27
|
+
@presentation_db = options[:presentation_db]
|
|
28
|
+
|
|
27
29
|
define
|
|
28
30
|
end
|
|
29
31
|
|
|
@@ -36,15 +38,33 @@ module Chicago
|
|
|
36
38
|
task :create_null_records do
|
|
37
39
|
# TODO: replace this with proper logging.
|
|
38
40
|
warn "Loading NULL records."
|
|
39
|
-
@schema.dimensions.each
|
|
41
|
+
@schema.dimensions.each do |dimension|
|
|
42
|
+
dimension.create_null_records(@db)
|
|
43
|
+
end
|
|
40
44
|
end
|
|
41
45
|
|
|
42
46
|
desc "Writes a migration file to change the database based on defined Facts & Dimensions"
|
|
43
47
|
task :write_migrations do
|
|
44
|
-
Database::MigrationFileWriter.new
|
|
45
|
-
|
|
48
|
+
writer = Database::MigrationFileWriter.new
|
|
49
|
+
writer.write_migration_file(@staging_db, @schema,
|
|
50
|
+
staging_directory)
|
|
51
|
+
|
|
52
|
+
if @presentation_db
|
|
53
|
+
writer.write_migration_file(@presentation_db, @schema,
|
|
54
|
+
presentation_directory, false)
|
|
55
|
+
end
|
|
46
56
|
end
|
|
47
57
|
end
|
|
48
58
|
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
def staging_directory
|
|
63
|
+
File.join(@base_migration_dir, "staging")
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def presentation_directory
|
|
67
|
+
File.join(@base_migration_dir, "presentation")
|
|
68
|
+
end
|
|
49
69
|
end
|
|
50
70
|
end
|
|
@@ -46,15 +46,22 @@ describe "DbTypeConverter.for_db" do
|
|
|
46
46
|
it "should return a type converter specific to MySQL if the database type is :mysql" do
|
|
47
47
|
@mock_db.should_receive(:database_type).and_return(:mysql)
|
|
48
48
|
|
|
49
|
-
converter = Database::TypeConverters::DbTypeConverter.for_db(@mock_db)
|
|
50
|
-
converter.should be_kind_of(Database::TypeConverters::MysqlTypeConverter)
|
|
49
|
+
converter = Database::ConcreteSchemaStrategy.for_db(@mock_db)
|
|
50
|
+
converter.should be_kind_of(Database::MysqlStrategy)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it "should return a type converter specific to Redshift if the database type is :mysql" do
|
|
54
|
+
@mock_db.stub(:database_type => :postgres, :opts => {:adapter => 'redshift'})
|
|
55
|
+
|
|
56
|
+
converter = Database::ConcreteSchemaStrategy.for_db(@mock_db)
|
|
57
|
+
converter.should be_kind_of(Database::RedshiftStrategy)
|
|
51
58
|
end
|
|
52
59
|
|
|
53
60
|
it "should return a generic type converter for an unknown database type" do
|
|
54
|
-
@mock_db.
|
|
61
|
+
@mock_db.stub(:database_type => :foodb)
|
|
55
62
|
|
|
56
|
-
converter = Database::TypeConverters::DbTypeConverter.for_db(@mock_db)
|
|
57
|
-
converter.should be_kind_of(Database::TypeConverters::DbTypeConverter)
|
|
63
|
+
converter = Database::ConcreteSchemaStrategy.for_db(@mock_db)
|
|
64
|
+
converter.should be_kind_of(Database::ConcreteSchemaStrategy)
|
|
58
65
|
end
|
|
59
66
|
end
|
|
60
67
|
|
|
@@ -62,7 +69,7 @@ describe "Generic DbTypeConverter" do
|
|
|
62
69
|
it_behaves_like "All DB type converters"
|
|
63
70
|
|
|
64
71
|
before :each do
|
|
65
|
-
@tc = Database::TypeConverters::DbTypeConverter.new
|
|
72
|
+
@tc = Database::ConcreteSchemaStrategy.new
|
|
66
73
|
end
|
|
67
74
|
|
|
68
75
|
{ :smallint => [-32768, 32767],
|
|
@@ -76,11 +83,11 @@ describe "Generic DbTypeConverter" do
|
|
|
76
83
|
end
|
|
77
84
|
end
|
|
78
85
|
|
|
79
|
-
describe Chicago::Database::TypeConverters::MysqlTypeConverter do
|
|
86
|
+
describe Chicago::Database::MysqlStrategy do
|
|
80
87
|
it_behaves_like "All DB type converters"
|
|
81
88
|
|
|
82
89
|
before :each do
|
|
83
|
-
@tc = Database::TypeConverters::MysqlTypeConverter.new
|
|
90
|
+
@tc = Database::MysqlStrategy.new
|
|
84
91
|
end
|
|
85
92
|
|
|
86
93
|
context "#db_type" do
|
|
@@ -1,16 +1,12 @@
|
|
|
1
1
|
require "spec_helper"
|
|
2
2
|
|
|
3
3
|
describe Chicago::Database::MigrationFileWriter do
|
|
4
|
-
|
|
5
|
-
@mock_db = double(:db)
|
|
6
|
-
@mock_db.stub(:database_type).and_return(:generic)
|
|
7
|
-
@builder = described_class.new(@mock_db, "schema")
|
|
8
|
-
end
|
|
4
|
+
let(:db) { double(:db, :database_type => :generic) }
|
|
9
5
|
|
|
10
|
-
it "
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
6
|
+
it "returns a migration file name with the current timestamp" do
|
|
7
|
+
Timecop.freeze(2010, 1, 1, 12, 30)
|
|
8
|
+
subject.migration_file("schema").
|
|
9
|
+
should == "schema/20100101123000_auto_migration.rb"
|
|
14
10
|
end
|
|
15
11
|
|
|
16
12
|
it "should write out a migration file generated by Sequel::MigrationBuilder" do
|
|
@@ -29,7 +25,7 @@ describe Chicago::Database::MigrationFileWriter do
|
|
|
29
25
|
file = StringIO.new
|
|
30
26
|
File.stub(:open).and_yield(file)
|
|
31
27
|
|
|
32
|
-
|
|
28
|
+
subject.write_migration_file(db, schema, "directory")
|
|
33
29
|
|
|
34
30
|
file.rewind
|
|
35
31
|
file.read.should == "migration content"
|
|
@@ -2,7 +2,7 @@ require 'spec_helper'
|
|
|
2
2
|
require 'chicago/database/schema_generator'
|
|
3
3
|
|
|
4
4
|
describe Chicago::Database::SchemaGenerator do
|
|
5
|
-
subject { described_class.new(Chicago::Database::TypeConverters::DbTypeConverter.new) }
|
|
5
|
+
subject { described_class.new(Chicago::Database::ConcreteSchemaStrategy.new) }
|
|
6
6
|
|
|
7
7
|
it_behaves_like "a schema visitor"
|
|
8
8
|
|
|
@@ -41,7 +41,7 @@ describe Chicago::Database::SchemaGenerator do
|
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
it "should have a table type of MyISAM for mysql" do
|
|
44
|
-
subject.type_converter = Chicago::Database::TypeConverters::MysqlTypeConverter.new
|
|
44
|
+
subject.type_converter = Chicago::Database::MysqlStrategy.new
|
|
45
45
|
subject.visit_fact(@fact)[:facts_sales][:table_options].should == {:engine => "myisam"}
|
|
46
46
|
end
|
|
47
47
|
|
|
@@ -108,7 +108,7 @@ describe Chicago::Database::SchemaGenerator do
|
|
|
108
108
|
|
|
109
109
|
it "should have a table type of MyISAM for mysql" do
|
|
110
110
|
@dimension = @schema.define_dimension(:user)
|
|
111
|
-
subject.type_converter = Chicago::Database::TypeConverters::MysqlTypeConverter.new
|
|
111
|
+
subject.type_converter = Chicago::Database::MysqlStrategy.new
|
|
112
112
|
subject.visit_dimension(@dimension)[:dimension_user][:table_options].should == {:engine => "myisam"}
|
|
113
113
|
end
|
|
114
114
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: chicagowarehouse
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.1
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Roland Swingler
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-
|
|
11
|
+
date: 2014-09-01 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: sequel
|
|
@@ -30,14 +30,14 @@ dependencies:
|
|
|
30
30
|
requirements:
|
|
31
31
|
- - ! '>='
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: 0.
|
|
33
|
+
version: 0.4.0
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - ! '>='
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: 0.
|
|
40
|
+
version: 0.4.0
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: chronic
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -122,20 +122,6 @@ dependencies:
|
|
|
122
122
|
- - ! '>='
|
|
123
123
|
- !ruby/object:Gem::Version
|
|
124
124
|
version: '0'
|
|
125
|
-
- !ruby/object:Gem::Dependency
|
|
126
|
-
name: rcov
|
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
|
128
|
-
requirements:
|
|
129
|
-
- - ! '>='
|
|
130
|
-
- !ruby/object:Gem::Version
|
|
131
|
-
version: '0'
|
|
132
|
-
type: :development
|
|
133
|
-
prerelease: false
|
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
135
|
-
requirements:
|
|
136
|
-
- - ! '>='
|
|
137
|
-
- !ruby/object:Gem::Version
|
|
138
|
-
version: '0'
|
|
139
125
|
- !ruby/object:Gem::Dependency
|
|
140
126
|
name: simplecov
|
|
141
127
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -197,12 +183,13 @@ files:
|
|
|
197
183
|
- lib/chicago/core_ext/hash.rb
|
|
198
184
|
- lib/chicago/core_ext/sequel/dataset.rb
|
|
199
185
|
- lib/chicago/data/month.rb
|
|
186
|
+
- lib/chicago/database/concrete_schema_strategies.rb
|
|
200
187
|
- lib/chicago/database/constants.rb
|
|
201
188
|
- lib/chicago/database/dataset_builder.rb
|
|
202
189
|
- lib/chicago/database/filter.rb
|
|
190
|
+
- lib/chicago/database/index_generator.rb
|
|
203
191
|
- lib/chicago/database/migration_file_writer.rb
|
|
204
192
|
- lib/chicago/database/schema_generator.rb
|
|
205
|
-
- lib/chicago/database/type_converters.rb
|
|
206
193
|
- lib/chicago/database/value_parser.rb
|
|
207
194
|
- lib/chicago/errors.rb
|
|
208
195
|
- lib/chicago/query.rb
|
|
@@ -224,7 +211,7 @@ files:
|
|
|
224
211
|
- lib/chicago/schema/table.rb
|
|
225
212
|
- lib/chicago/star_schema.rb
|
|
226
213
|
- spec/data/month_spec.rb
|
|
227
|
-
- spec/database/db_type_converter_spec.rb
|
|
214
|
+
- spec/database/concrete_schema_strategies.rb
|
|
228
215
|
- spec/database/migration_file_writer_spec.rb
|
|
229
216
|
- spec/database/schema_generator_spec.rb
|
|
230
217
|
- spec/db_connections.yml.dist
|
|
@@ -271,4 +258,3 @@ signing_key:
|
|
|
271
258
|
specification_version: 4
|
|
272
259
|
summary: Ruby Data Warehousing
|
|
273
260
|
test_files: []
|
|
274
|
-
has_rdoc:
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
module Chicago
|
|
2
|
-
module Database
|
|
3
|
-
module TypeConverters
|
|
4
|
-
# Generic type conversion strategy.
|
|
5
|
-
#
|
|
6
|
-
# This supplements Sequel's type conversion strategy rather than
|
|
7
|
-
# replaces it, so +:boolean+ will still return +:boolean+ rather
|
|
8
|
-
# than +tinyint(1)+ in the case of mysql.
|
|
9
|
-
class DbTypeConverter
|
|
10
|
-
# Factory method that returns an appropriate type conversion
|
|
11
|
-
# stratgey for the given database.
|
|
12
|
-
#
|
|
13
|
-
# If a database-specific strategy cannot be found, returns a
|
|
14
|
-
# generic strategy.
|
|
15
|
-
#
|
|
16
|
-
# @return [DbTypeConverter]
|
|
17
|
-
def self.for_db(db)
|
|
18
|
-
return MysqlTypeConverter.new if db.database_type == :mysql
|
|
19
|
-
self.new
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
# Returns a db type given a column definition
|
|
23
|
-
#
|
|
24
|
-
# @return [Symbol]
|
|
25
|
-
def db_type(column)
|
|
26
|
-
case column.column_type
|
|
27
|
-
when :integer then integer_type(column.min, column.max)
|
|
28
|
-
when :string then string_type(column.min, column.max)
|
|
29
|
-
when :money then :decimal
|
|
30
|
-
when :percent then :decimal
|
|
31
|
-
else
|
|
32
|
-
column.column_type
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# Returns sequel table options for a dimension or fact table.
|
|
37
|
-
#
|
|
38
|
-
# None by default, but database-specific subclasses may
|
|
39
|
-
# override this.
|
|
40
|
-
#
|
|
41
|
-
# @return [Hash]
|
|
42
|
-
def table_options
|
|
43
|
-
{}
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
# Returns a database type for a string column.
|
|
47
|
-
#
|
|
48
|
-
# @return [Symbol]
|
|
49
|
-
def string_type(min, max)
|
|
50
|
-
min && max && min == max ? :char : :varchar
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# Returns a database integer column type, big enough to fit
|
|
54
|
-
# values between min and max, or integer if a specific type
|
|
55
|
-
# cannot be found.
|
|
56
|
-
#
|
|
57
|
-
# @return [Symbol]
|
|
58
|
-
# @raise an ArgumentError if min or max is too large for a
|
|
59
|
-
# single database column.
|
|
60
|
-
def integer_type(min, max)
|
|
61
|
-
signed_limit = (SMALL_INT_MAX + 1) / 2
|
|
62
|
-
if min && max && ((min >= -signed_limit && max <= signed_limit - 1) || (min >= 0 && max <= SMALL_INT_MAX))
|
|
63
|
-
:smallint
|
|
64
|
-
else
|
|
65
|
-
:integer
|
|
66
|
-
end
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
# MySql-specific type conversion strategy
|
|
71
|
-
class MysqlTypeConverter < DbTypeConverter
|
|
72
|
-
def db_type(column)
|
|
73
|
-
return :enum if column.elements && column.elements.size < 65_536
|
|
74
|
-
super(column)
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
# Returns table options for a dimension or fact table.
|
|
78
|
-
#
|
|
79
|
-
# Dimension tables are defined as MyISAM tables in MySQL.
|
|
80
|
-
def table_options
|
|
81
|
-
{:engine => "myisam"}
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
def integer_type(min, max)
|
|
85
|
-
return :integer unless min && max
|
|
86
|
-
|
|
87
|
-
case
|
|
88
|
-
when in_numeric_range?(min, max, TINY_INT_MAX) then :tinyint
|
|
89
|
-
when in_numeric_range?(min, max, SMALL_INT_MAX) then :smallint
|
|
90
|
-
when in_numeric_range?(min, max, MEDIUM_INT_MAX) then :mediumint
|
|
91
|
-
when in_numeric_range?(min, max, INT_MAX) then :integer
|
|
92
|
-
when in_numeric_range?(min, max, BIG_INT_MAX) then :bigint
|
|
93
|
-
else
|
|
94
|
-
raise ArgumentError.new("#{min} is too small or #{max} is too large for a single column")
|
|
95
|
-
end
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
private
|
|
99
|
-
|
|
100
|
-
def in_numeric_range?(min, max, unsigned_limit)
|
|
101
|
-
signed_limit = (unsigned_limit + 1) / 2
|
|
102
|
-
(min >= -signed_limit && max <= signed_limit - 1) || (min >= 0 && max <= unsigned_limit)
|
|
103
|
-
end
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
|
-
end
|
|
107
|
-
end
|