chicagowarehouse 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +18 -0
  4. data/LICENSE +20 -0
  5. data/README +11 -0
  6. data/Rakefile +50 -0
  7. data/chicagowarehouse.gemspec +134 -0
  8. data/lib/chicago.rb +32 -0
  9. data/lib/chicago/core_ext/hash.rb +18 -0
  10. data/lib/chicago/core_ext/sequel/dataset.rb +7 -0
  11. data/lib/chicago/core_ext/sequel/sql.rb +62 -0
  12. data/lib/chicago/data/month.rb +98 -0
  13. data/lib/chicago/database/constants.rb +18 -0
  14. data/lib/chicago/database/dataset_builder.rb +75 -0
  15. data/lib/chicago/database/filter.rb +109 -0
  16. data/lib/chicago/database/migration_file_writer.rb +34 -0
  17. data/lib/chicago/database/schema_generator.rb +117 -0
  18. data/lib/chicago/database/type_converters.rb +107 -0
  19. data/lib/chicago/database/value_parser.rb +23 -0
  20. data/lib/chicago/errors.rb +23 -0
  21. data/lib/chicago/query.rb +109 -0
  22. data/lib/chicago/rake_tasks.rb +50 -0
  23. data/lib/chicago/schema/builders/column_builder.rb +21 -0
  24. data/lib/chicago/schema/builders/dimension_builder.rb +69 -0
  25. data/lib/chicago/schema/builders/fact_builder.rb +74 -0
  26. data/lib/chicago/schema/builders/shrunken_dimension_builder.rb +54 -0
  27. data/lib/chicago/schema/builders/table_builder.rb +33 -0
  28. data/lib/chicago/schema/column.rb +221 -0
  29. data/lib/chicago/schema/column_parser.rb +127 -0
  30. data/lib/chicago/schema/dimension.rb +129 -0
  31. data/lib/chicago/schema/dimension_reference.rb +47 -0
  32. data/lib/chicago/schema/fact.rb +70 -0
  33. data/lib/chicago/schema/measure.rb +35 -0
  34. data/lib/chicago/schema/named_element.rb +16 -0
  35. data/lib/chicago/schema/named_element_collection.rb +64 -0
  36. data/lib/chicago/schema/query_column.rb +199 -0
  37. data/lib/chicago/schema/table.rb +41 -0
  38. data/lib/chicago/star_schema.rb +127 -0
  39. data/spec/core_ext/sequel_extensions_spec.rb +29 -0
  40. data/spec/data/month_spec.rb +67 -0
  41. data/spec/database/db_type_converter_spec.rb +125 -0
  42. data/spec/database/migration_file_writer_spec.rb +37 -0
  43. data/spec/database/schema_generator_spec.rb +199 -0
  44. data/spec/db_connections.yml.dist +4 -0
  45. data/spec/query_spec.rb +495 -0
  46. data/spec/schema/column_spec.rb +213 -0
  47. data/spec/schema/dimension_builder_spec.rb +32 -0
  48. data/spec/schema/dimension_reference_spec.rb +90 -0
  49. data/spec/schema/dimension_spec.rb +111 -0
  50. data/spec/schema/fact_spec.rb +83 -0
  51. data/spec/schema/measure_spec.rb +27 -0
  52. data/spec/schema/named_element_collection_spec.rb +67 -0
  53. data/spec/schema/pivoted_column_spec.rb +17 -0
  54. data/spec/schema/query_column_spec.rb +120 -0
  55. data/spec/spec_helper.rb +20 -0
  56. data/spec/star_schema_spec.rb +219 -0
  57. data/spec/support/matchers/be_one_of.rb +11 -0
  58. data/spec/support/matchers/column_matchers.rb +11 -0
  59. data/spec/support/shared_examples/column.rb +13 -0
  60. data/spec/support/shared_examples/schema_table.rb +17 -0
  61. data/spec/support/shared_examples/schema_visitor.rb +25 -0
  62. data/tasks/stats.rake +108 -0
  63. metadata +300 -0
@@ -0,0 +1,83 @@
1
+ require 'spec_helper'
2
+
3
+ describe Chicago::Schema::Fact do
4
+ it_behaves_like "a named schema element"
5
+
6
+ it "has a table name" do
7
+ described_class.new("foo").table_name.should == :facts_foo
8
+ end
9
+
10
+ it "has no dimensions by default" do
11
+ described_class.new("foo").dimensions.should be_empty
12
+ end
13
+
14
+ it "has no degenerate dimensions by default" do
15
+ described_class.new("foo").degenerate_dimensions.should be_empty
16
+ end
17
+
18
+ it "has no measures by default" do
19
+ described_class.new("foo").degenerate_dimensions.should be_empty
20
+ end
21
+
22
+ it "can have a description" do
23
+ described_class.new(:foo, :description => "bar").description.should == "bar"
24
+ end
25
+
26
+ it "can have dimensions" do
27
+ dimension = stub(:dimension)
28
+ described_class.new("foo", :dimensions => [dimension]).
29
+ dimensions.should == [dimension]
30
+ end
31
+
32
+ it "has degenerate dimensions" do
33
+ column = stub(:column)
34
+ described_class.new("foo", :degenerate_dimensions => [column]).
35
+ degenerate_dimensions.should == [column]
36
+ end
37
+
38
+ it "has measures" do
39
+ column = stub(:column)
40
+ described_class.new("foo", :measures => [column]).
41
+ measures.should == [column]
42
+ end
43
+
44
+ it "has columns defined as dimensions, degenerate dimensions & measures" do
45
+
46
+ dimension = stub(:dimension)
47
+ column = stub(:column)
48
+ column_2 = stub(:column_2)
49
+ fact = described_class.new("foo",
50
+ :dimensions => [dimension],
51
+ :degenerate_dimensions => [column],
52
+ :measures => [column_2])
53
+ fact.columns.should == [dimension, column, column_2]
54
+ end
55
+
56
+ it "can qualify a column" do
57
+ described_class.new(:foo).qualify(stub(:column, :name => :bar)).
58
+ should == :bar.qualify(:facts_foo)
59
+ end
60
+
61
+ it "provides a hash-like accessor syntax for columns" do
62
+ measure = stub(:column, :name => :bar)
63
+ fact = described_class.new(:foo, :measures => [measure])
64
+ fact[:bar].should == measure
65
+ end
66
+
67
+ it "is factless if it has no measures" do
68
+ described_class.new(:foo, :measures => [stub()]).should_not be_factless
69
+ described_class.new(:foo).should be_factless
70
+ end
71
+
72
+ it "can define a natural key" do
73
+ described_class.new(:f, :natural_key => [:foo, :bar]).
74
+ natural_key.should == [:foo, :bar]
75
+ end
76
+
77
+ it "is visitable" do
78
+ visitor = mock(:visitor)
79
+ fact = described_class.new(:foo)
80
+ visitor.should_receive(:visit_fact).with(fact)
81
+ fact.visit(visitor)
82
+ end
83
+ end
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+
3
+ describe Chicago::Schema::Measure do
4
+ subject { described_class.new(:user_name, :string) }
5
+
6
+ it_behaves_like "a column"
7
+
8
+ it "should not be semi_additive by default" do
9
+ described_class.new(:rate, :integer).should_not be_semi_additive
10
+ end
11
+
12
+ it "can be defined as semi_additive" do
13
+ described_class.new(:rate, :integer, :semi_additive => true).
14
+ should be_semi_additive
15
+ end
16
+
17
+ it "is not indexed" do
18
+ described_class.new(:rate, :integer).should_not be_indexed
19
+ end
20
+
21
+ it "is visitable" do
22
+ visitor = mock(:visitor)
23
+ measure = described_class.new(:foo, :integer)
24
+ visitor.should_receive(:visit_measure).with(measure)
25
+ measure.visit(visitor)
26
+ end
27
+ end
@@ -0,0 +1,67 @@
1
+ require 'spec_helper'
2
+
3
+ describe Chicago::Schema::NamedElementCollection do
4
+ before :each do
5
+ @e = stub(:element, :name => :foo)
6
+ end
7
+
8
+ it "supports adding an element via add" do
9
+ subject.add(@e)
10
+ subject.to_a.should == [@e]
11
+ end
12
+
13
+ it "supports adding an element via <<" do
14
+ subject << @e
15
+ subject.to_a.should == [@e]
16
+ end
17
+
18
+ it "returns the element just added" do
19
+ subject.add(@e).should == @e
20
+ end
21
+
22
+ it "can be iterated over" do
23
+ subject.add(@e)
24
+ subject.each {|element| element.should == @e }
25
+ end
26
+
27
+ it "is enumerable" do
28
+ subject.should be_kind_of(Enumerable)
29
+ end
30
+
31
+ it "supports access by name" do
32
+ subject.add @e
33
+ subject[:foo].should == @e
34
+ end
35
+
36
+ it "returns true from contains? if the collection contains the same-named element" do
37
+ subject.add @e
38
+ subject.contain?(stub(:element, :name => :foo)).should be_true
39
+ subject.contain?(stub(:element, :name => :bar)).should be_false
40
+ end
41
+
42
+ it "returns the number of elements in a collection from size" do
43
+ subject.add @e
44
+ subject.size.should == 1
45
+ end
46
+
47
+ it "returns the number of elements in a collection from length" do
48
+ subject.add @e
49
+ subject.length.should == 1
50
+ end
51
+
52
+ it "responds to empty?" do
53
+ subject.should be_empty
54
+ subject.add @e
55
+ subject.should_not be_empty
56
+ end
57
+
58
+ it "can be constructed with a list of elements" do
59
+ described_class.new(@e).contain?(@e).should be_true
60
+ end
61
+
62
+ it "has elements that are unique by name" do
63
+ subject.add(@e)
64
+ subject.add(stub(:element, :name => :foo))
65
+ subject.size.should == 1
66
+ end
67
+ end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+ describe Chicago::Schema::PivotedColumn do
4
+ before :each do
5
+ @column = stub(:column)
6
+ @pivot = stub(:pivot)
7
+ end
8
+
9
+ it "has a nil group name" do
10
+ described_class.new(@column, @pivot, 0, true).group_name.should be_nil
11
+ end
12
+
13
+ it "has a pair of labels as the label" do
14
+ @column.stub(:label => :foo)
15
+ described_class.new(@column, @pivot, 0, true).label.should == [:foo, true]
16
+ end
17
+ end
@@ -0,0 +1,120 @@
1
+ require 'spec_helper'
2
+ require 'chicago/schema/query_column'
3
+
4
+ describe Chicago::Schema::QueryColumn do
5
+ describe "a standard column" do
6
+ let(:owner) { stub(:owner).as_null_object }
7
+ let(:column) { stub(:column).as_null_object }
8
+ subject { described_class.column(owner, column, "foo.bar") }
9
+
10
+ it "should have a column alias" do
11
+ subject.column_alias.should == "foo.bar"
12
+ end
13
+
14
+ it "has an owner" do
15
+ subject.owner.should == owner
16
+ end
17
+
18
+ it "has a sequel qualified name for use in SELECT statements" do
19
+ owner.stub(:name).and_return(:foo)
20
+ column.stub(:name).and_return(:bar)
21
+ subject.select_name.should == :bar.qualify(:foo)
22
+ end
23
+
24
+ it "has a sequel qualified name for use in COUNT" do
25
+ owner.stub(:name).and_return(:foo)
26
+ column.stub(:name).and_return(:bar)
27
+ subject.count_name.should == :bar.qualify(:foo)
28
+ end
29
+
30
+ it "uses the alias in GROUP BY" do
31
+ subject.group_name.should == :"foo.bar"
32
+ end
33
+
34
+ it "delegates label to the decorated column" do
35
+ column.should_receive(:label).and_return("Bar")
36
+ subject.label.should == "Bar"
37
+ end
38
+ end
39
+
40
+ describe "a dimension column" do
41
+ let(:owner) { stub(:owner).as_null_object }
42
+ let(:column) { stub(:column).as_null_object }
43
+ subject { described_class.column(owner, column, "foo.bar") }
44
+
45
+ before :each do
46
+ column.stub(:main_identifier).and_return(:name)
47
+ column.stub(:original_key).and_return(stub(:name => :original_id))
48
+ column.stub(:kind_of?).with(Chicago::Schema::Dimension).and_return(true)
49
+ end
50
+
51
+ it "should have a column alias" do
52
+ subject.column_alias.should == "foo.bar"
53
+ end
54
+
55
+ it "has an owner" do
56
+ subject.owner.should == owner
57
+ end
58
+
59
+ it "uses the main identifier in SELECT statements" do
60
+ column.stub(:name).and_return(:bar)
61
+ subject.select_name.should == :name.qualify(:bar)
62
+ end
63
+
64
+ it "uses the original id in COUNT" do
65
+ column.stub(:name).and_return(:bar)
66
+ subject.count_name.should == :original_id.qualify(:bar)
67
+ end
68
+
69
+ it "uses the original id in GROUP BY" do
70
+ column.stub(:name).and_return(:bar)
71
+ subject.group_name.should == :original_id.qualify(:bar)
72
+ end
73
+
74
+ it "delegates label to the decorated column" do
75
+ column.should_receive(:label).and_return("Bar")
76
+ subject.label.should == "Bar"
77
+ end
78
+ end
79
+
80
+ describe "a dimension identifier column" do
81
+ let(:owner) { stub(:owner).as_null_object }
82
+ let(:column) { stub(:column).as_null_object }
83
+ subject { described_class.column(owner, column, "foo.bar") }
84
+
85
+ before :each do
86
+ column.stub(:name).and_return(:bar)
87
+
88
+ owner.stub(:name).and_return(:foo)
89
+ owner.stub(:original_key).and_return(stub(:name => :original_id))
90
+ owner.stub(:kind_of?).with(Chicago::Schema::Dimension).and_return(true)
91
+ owner.stub(:identifiable?).and_return(true)
92
+ owner.stub(:identifiers).and_return([:bar])
93
+ end
94
+
95
+ it "should have a column alias" do
96
+ subject.column_alias.should == "foo.bar"
97
+ end
98
+
99
+ it "has an owner" do
100
+ subject.owner.should == owner
101
+ end
102
+
103
+ it "uses the name in SELECT statements" do
104
+ subject.select_name.should == :bar.qualify(:foo)
105
+ end
106
+
107
+ it "uses the original id in COUNT" do
108
+ subject.count_name.should == :original_id.qualify(:foo)
109
+ end
110
+
111
+ it "uses the original id in GROUP BY" do
112
+ subject.group_name.should == :original_id.qualify(:foo)
113
+ end
114
+
115
+ it "delegates label to the decorated column" do
116
+ column.should_receive(:label).and_return("Bar")
117
+ subject.label.should == "Bar"
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,20 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'chicago'
4
+ require 'rspec'
5
+ require 'yaml'
6
+ require 'rspec/autorun'
7
+ require 'timecop'
8
+
9
+ include Chicago
10
+
11
+ unless defined? TEST_DB
12
+ TEST_DB = Sequel.connect(YAML.load(File.read(File.dirname(__FILE__) + "/db_connections.yml")))
13
+ end
14
+
15
+ # Requires supporting files with custom matchers and macros, etc,
16
+ # in ./support/ and its subdirectories.
17
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
18
+
19
+ RSpec.configure do |config|
20
+ end
@@ -0,0 +1,219 @@
1
+ require 'spec_helper'
2
+
3
+ describe Chicago::StarSchema do
4
+ before :each do
5
+ @schema = Chicago::StarSchema.new
6
+ end
7
+
8
+ describe "dimensions" do
9
+ specify "are not defined initially" do
10
+ @schema.dimensions.should be_empty
11
+ end
12
+
13
+ specify "can be defined" do
14
+ @schema.define_dimension(:user).should be_kind_of(Chicago::Schema::Dimension)
15
+ @schema.dimensions.should_not be_empty
16
+ end
17
+
18
+ specify "are unique by name within a schema" do
19
+ @schema.define_dimension(:user)
20
+ expect { @schema.define_dimension(:user) }.
21
+ to raise_error(Chicago::DuplicateTableError)
22
+ end
23
+
24
+ specify "can be defined with columns" do
25
+ dimension = @schema.define_dimension(:user) do
26
+ columns { string :email }
27
+ end
28
+ dimension.should have_column_named(:email)
29
+ end
30
+
31
+ specify "can define columns in multiple blocks" do
32
+ dimension = @schema.define_dimension(:user) do
33
+ columns { string :email }
34
+ columns { string :name }
35
+ end
36
+ dimension.should have_column_named(:name)
37
+ end
38
+
39
+ specify "can have a natural key defined" do
40
+ dimension = @schema.define_dimension(:user) do
41
+ columns { string :email }
42
+
43
+ natural_key :email
44
+ end
45
+
46
+ dimension.natural_key.should == [:email]
47
+ end
48
+
49
+ specify "can have a description defined" do
50
+ dimension = @schema.define_dimension(:user) do
51
+ description "Hello"
52
+ end
53
+ dimension.description.should == "Hello"
54
+ end
55
+
56
+ specify "can have null records defined" do
57
+ dimension = @schema.define_dimension(:user) do
58
+ columns { string :email }
59
+
60
+ null_record :id => 1, :email => "Missing"
61
+ null_record :id => 2, :email => "Not Applicable"
62
+ end
63
+
64
+ db = stub(:db, :table_exists? => true)
65
+ db.stub_chain(:[], :insert_replace).and_return(db)
66
+ db.should_receive(:insert_multiple).with([{:id => 1, :email => "Missing"},
67
+ {:id => 2, :email => "Not Applicable"}])
68
+ db.should_receive(:insert_multiple).with([{:dimension_id => 1},
69
+ {:dimension_id => 2}])
70
+ dimension.create_null_records(db)
71
+ end
72
+
73
+ specify "can be prebuilt and attached" do
74
+ d = Chicago::Schema::Dimension.new(:foo)
75
+ @schema.add(d)
76
+ @schema.dimensions.should include(d)
77
+ end
78
+ end
79
+
80
+ describe "shrunken dimensions" do
81
+ specify "can be defined" do
82
+ @schema.define_dimension(:date) do
83
+ columns do
84
+ integer :year
85
+ integer :month
86
+ integer :day
87
+ end
88
+ end
89
+
90
+ @schema.define_shrunken_dimension(:month, :date) do
91
+ columns :year, :month
92
+ end.columns.map(&:name).should == [:year, :month]
93
+ end
94
+
95
+ specify "must have a subset of columns from the base dimension" do
96
+ @schema.define_dimension(:date) do
97
+ columns { integer :year }
98
+ end
99
+
100
+ expect {
101
+ @schema.define_shrunken_dimension(:month, :date) do
102
+ columns :year, :month
103
+ end
104
+ }.to raise_error(Chicago::MissingDefinitionError)
105
+ end
106
+
107
+ specify "raises an error if the base dimension is not defined" do
108
+ expect {
109
+ @schema.define_shrunken_dimension(:month, :date)
110
+ }.to raise_error(Chicago::MissingDefinitionError)
111
+ end
112
+ end
113
+
114
+ describe "facts" do
115
+ specify "are not defined initially" do
116
+ @schema.facts.should be_empty
117
+ end
118
+
119
+ specify "can be defined" do
120
+ @schema.define_fact(:user).should be_kind_of(Chicago::Schema::Fact)
121
+ @schema.facts.should_not be_empty
122
+ end
123
+
124
+ specify "are unique by name within a schema" do
125
+ @schema.define_fact(:user)
126
+ expect { @schema.define_fact(:user) }.
127
+ to raise_error(Chicago::DuplicateTableError)
128
+ end
129
+
130
+ specify "can be prebuilt and attached" do
131
+ f = Chicago::Schema::Fact.new(:foo)
132
+ @schema.add(f)
133
+ @schema.facts.should include(f)
134
+ end
135
+
136
+ specify "can be prebuilt and attached, but still must have unique names" do
137
+ f = Chicago::Schema::Fact.new(:foo)
138
+ f2 = Chicago::Schema::Fact.new(:foo)
139
+ @schema.add(f)
140
+ expect { @schema.add(f2) }.to raise_error(Chicago::DuplicateTableError)
141
+ end
142
+
143
+ specify "can have dimensions defined" do
144
+ dim = @schema.define_dimension(:date)
145
+ fact = @schema.define_fact(:foo) do
146
+ dimensions :date
147
+ end
148
+ fact.dimensions.map(&:name).should include(:date)
149
+ end
150
+
151
+ specify "raises an error if unspecified dimension is referenced" do
152
+ expect { @schema.define_fact(:foo) { dimensions :date } }.
153
+ to raise_error(Chicago::MissingDefinitionError)
154
+ end
155
+
156
+ specify "can have roleplayed dimensions defined, using Sequel's aliasing" do
157
+ dim = @schema.define_dimension(:date)
158
+ fact = @schema.define_fact(:foo) do
159
+ dimensions :date.as(:start_date)
160
+ end
161
+ fact.dimensions.map(&:name).should include(:start_date)
162
+ end
163
+
164
+ specify "can have degenerate dimensions defined" do
165
+ fact = @schema.define_fact(:foo) do
166
+ degenerate_dimensions do
167
+ integer :reference_number
168
+ end
169
+ end
170
+ fact.degenerate_dimensions.map(&:name).should include(:reference_number)
171
+ end
172
+
173
+ specify "can have measures defined" do
174
+ fact = @schema.define_fact(:foo) do
175
+ measures do
176
+ integer :amount
177
+ end
178
+ end
179
+ fact.measures.map(&:name).should include(:amount)
180
+ end
181
+
182
+ specify "allows defined measures to be null by default" do
183
+ fact = @schema.define_fact(:foo) do
184
+ measures { integer :amount }
185
+ end
186
+ fact.measures.first.should be_null
187
+ end
188
+
189
+ specify "can have a description defined" do
190
+ fact = @schema.define_fact(:foo) do
191
+ description "Hello"
192
+ end
193
+ fact.description.should == "Hello"
194
+ end
195
+
196
+ specify "can have a natural key defined" do
197
+ dim = @schema.define_dimension(:date)
198
+ fact = @schema.define_fact(:foo) do
199
+ dimensions :date
200
+
201
+ natural_key :date
202
+ end
203
+
204
+ fact.natural_key.should == [:date]
205
+ end
206
+ end
207
+
208
+ it "allows definition of a fact and a dimension with the same name" do
209
+ @schema.define_fact(:user)
210
+ expect { @schema.define_dimension(:user) }.
211
+ to_not raise_error(Chicago::DuplicateTableError)
212
+ end
213
+
214
+ it "returns all dimensions and facts from #tables" do
215
+ @schema.define_fact(:fact)
216
+ @schema.define_dimension(:dimension)
217
+ @schema.tables.map(&:name).should == [:dimension, :fact]
218
+ end
219
+ end