chicago-etl 0.0.12 → 0.0.13
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/chicago-etl.gemspec +2 -2
- data/lib/chicago/etl/key_builder.rb +24 -0
- data/spec/etl/key_builder_spec.rb +37 -0
- data/spec/etl/sequel/load_data_infile_expression_spec.rb +6 -2
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.12
|
1
|
+
0.0.13
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.0.12"
|
8
|
+
s.version = "0.0.13"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-
|
12
|
+
s.date = "2013-04-16"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -29,6 +29,8 @@ module Chicago
|
|
29
29
|
|
30
30
|
if table.identifiable?
|
31
31
|
IdentifiableDimensionKeyBuilder.new(key_table, key_sink)
|
32
|
+
elsif existing_hash_column?(table)
|
33
|
+
ExistingHashColumnKeyBuilder.new(key_table, key_sink)
|
32
34
|
else
|
33
35
|
HashingKeyBuilder.new(key_table, key_sink, columns_to_hash)
|
34
36
|
end
|
@@ -39,6 +41,10 @@ module Chicago
|
|
39
41
|
|
40
42
|
private
|
41
43
|
|
44
|
+
def existing_hash_column?(table)
|
45
|
+
table.columns.any? {|c| c.binary? && c.name == :hash && c.unique? }
|
46
|
+
end
|
47
|
+
|
42
48
|
def dimension?
|
43
49
|
table.kind_of?(Chicago::Schema::Dimension)
|
44
50
|
end
|
@@ -135,6 +141,24 @@ module Chicago
|
|
135
141
|
end
|
136
142
|
end
|
137
143
|
|
144
|
+
# Key builder for dimensions with a single unique hash column
|
145
|
+
# already present.
|
146
|
+
#
|
147
|
+
# @api private
|
148
|
+
class ExistingHashColumnKeyBuilder < KeyBuilder
|
149
|
+
def original_key(row)
|
150
|
+
row[:hash].upcase
|
151
|
+
end
|
152
|
+
|
153
|
+
def key_for_insert(original_id)
|
154
|
+
("0x" + original_id).lit
|
155
|
+
end
|
156
|
+
|
157
|
+
def original_key_select_fragment
|
158
|
+
:hex.sql_function(:original_id).as(:original_id)
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
138
162
|
# Key builder for dimensions with natural keys, but no simple
|
139
163
|
# key.
|
140
164
|
#
|
@@ -22,6 +22,14 @@ describe Chicago::ETL::KeyBuilder do
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
+
@schema.define_dimension(:with_hash) do
|
26
|
+
columns do
|
27
|
+
binary :hash, :unique => true
|
28
|
+
end
|
29
|
+
|
30
|
+
natural_key :hash
|
31
|
+
end
|
32
|
+
|
25
33
|
@schema.define_fact(:addresses) do
|
26
34
|
dimensions :user, :address
|
27
35
|
natural_key :user, :address
|
@@ -114,6 +122,35 @@ describe Chicago::ETL::KeyBuilder do
|
|
114
122
|
end
|
115
123
|
end
|
116
124
|
|
125
|
+
describe "for non-identifiable dimensions with an existing hash" do
|
126
|
+
before :each do
|
127
|
+
@builder = described_class.
|
128
|
+
for_table(@schema.dimension(:with_hash), @db)
|
129
|
+
end
|
130
|
+
|
131
|
+
it "returns an incrementing key, given a row" do
|
132
|
+
@builder.key(:hash => "aaa").should == 1
|
133
|
+
@builder.key(:hash => "aab").should == 2
|
134
|
+
end
|
135
|
+
|
136
|
+
it "returns the same incrementing key" do
|
137
|
+
@builder.key(:hash => "aaa").should == 1
|
138
|
+
@builder.key(:hash => "aaa").should == 1
|
139
|
+
end
|
140
|
+
|
141
|
+
it "returns the same incrementing key, ignoring case" do
|
142
|
+
@builder.key(:hash => "aaa").should == 1
|
143
|
+
@builder.key(:hash => "AAA").should == 1
|
144
|
+
end
|
145
|
+
|
146
|
+
it "inserts the hash as a binary literal" do
|
147
|
+
# Yuck. Don't like the implementation test, but mock
|
148
|
+
# expectations fail here for some reason, maybe because of the
|
149
|
+
# Sequel::LiteralString?
|
150
|
+
@builder.key_for_insert(@builder.original_key(:hash => "aaa")).should == "0xAAA".lit
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
117
154
|
describe "for non-identifiable dimensions with natural keys" do
|
118
155
|
before :each do
|
119
156
|
@builder = described_class.for_table(@schema.dimension(:address), @db)
|
@@ -48,9 +48,13 @@ describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
it "can set column values" do
|
51
|
-
described_class.new("bar.csv", :foo, ['@bar', 'quux'],
|
51
|
+
sql = described_class.new("bar.csv", :foo, ['@bar', 'quux'],
|
52
52
|
:set => {:bar => :unhex.sql_function("@bar".lit),
|
53
53
|
:etl_batch_id => 3}).
|
54
|
-
to_sql(TEST_DB)
|
54
|
+
to_sql(TEST_DB)
|
55
|
+
|
56
|
+
sql.should include("SET")
|
57
|
+
sql.should include("`etl_batch_id` = 3")
|
58
|
+
sql.should include("`bar` = unhex(@bar)")
|
55
59
|
end
|
56
60
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 5
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 12
|
10
|
-
version: 0.0.12
|
9
|
+
- 13
|
10
|
+
version: 0.0.13
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Roland Swingler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-
|
18
|
+
date: 2013-04-16 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|