chicago-etl 0.0.12 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/chicago-etl.gemspec +2 -2
- data/lib/chicago/etl/key_builder.rb +24 -0
- data/spec/etl/key_builder_spec.rb +37 -0
- data/spec/etl/sequel/load_data_infile_expression_spec.rb +6 -2
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.12
|
1
|
+
0.0.13
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.0.12"
|
8
|
+
s.version = "0.0.13"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-
|
12
|
+
s.date = "2013-04-16"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -29,6 +29,8 @@ module Chicago
|
|
29
29
|
|
30
30
|
if table.identifiable?
|
31
31
|
IdentifiableDimensionKeyBuilder.new(key_table, key_sink)
|
32
|
+
elsif existing_hash_column?(table)
|
33
|
+
ExistingHashColumnKeyBuilder.new(key_table, key_sink)
|
32
34
|
else
|
33
35
|
HashingKeyBuilder.new(key_table, key_sink, columns_to_hash)
|
34
36
|
end
|
@@ -39,6 +41,10 @@ module Chicago
|
|
39
41
|
|
40
42
|
private
|
41
43
|
|
44
|
+
def existing_hash_column?(table)
|
45
|
+
table.columns.any? {|c| c.binary? && c.name == :hash && c.unique? }
|
46
|
+
end
|
47
|
+
|
42
48
|
def dimension?
|
43
49
|
table.kind_of?(Chicago::Schema::Dimension)
|
44
50
|
end
|
@@ -135,6 +141,24 @@ module Chicago
|
|
135
141
|
end
|
136
142
|
end
|
137
143
|
|
144
|
+
# Key builder for dimensions with a single unique hash column
|
145
|
+
# already present.
|
146
|
+
#
|
147
|
+
# @api private
|
148
|
+
class ExistingHashColumnKeyBuilder < KeyBuilder
|
149
|
+
def original_key(row)
|
150
|
+
row[:hash].upcase
|
151
|
+
end
|
152
|
+
|
153
|
+
def key_for_insert(original_id)
|
154
|
+
("0x" + original_id).lit
|
155
|
+
end
|
156
|
+
|
157
|
+
def original_key_select_fragment
|
158
|
+
:hex.sql_function(:original_id).as(:original_id)
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
138
162
|
# Key builder for dimensions with natuaral keys, but no simple
|
139
163
|
# key.
|
140
164
|
#
|
@@ -22,6 +22,14 @@ describe Chicago::ETL::KeyBuilder do
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
+
@schema.define_dimension(:with_hash) do
|
26
|
+
columns do
|
27
|
+
binary :hash, :unique => true
|
28
|
+
end
|
29
|
+
|
30
|
+
natural_key :hash
|
31
|
+
end
|
32
|
+
|
25
33
|
@schema.define_fact(:addresses) do
|
26
34
|
dimensions :user, :address
|
27
35
|
natural_key :user, :address
|
@@ -114,6 +122,35 @@ describe Chicago::ETL::KeyBuilder do
|
|
114
122
|
end
|
115
123
|
end
|
116
124
|
|
125
|
+
describe "for non-identifiable dimensions with an existing hash" do
|
126
|
+
before :each do
|
127
|
+
@builder = described_class.
|
128
|
+
for_table(@schema.dimension(:with_hash), @db)
|
129
|
+
end
|
130
|
+
|
131
|
+
it "returns an incrementing key, given a row" do
|
132
|
+
@builder.key(:hash => "aaa").should == 1
|
133
|
+
@builder.key(:hash => "aab").should == 2
|
134
|
+
end
|
135
|
+
|
136
|
+
it "returns the same incrementing key" do
|
137
|
+
@builder.key(:hash => "aaa").should == 1
|
138
|
+
@builder.key(:hash => "aaa").should == 1
|
139
|
+
end
|
140
|
+
|
141
|
+
it "returns the same incrementing key, ignoring case" do
|
142
|
+
@builder.key(:hash => "aaa").should == 1
|
143
|
+
@builder.key(:hash => "AAA").should == 1
|
144
|
+
end
|
145
|
+
|
146
|
+
it "inserts the hash as a binary literal" do
|
147
|
+
# Yuck. Don't like the implementation test, but mock
|
148
|
+
# expectations fail here for some reason, maybe because of the
|
149
|
+
# Sequel::LiteralString?
|
150
|
+
@builder.key_for_insert(@builder.original_key(:hash => "aaa")).should == "0xAAA".lit
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
117
154
|
describe "for non-identifiable dimensions with natural keys" do
|
118
155
|
before :each do
|
119
156
|
@builder = described_class.for_table(@schema.dimension(:address), @db)
|
@@ -48,9 +48,13 @@ describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
it "can set column values" do
|
51
|
-
described_class.new("bar.csv", :foo, ['@bar', 'quux'],
|
51
|
+
sql = described_class.new("bar.csv", :foo, ['@bar', 'quux'],
|
52
52
|
:set => {:bar => :unhex.sql_function("@bar".lit),
|
53
53
|
:etl_batch_id => 3}).
|
54
|
-
to_sql(TEST_DB)
|
54
|
+
to_sql(TEST_DB)
|
55
|
+
|
56
|
+
sql.should include("SET")
|
57
|
+
sql.should include("`etl_batch_id` = 3")
|
58
|
+
sql.should include("`bar` = unhex(@bar)")
|
55
59
|
end
|
56
60
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 5
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 12
|
10
|
-
version: 0.0.12
|
9
|
+
- 13
|
10
|
+
version: 0.0.13
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Roland Swingler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-
|
18
|
+
date: 2013-04-16 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|