chicago-etl 0.0.12 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.12
1
+ 0.0.13
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.0.12"
8
+ s.version = "0.0.13"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-02-26"
12
+ s.date = "2013-04-16"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -29,6 +29,8 @@ module Chicago
29
29
 
30
30
  if table.identifiable?
31
31
  IdentifiableDimensionKeyBuilder.new(key_table, key_sink)
32
+ elsif existing_hash_column?(table)
33
+ ExistingHashColumnKeyBuilder.new(key_table, key_sink)
32
34
  else
33
35
  HashingKeyBuilder.new(key_table, key_sink, columns_to_hash)
34
36
  end
@@ -39,6 +41,10 @@ module Chicago
39
41
 
40
42
  private
41
43
 
44
+ def existing_hash_column?(table)
45
+ table.columns.any? {|c| c.binary? && c.name == :hash && c.unique? }
46
+ end
47
+
42
48
  def dimension?
43
49
  table.kind_of?(Chicago::Schema::Dimension)
44
50
  end
@@ -135,6 +141,24 @@ module Chicago
135
141
  end
136
142
  end
137
143
 
144
+ # Key builder for dimensions with a single unique hash column
145
+ # already present.
146
+ #
147
+ # @api private
148
+ class ExistingHashColumnKeyBuilder < KeyBuilder
149
+ def original_key(row)
150
+ row[:hash].upcase
151
+ end
152
+
153
+ def key_for_insert(original_id)
154
+ ("0x" + original_id).lit
155
+ end
156
+
157
+ def original_key_select_fragment
158
+ :hex.sql_function(:original_id).as(:original_id)
159
+ end
160
+ end
161
+
138
162
  # Key builder for dimensions with natural keys, but no simple
139
163
  # key.
140
164
  #
@@ -22,6 +22,14 @@ describe Chicago::ETL::KeyBuilder do
22
22
  end
23
23
  end
24
24
 
25
+ @schema.define_dimension(:with_hash) do
26
+ columns do
27
+ binary :hash, :unique => true
28
+ end
29
+
30
+ natural_key :hash
31
+ end
32
+
25
33
  @schema.define_fact(:addresses) do
26
34
  dimensions :user, :address
27
35
  natural_key :user, :address
@@ -114,6 +122,35 @@ describe Chicago::ETL::KeyBuilder do
114
122
  end
115
123
  end
116
124
 
125
+ describe "for non-identifiable dimensions with an existing hash" do
126
+ before :each do
127
+ @builder = described_class.
128
+ for_table(@schema.dimension(:with_hash), @db)
129
+ end
130
+
131
+ it "returns an incrementing key, given a row" do
132
+ @builder.key(:hash => "aaa").should == 1
133
+ @builder.key(:hash => "aab").should == 2
134
+ end
135
+
136
+ it "returns the same incrementing key" do
137
+ @builder.key(:hash => "aaa").should == 1
138
+ @builder.key(:hash => "aaa").should == 1
139
+ end
140
+
141
+ it "returns the same incrementing key, ignoring case" do
142
+ @builder.key(:hash => "aaa").should == 1
143
+ @builder.key(:hash => "AAA").should == 1
144
+ end
145
+
146
+ it "inserts the hash as a binary literal" do
147
+ # Yuck. Don't like the implementation test, but mock
148
+ # expectations fail here for some reason, maybe because of the
149
+ # Sequel::LiteralString?
150
+ @builder.key_for_insert(@builder.original_key(:hash => "aaa")).should == "0xAAA".lit
151
+ end
152
+ end
153
+
117
154
  describe "for non-identifiable dimensions with natural keys" do
118
155
  before :each do
119
156
  @builder = described_class.for_table(@schema.dimension(:address), @db)
@@ -48,9 +48,13 @@ describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
48
48
  end
49
49
 
50
50
  it "can set column values" do
51
- described_class.new("bar.csv", :foo, ['@bar', 'quux'],
51
+ sql = described_class.new("bar.csv", :foo, ['@bar', 'quux'],
52
52
  :set => {:bar => :unhex.sql_function("@bar".lit),
53
53
  :etl_batch_id => 3}).
54
- to_sql(TEST_DB).should include("SET `bar` = unhex(@bar), `etl_batch_id` = 3")
54
+ to_sql(TEST_DB)
55
+
56
+ sql.should include("SET")
57
+ sql.should include("`etl_batch_id` = 3")
58
+ sql.should include("`bar` = unhex(@bar)")
55
59
  end
56
60
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 5
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 12
10
- version: 0.0.12
9
+ - 13
10
+ version: 0.0.13
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-02-26 00:00:00 Z
18
+ date: 2013-04-16 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement