chicago-etl 0.0.12 → 0.0.13

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.12
1
+ 0.0.13
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.0.12"
8
+ s.version = "0.0.13"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-02-26"
12
+ s.date = "2013-04-16"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -29,6 +29,8 @@ module Chicago
29
29
 
30
30
  if table.identifiable?
31
31
  IdentifiableDimensionKeyBuilder.new(key_table, key_sink)
32
+ elsif existing_hash_column?(table)
33
+ ExistingHashColumnKeyBuilder.new(key_table, key_sink)
32
34
  else
33
35
  HashingKeyBuilder.new(key_table, key_sink, columns_to_hash)
34
36
  end
@@ -39,6 +41,10 @@ module Chicago
39
41
 
40
42
  private
41
43
 
44
+ def existing_hash_column?(table)
45
+ table.columns.any? {|c| c.binary? && c.name == :hash && c.unique? }
46
+ end
47
+
42
48
  def dimension?
43
49
  table.kind_of?(Chicago::Schema::Dimension)
44
50
  end
@@ -135,6 +141,24 @@ module Chicago
135
141
  end
136
142
  end
137
143
 
144
+ # Key builder for dimensions with a single unique hash column
145
+ # already present.
146
+ #
147
+ # @api private
148
+ class ExistingHashColumnKeyBuilder < KeyBuilder
149
+ def original_key(row)
150
+ row[:hash].upcase
151
+ end
152
+
153
+ def key_for_insert(original_id)
154
+ ("0x" + original_id).lit
155
+ end
156
+
157
+ def original_key_select_fragment
158
+ :hex.sql_function(:original_id).as(:original_id)
159
+ end
160
+ end
161
+
138
162
  # Key builder for dimensions with natural keys, but no simple
139
163
  # key.
140
164
  #
@@ -22,6 +22,14 @@ describe Chicago::ETL::KeyBuilder do
22
22
  end
23
23
  end
24
24
 
25
+ @schema.define_dimension(:with_hash) do
26
+ columns do
27
+ binary :hash, :unique => true
28
+ end
29
+
30
+ natural_key :hash
31
+ end
32
+
25
33
  @schema.define_fact(:addresses) do
26
34
  dimensions :user, :address
27
35
  natural_key :user, :address
@@ -114,6 +122,35 @@ describe Chicago::ETL::KeyBuilder do
114
122
  end
115
123
  end
116
124
 
125
+ describe "for non-identifiable dimensions with an existing hash" do
126
+ before :each do
127
+ @builder = described_class.
128
+ for_table(@schema.dimension(:with_hash), @db)
129
+ end
130
+
131
+ it "returns an incrementing key, given a row" do
132
+ @builder.key(:hash => "aaa").should == 1
133
+ @builder.key(:hash => "aab").should == 2
134
+ end
135
+
136
+ it "returns the same incrementing key" do
137
+ @builder.key(:hash => "aaa").should == 1
138
+ @builder.key(:hash => "aaa").should == 1
139
+ end
140
+
141
+ it "returns the same incrementing key, ignoring case" do
142
+ @builder.key(:hash => "aaa").should == 1
143
+ @builder.key(:hash => "AAA").should == 1
144
+ end
145
+
146
+ it "inserts the hash as a binary literal" do
147
+ # Yuck. Don't like the implementation test, but mock
148
+ # expectations fail here for some reason, maybe because of the
149
+ # Sequel::LiteralString?
150
+ @builder.key_for_insert(@builder.original_key(:hash => "aaa")).should == "0xAAA".lit
151
+ end
152
+ end
153
+
117
154
  describe "for non-identifiable dimensions with natural keys" do
118
155
  before :each do
119
156
  @builder = described_class.for_table(@schema.dimension(:address), @db)
@@ -48,9 +48,13 @@ describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
48
48
  end
49
49
 
50
50
  it "can set column values" do
51
- described_class.new("bar.csv", :foo, ['@bar', 'quux'],
51
+ sql = described_class.new("bar.csv", :foo, ['@bar', 'quux'],
52
52
  :set => {:bar => :unhex.sql_function("@bar".lit),
53
53
  :etl_batch_id => 3}).
54
- to_sql(TEST_DB).should include("SET `bar` = unhex(@bar), `etl_batch_id` = 3")
54
+ to_sql(TEST_DB)
55
+
56
+ sql.should include("SET")
57
+ sql.should include("`etl_batch_id` = 3")
58
+ sql.should include("`bar` = unhex(@bar)")
55
59
  end
56
60
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 5
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 12
10
- version: 0.0.12
9
+ - 13
10
+ version: 0.0.13
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-02-26 00:00:00 Z
18
+ date: 2013-04-16 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement