linkage 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/Gemfile +15 -13
- data/Gemfile.lock +67 -37
- data/Guardfile +0 -2
- data/Rakefile +122 -25
- data/lib/linkage/comparator.rb +172 -0
- data/lib/linkage/comparators/binary.rb +12 -0
- data/lib/linkage/comparators/compare.rb +46 -0
- data/lib/linkage/comparators/within.rb +32 -0
- data/lib/linkage/configuration.rb +285 -153
- data/lib/linkage/data.rb +32 -7
- data/lib/linkage/dataset.rb +107 -32
- data/lib/linkage/decollation.rb +93 -0
- data/lib/linkage/expectation.rb +21 -0
- data/lib/linkage/expectations/exhaustive.rb +63 -0
- data/lib/linkage/expectations/simple.rb +168 -0
- data/lib/linkage/field.rb +30 -4
- data/lib/linkage/field_set.rb +6 -3
- data/lib/linkage/function.rb +50 -3
- data/lib/linkage/functions/binary.rb +30 -0
- data/lib/linkage/functions/cast.rb +54 -0
- data/lib/linkage/functions/length.rb +29 -0
- data/lib/linkage/functions/strftime.rb +12 -11
- data/lib/linkage/functions/trim.rb +8 -0
- data/lib/linkage/group.rb +20 -0
- data/lib/linkage/import_buffer.rb +5 -16
- data/lib/linkage/meta_object.rb +139 -0
- data/lib/linkage/result_set.rb +74 -17
- data/lib/linkage/runner/single_threaded.rb +125 -10
- data/lib/linkage/version.rb +3 -0
- data/lib/linkage.rb +11 -0
- data/linkage.gemspec +16 -121
- data/test/config.yml +5 -0
- data/test/helper.rb +73 -8
- data/test/integration/test_collation.rb +45 -0
- data/test/integration/test_configuration.rb +268 -0
- data/test/integration/test_cross_linkage.rb +4 -17
- data/test/integration/test_dataset.rb +45 -2
- data/test/integration/test_dual_linkage.rb +40 -24
- data/test/integration/test_functions.rb +22 -0
- data/test/integration/test_result_set.rb +85 -0
- data/test/integration/test_scoring.rb +84 -0
- data/test/integration/test_self_linkage.rb +5 -0
- data/test/integration/test_within_comparator.rb +100 -0
- data/test/unit/comparators/test_compare.rb +105 -0
- data/test/unit/comparators/test_within.rb +57 -0
- data/test/unit/expectations/test_exhaustive.rb +111 -0
- data/test/unit/expectations/test_simple.rb +303 -0
- data/test/unit/functions/test_binary.rb +54 -0
- data/test/unit/functions/test_cast.rb +98 -0
- data/test/unit/functions/test_length.rb +52 -0
- data/test/unit/functions/test_strftime.rb +17 -13
- data/test/unit/functions/test_trim.rb +11 -4
- data/test/unit/test_comparator.rb +124 -0
- data/test/unit/test_configuration.rb +137 -175
- data/test/unit/test_data.rb +44 -0
- data/test/unit/test_dataset.rb +73 -21
- data/test/unit/test_decollation.rb +201 -0
- data/test/unit/test_field.rb +38 -14
- data/test/unit/test_field_set.rb +12 -8
- data/test/unit/test_function.rb +83 -16
- data/test/unit/test_group.rb +28 -0
- data/test/unit/test_import_buffer.rb +13 -27
- data/test/unit/test_meta_object.rb +208 -0
- data/test/unit/test_result_set.rb +221 -3
- metadata +82 -190
@@ -5,28 +5,40 @@ module Linkage
|
|
5
5
|
class SingleThreadedRunner < Runner
|
6
6
|
# @return [Linkage::ResultSet]
|
7
7
|
def execute
|
8
|
-
|
9
|
-
|
8
|
+
result_set.create_tables!
|
9
|
+
|
10
|
+
@pk_1 = config.dataset_1.field_set.primary_key.to_expr
|
11
|
+
@pk_2 = config.dataset_2.field_set.primary_key.to_expr
|
12
|
+
if config.has_simple_expectations?
|
13
|
+
setup_datasets
|
14
|
+
group_records
|
15
|
+
|
16
|
+
if config.has_exhaustive_expectations?
|
17
|
+
score_records_with_groups
|
18
|
+
else
|
19
|
+
create_matches
|
20
|
+
end
|
21
|
+
else
|
22
|
+
dataset_1, dataset_2 = config.datasets_with_applied_exhaustive_expectations
|
23
|
+
score_records_without_groups(dataset_1, dataset_2)
|
24
|
+
end
|
10
25
|
|
26
|
+
result_set.flush!
|
11
27
|
return result_set
|
12
28
|
end
|
13
29
|
|
14
30
|
private
|
15
31
|
|
16
32
|
def setup_datasets
|
17
|
-
@dataset_1, @dataset_2 = config.
|
33
|
+
@dataset_1, @dataset_2 = config.datasets_with_applied_simple_expectations
|
18
34
|
|
19
|
-
|
20
|
-
@dataset_1 = @dataset_1.select(pk.to_expr)
|
35
|
+
@dataset_1 = @dataset_1.select(@pk_1)
|
21
36
|
if @config.linkage_type != :self
|
22
|
-
|
23
|
-
@dataset_2 = @dataset_2.select(pk.to_expr)
|
37
|
+
@dataset_2 = @dataset_2.select(@pk_2)
|
24
38
|
end
|
25
39
|
end
|
26
40
|
|
27
41
|
def group_records
|
28
|
-
result_set.create_tables!
|
29
|
-
|
30
42
|
if config.linkage_type == :self
|
31
43
|
group_records_for(@dataset_1, 1)
|
32
44
|
else
|
@@ -56,7 +68,8 @@ module Linkage
|
|
56
68
|
groups_dataset.field_set.values.each do |field|
|
57
69
|
# Sort on all fields
|
58
70
|
if !field.primary_key?
|
59
|
-
|
71
|
+
meta_object = MetaObject.new(field)
|
72
|
+
groups_dataset = groups_dataset.group_match_more(meta_object)
|
60
73
|
end
|
61
74
|
end
|
62
75
|
|
@@ -68,5 +81,107 @@ module Linkage
|
|
68
81
|
sub_dataset = groups_dataset.select(:max.sql_function(:id).as(:id)).group_by_matches
|
69
82
|
groups_dataset.filter(:id => sub_dataset.obj).delete
|
70
83
|
end
|
84
|
+
|
85
|
+
def score_records_with_groups
|
86
|
+
result_set.groups_dataset.each do |group_record|
|
87
|
+
group = Group.from_row(group_record)
|
88
|
+
dataset_1, dataset_2 = config.apply_exhaustive_expectations(
|
89
|
+
*result_set.groups_records_datasets(group))
|
90
|
+
score_records_without_groups(dataset_1, dataset_2)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def score_records_without_groups(dataset_1, dataset_2)
|
95
|
+
if config.linkage_type == :self
|
96
|
+
keys = dataset_1.select_map(@pk_1)
|
97
|
+
unfiltered_dataset = dataset_1.unfiltered
|
98
|
+
cache = Hashery::LRUHash.new(config.record_cache_size) do |h, k|
|
99
|
+
h[k] = unfiltered_dataset.filter(@pk_1 => k).first
|
100
|
+
end
|
101
|
+
upper_bound = keys.length - 1
|
102
|
+
|
103
|
+
forward = true
|
104
|
+
keys.each_with_index do |key_1, key_1_index|
|
105
|
+
record_1 = cache[key_1]
|
106
|
+
|
107
|
+
lower_bound = key_1_index + 1
|
108
|
+
enum =
|
109
|
+
if forward
|
110
|
+
lower_bound.upto(upper_bound)
|
111
|
+
else
|
112
|
+
upper_bound.downto(lower_bound)
|
113
|
+
end
|
114
|
+
enum.each do |key_2_index|
|
115
|
+
record_2 = cache[keys[key_2_index]]
|
116
|
+
score(record_1, record_2)
|
117
|
+
end
|
118
|
+
forward = !forward
|
119
|
+
end
|
120
|
+
else
|
121
|
+
keys_2 = dataset_2.select_map(@pk_2)
|
122
|
+
unfiltered_dataset_2 = dataset_2.unfiltered
|
123
|
+
cache_2 = Hashery::LRUHash.new(config.record_cache_size) do |h, k|
|
124
|
+
h[k] = unfiltered_dataset_2.filter(@pk_2 => k).first
|
125
|
+
end
|
126
|
+
keys_2_last = keys_2.length - 1
|
127
|
+
|
128
|
+
forward = true
|
129
|
+
dataset_1.each do |record_1|
|
130
|
+
enum = forward ? 0.upto(keys_2_last) : keys_2_last.downto(0)
|
131
|
+
enum.each do |key_2_index|
|
132
|
+
record_2 = cache_2[keys_2[key_2_index]]
|
133
|
+
score(record_1, record_2)
|
134
|
+
end
|
135
|
+
forward = !forward
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def score(record_1, record_2)
|
141
|
+
pk_1 = record_1[@pk_1]
|
142
|
+
pk_2 = record_2[@pk_2]
|
143
|
+
|
144
|
+
catch(:stop) do
|
145
|
+
total_score = 0
|
146
|
+
config.exhaustive_expectations.each_with_index do |expectation, comparator_id|
|
147
|
+
comparator = expectation.comparator
|
148
|
+
|
149
|
+
score = comparator.score(record_1, record_2)
|
150
|
+
result_set.add_score(comparator_id, pk_1, pk_2, score)
|
151
|
+
|
152
|
+
throw(:stop) unless expectation.satisfied?(score)
|
153
|
+
total_score += score
|
154
|
+
end
|
155
|
+
result_set.add_match(pk_1, pk_2, total_score)
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
# Only needed for linkages without exhaustive expectations
|
160
|
+
def create_matches
|
161
|
+
result_set.groups_dataset.each do |group_record|
|
162
|
+
group = Group.from_row(group_record)
|
163
|
+
dataset_1, dataset_2 = result_set.groups_records_datasets(group)
|
164
|
+
|
165
|
+
if config.linkage_type == :self
|
166
|
+
keys = dataset_1.select_map(@pk_1)
|
167
|
+
keys_last = keys.length - 1
|
168
|
+
keys.each_with_index do |key_1, key_1_index|
|
169
|
+
(key_1_index + 1).upto(keys_last) do |key_2_index|
|
170
|
+
key_2 = keys[key_2_index]
|
171
|
+
result_set.add_match(key_1, key_2, nil)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
else
|
175
|
+
keys_1 = dataset_1.select_map(@pk_1)
|
176
|
+
keys_2 = dataset_2.select_map(@pk_2)
|
177
|
+
|
178
|
+
keys_1.each do |key_1|
|
179
|
+
keys_2.each do |key_2|
|
180
|
+
result_set.add_match(key_1, key_2, nil)
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
71
186
|
end
|
72
187
|
end
|
data/lib/linkage.rb
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
require 'pathname'
|
2
2
|
require 'delegate'
|
3
3
|
require 'sequel'
|
4
|
+
require 'hashery'
|
4
5
|
|
5
6
|
module Linkage
|
6
7
|
end
|
7
8
|
|
8
9
|
path = Pathname.new(File.expand_path(File.dirname(__FILE__))) + 'linkage'
|
10
|
+
require path + 'version'
|
9
11
|
require path + 'utils'
|
10
12
|
require path + 'warnings'
|
13
|
+
require path + 'decollation'
|
11
14
|
require path + 'dataset'
|
12
15
|
require path + 'runner'
|
13
16
|
require path + 'data'
|
@@ -15,6 +18,14 @@ require path + 'field'
|
|
15
18
|
require path + 'function'
|
16
19
|
require path + 'group'
|
17
20
|
require path + 'import_buffer'
|
21
|
+
require path + 'meta_object'
|
22
|
+
require path + 'expectation'
|
18
23
|
require path + 'configuration'
|
19
24
|
require path + 'result_set'
|
20
25
|
require path + 'field_set'
|
26
|
+
require path + 'comparator'
|
27
|
+
|
28
|
+
Sequel.extension :collation
|
29
|
+
if Sequel::Collation.respond_to?(:suppress_warnings=)
|
30
|
+
Sequel::Collation.suppress_warnings = true
|
31
|
+
end
|
data/linkage.gemspec
CHANGED
@@ -1,126 +1,21 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/linkage/version', __FILE__)
|
5
3
|
|
6
|
-
Gem::Specification.new do |
|
7
|
-
|
8
|
-
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.authors = ["Jeremy Stephens"]
|
6
|
+
gem.email = ["jeremy.f.stephens@vanderbilt.edu"]
|
7
|
+
gem.description = %q{Performs record linkage between one or two datasets, using Sequel on the backend}
|
8
|
+
gem.summary = %q{Record linkage library}
|
9
|
+
gem.homepage = "http://github.com/coupler/linkage"
|
9
10
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
"LICENSE.txt",
|
17
|
-
"README.markdown"
|
18
|
-
]
|
19
|
-
s.files = [
|
20
|
-
".document",
|
21
|
-
".vimrc",
|
22
|
-
"Gemfile",
|
23
|
-
"Gemfile.lock",
|
24
|
-
"Guardfile",
|
25
|
-
"LICENSE.txt",
|
26
|
-
"README.markdown",
|
27
|
-
"Rakefile",
|
28
|
-
"VERSION",
|
29
|
-
"lib/linkage.rb",
|
30
|
-
"lib/linkage/configuration.rb",
|
31
|
-
"lib/linkage/data.rb",
|
32
|
-
"lib/linkage/dataset.rb",
|
33
|
-
"lib/linkage/field.rb",
|
34
|
-
"lib/linkage/field_set.rb",
|
35
|
-
"lib/linkage/function.rb",
|
36
|
-
"lib/linkage/functions/strftime.rb",
|
37
|
-
"lib/linkage/functions/trim.rb",
|
38
|
-
"lib/linkage/group.rb",
|
39
|
-
"lib/linkage/import_buffer.rb",
|
40
|
-
"lib/linkage/result_set.rb",
|
41
|
-
"lib/linkage/runner.rb",
|
42
|
-
"lib/linkage/runner/single_threaded.rb",
|
43
|
-
"lib/linkage/utils.rb",
|
44
|
-
"lib/linkage/warnings.rb",
|
45
|
-
"linkage.gemspec",
|
46
|
-
"test/config.yml",
|
47
|
-
"test/helper.rb",
|
48
|
-
"test/integration/test_cross_linkage.rb",
|
49
|
-
"test/integration/test_dataset.rb",
|
50
|
-
"test/integration/test_dual_linkage.rb",
|
51
|
-
"test/integration/test_functions.rb",
|
52
|
-
"test/integration/test_self_linkage.rb",
|
53
|
-
"test/unit/functions/test_strftime.rb",
|
54
|
-
"test/unit/functions/test_trim.rb",
|
55
|
-
"test/unit/runner/test_single_threaded.rb",
|
56
|
-
"test/unit/test_configuration.rb",
|
57
|
-
"test/unit/test_data.rb",
|
58
|
-
"test/unit/test_dataset.rb",
|
59
|
-
"test/unit/test_field.rb",
|
60
|
-
"test/unit/test_field_set.rb",
|
61
|
-
"test/unit/test_function.rb",
|
62
|
-
"test/unit/test_group.rb",
|
63
|
-
"test/unit/test_import_buffer.rb",
|
64
|
-
"test/unit/test_linkage.rb",
|
65
|
-
"test/unit/test_result_set.rb",
|
66
|
-
"test/unit/test_runner.rb",
|
67
|
-
"test/unit/test_utils.rb"
|
68
|
-
]
|
69
|
-
s.homepage = "http://github.com/coupler/linkage"
|
70
|
-
s.licenses = ["MIT"]
|
71
|
-
s.require_paths = ["lib"]
|
72
|
-
s.rubygems_version = "1.8.23"
|
73
|
-
s.summary = "Record linkage library"
|
11
|
+
gem.files = `git ls-files`.split($\)
|
12
|
+
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
13
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
14
|
+
gem.name = "linkage"
|
15
|
+
gem.require_paths = ["lib"]
|
16
|
+
gem.version = Linkage::VERSION
|
74
17
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
79
|
-
s.add_runtime_dependency(%q<sequel>, [">= 0"])
|
80
|
-
s.add_development_dependency(%q<bundler>, [">= 0"])
|
81
|
-
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
82
|
-
s.add_development_dependency(%q<test-unit>, [">= 0"])
|
83
|
-
s.add_development_dependency(%q<mocha>, [">= 0"])
|
84
|
-
s.add_development_dependency(%q<sqlite3>, [">= 0"])
|
85
|
-
s.add_development_dependency(%q<yard>, [">= 0"])
|
86
|
-
s.add_development_dependency(%q<rake>, [">= 0"])
|
87
|
-
s.add_development_dependency(%q<versionomy>, [">= 0"])
|
88
|
-
s.add_development_dependency(%q<mysql2>, [">= 0"])
|
89
|
-
s.add_development_dependency(%q<pry>, [">= 0"])
|
90
|
-
s.add_development_dependency(%q<rdiscount>, [">= 0"])
|
91
|
-
s.add_development_dependency(%q<guard-test>, [">= 0"])
|
92
|
-
s.add_development_dependency(%q<guard-yard>, [">= 0"])
|
93
|
-
else
|
94
|
-
s.add_dependency(%q<sequel>, [">= 0"])
|
95
|
-
s.add_dependency(%q<bundler>, [">= 0"])
|
96
|
-
s.add_dependency(%q<jeweler>, [">= 0"])
|
97
|
-
s.add_dependency(%q<test-unit>, [">= 0"])
|
98
|
-
s.add_dependency(%q<mocha>, [">= 0"])
|
99
|
-
s.add_dependency(%q<sqlite3>, [">= 0"])
|
100
|
-
s.add_dependency(%q<yard>, [">= 0"])
|
101
|
-
s.add_dependency(%q<rake>, [">= 0"])
|
102
|
-
s.add_dependency(%q<versionomy>, [">= 0"])
|
103
|
-
s.add_dependency(%q<mysql2>, [">= 0"])
|
104
|
-
s.add_dependency(%q<pry>, [">= 0"])
|
105
|
-
s.add_dependency(%q<rdiscount>, [">= 0"])
|
106
|
-
s.add_dependency(%q<guard-test>, [">= 0"])
|
107
|
-
s.add_dependency(%q<guard-yard>, [">= 0"])
|
108
|
-
end
|
109
|
-
else
|
110
|
-
s.add_dependency(%q<sequel>, [">= 0"])
|
111
|
-
s.add_dependency(%q<bundler>, [">= 0"])
|
112
|
-
s.add_dependency(%q<jeweler>, [">= 0"])
|
113
|
-
s.add_dependency(%q<test-unit>, [">= 0"])
|
114
|
-
s.add_dependency(%q<mocha>, [">= 0"])
|
115
|
-
s.add_dependency(%q<sqlite3>, [">= 0"])
|
116
|
-
s.add_dependency(%q<yard>, [">= 0"])
|
117
|
-
s.add_dependency(%q<rake>, [">= 0"])
|
118
|
-
s.add_dependency(%q<versionomy>, [">= 0"])
|
119
|
-
s.add_dependency(%q<mysql2>, [">= 0"])
|
120
|
-
s.add_dependency(%q<pry>, [">= 0"])
|
121
|
-
s.add_dependency(%q<rdiscount>, [">= 0"])
|
122
|
-
s.add_dependency(%q<guard-test>, [">= 0"])
|
123
|
-
s.add_dependency(%q<guard-yard>, [">= 0"])
|
124
|
-
end
|
18
|
+
gem.add_dependency "sequel"
|
19
|
+
gem.add_dependency "sequel-collation"
|
20
|
+
gem.add_dependency "hashery"
|
125
21
|
end
|
126
|
-
|
data/test/config.yml
CHANGED
data/test/helper.rb
CHANGED
@@ -8,18 +8,35 @@ rescue Bundler::BundlerError => e
|
|
8
8
|
exit e.status_code
|
9
9
|
end
|
10
10
|
require 'test/unit'
|
11
|
-
require 'mocha'
|
11
|
+
require 'mocha/setup'
|
12
12
|
require 'tmpdir'
|
13
13
|
require 'logger'
|
14
14
|
require 'pp'
|
15
15
|
require 'versionomy'
|
16
|
-
|
16
|
+
require 'erb'
|
17
17
|
|
18
18
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
19
19
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
20
20
|
require 'linkage'
|
21
21
|
|
22
22
|
class Test::Unit::TestCase
|
23
|
+
def self.current_ruby_version
|
24
|
+
@current_ruby_version ||= Versionomy.parse(RUBY_VERSION)
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.ruby19
|
28
|
+
@ruby19 ||= Versionomy.parse("1.9")
|
29
|
+
end
|
30
|
+
|
31
|
+
@@database_config = nil
|
32
|
+
def self.database_config
|
33
|
+
if @@database_config.nil?
|
34
|
+
template = File.read(File.join(File.dirname(__FILE__), "config.yml"))
|
35
|
+
@@database_config = YAML.load(ERB.new(template).result(binding))
|
36
|
+
end
|
37
|
+
@@database_config
|
38
|
+
end
|
39
|
+
|
23
40
|
def stub_field(name, options = {}, &block)
|
24
41
|
f = Linkage::Field.allocate
|
25
42
|
f.stubs({:static? => false}.merge(options))
|
@@ -38,6 +55,15 @@ class Test::Unit::TestCase
|
|
38
55
|
f
|
39
56
|
end
|
40
57
|
|
58
|
+
def stub_instance(klass, options = {}, &block)
|
59
|
+
f = klass.allocate
|
60
|
+
f.stubs(options)
|
61
|
+
if block
|
62
|
+
f.send(:instance_eval, &block)
|
63
|
+
end
|
64
|
+
f
|
65
|
+
end
|
66
|
+
|
41
67
|
def new_function(name, ruby_type = nil, params = nil, &block)
|
42
68
|
klass = Class.new(Linkage::Function)
|
43
69
|
klass.send(:define_singleton_method, :function_name) { name }
|
@@ -50,17 +76,50 @@ class Test::Unit::TestCase
|
|
50
76
|
klass
|
51
77
|
end
|
52
78
|
|
79
|
+
def new_comparator(name, params = nil, score_range = nil, &block)
|
80
|
+
klass = Class.new(Linkage::Comparator)
|
81
|
+
klass.send(:define_singleton_method, :comparator_name) { name }
|
82
|
+
if params
|
83
|
+
klass.send(:define_singleton_method, :parameters) { params }
|
84
|
+
end
|
85
|
+
if score_range
|
86
|
+
klass.send(:define_singleton_method, :score_range) { score_range }
|
87
|
+
end
|
88
|
+
klass.send(:define_method, :score) { |record_1, record_2| 100 }
|
89
|
+
if block_given?
|
90
|
+
klass.class_eval(&block)
|
91
|
+
end
|
92
|
+
klass
|
93
|
+
end
|
53
94
|
|
54
|
-
def
|
55
|
-
|
95
|
+
def database_config
|
96
|
+
self.class.database_config
|
56
97
|
end
|
57
98
|
|
58
|
-
def
|
59
|
-
|
99
|
+
def database_options_for(adapter)
|
100
|
+
config =
|
101
|
+
if adapter == 'sqlite'
|
102
|
+
@tmpdir ||= Dir.mktmpdir('linkage')
|
103
|
+
{ 'adapter' => 'sqlite', 'database' => File.join(@tmpdir, "foo") }
|
104
|
+
else
|
105
|
+
database_config[adapter]
|
106
|
+
end
|
107
|
+
|
108
|
+
if config
|
109
|
+
return config
|
110
|
+
else
|
111
|
+
omit("Couldn't find configuration for adapter '#{adapter}'")
|
112
|
+
end
|
60
113
|
end
|
61
114
|
|
62
|
-
def
|
63
|
-
|
115
|
+
def database_for(adapter, options = {}, &block)
|
116
|
+
config = database_options_for(adapter)
|
117
|
+
|
118
|
+
if block
|
119
|
+
Sequel.connect(config, options, &block)
|
120
|
+
else
|
121
|
+
Sequel.connect(config, options)
|
122
|
+
end
|
64
123
|
end
|
65
124
|
|
66
125
|
def prefixed_logger(prefix)
|
@@ -72,6 +131,12 @@ class Test::Unit::TestCase
|
|
72
131
|
}
|
73
132
|
logger
|
74
133
|
end
|
134
|
+
|
135
|
+
def teardown
|
136
|
+
if @tmpdir && File.exist?(@tmpdir)
|
137
|
+
FileUtils.remove_entry_secure(@tmpdir)
|
138
|
+
end
|
139
|
+
end
|
75
140
|
end
|
76
141
|
|
77
142
|
module UnitTests; end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
module IntegrationTests
|
4
|
+
class TestCollation < Test::Unit::TestCase
|
5
|
+
def setup
|
6
|
+
@tmpdir = Dir.mktmpdir('linkage')
|
7
|
+
@tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
|
8
|
+
end
|
9
|
+
|
10
|
+
def database(options = {}, &block)
|
11
|
+
Sequel.connect(@tmpuri, options, &block)
|
12
|
+
end
|
13
|
+
|
14
|
+
def teardown
|
15
|
+
FileUtils.remove_entry_secure(@tmpdir)
|
16
|
+
end
|
17
|
+
|
18
|
+
test "comparing strings exactly in MySQL" do
|
19
|
+
options = database_options_for('mysql')
|
20
|
+
database_for('mysql') do |db|
|
21
|
+
db.create_table!(:foo) do
|
22
|
+
primary_key :id
|
23
|
+
String :foo
|
24
|
+
String :bar
|
25
|
+
end
|
26
|
+
db[:foo].import([:foo, :bar], [
|
27
|
+
["Foo", "foo"],
|
28
|
+
["bar", "bar "],
|
29
|
+
])
|
30
|
+
end
|
31
|
+
dataset = Linkage::Dataset.new(options, :foo)
|
32
|
+
tmpuri = @tmpuri
|
33
|
+
conf = dataset.link_with(dataset) do
|
34
|
+
(lhs[:foo].must == rhs[:bar]).exactly
|
35
|
+
save_results_in(tmpuri)
|
36
|
+
end
|
37
|
+
runner = Linkage::SingleThreadedRunner.new(conf)
|
38
|
+
runner.execute
|
39
|
+
|
40
|
+
database do |db|
|
41
|
+
assert_equal 0, db[:groups].count
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|