linkage 0.0.8 → 0.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.yardopts +1 -0
- data/Gemfile +1 -19
- data/Gemfile-java +3 -0
- data/README.markdown +88 -34
- data/Rakefile +16 -15
- data/TODO +4 -0
- data/lib/linkage/comparator.rb +139 -144
- data/lib/linkage/comparators/compare.rb +236 -29
- data/lib/linkage/comparators/strcompare.rb +85 -0
- data/lib/linkage/comparators/within.rb +24 -20
- data/lib/linkage/configuration.rb +44 -466
- data/lib/linkage/dataset.rb +28 -127
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +6 -37
- data/lib/linkage/field_set.rb +3 -3
- data/lib/linkage/match_recorder.rb +22 -0
- data/lib/linkage/match_set.rb +34 -0
- data/lib/linkage/match_sets/csv.rb +39 -0
- data/lib/linkage/match_sets/database.rb +45 -0
- data/lib/linkage/matcher.rb +30 -0
- data/lib/linkage/result_set.rb +25 -110
- data/lib/linkage/result_sets/csv.rb +54 -0
- data/lib/linkage/result_sets/database.rb +42 -0
- data/lib/linkage/runner.rb +57 -16
- data/lib/linkage/score_recorder.rb +30 -0
- data/lib/linkage/score_set.rb +49 -0
- data/lib/linkage/score_sets/csv.rb +64 -0
- data/lib/linkage/score_sets/database.rb +77 -0
- data/lib/linkage/version.rb +1 -1
- data/lib/linkage.rb +14 -17
- data/linkage.gemspec +13 -1
- data/linkage.gemspec-java +32 -0
- data/test/helper.rb +30 -23
- data/test/integration/test_cross_linkage.rb +46 -25
- data/test/integration/test_database_result_set.rb +55 -0
- data/test/integration/test_dual_linkage.rb +19 -94
- data/test/integration/test_self_linkage.rb +100 -203
- data/test/integration/test_within_comparator.rb +24 -77
- data/test/unit/comparators/test_compare.rb +254 -50
- data/test/unit/comparators/test_strcompare.rb +45 -0
- data/test/unit/comparators/test_within.rb +14 -26
- data/test/unit/match_sets/test_csv.rb +78 -0
- data/test/unit/match_sets/test_database.rb +63 -0
- data/test/unit/result_sets/test_csv.rb +111 -0
- data/test/unit/result_sets/test_database.rb +68 -0
- data/test/unit/score_sets/test_csv.rb +151 -0
- data/test/unit/score_sets/test_database.rb +149 -0
- data/test/unit/test_comparator.rb +46 -83
- data/test/unit/test_comparators.rb +4 -0
- data/test/unit/test_configuration.rb +99 -145
- data/test/unit/test_dataset.rb +52 -73
- data/test/unit/test_field.rb +4 -55
- data/test/unit/test_field_set.rb +6 -6
- data/test/unit/test_match_recorder.rb +23 -0
- data/test/unit/test_match_set.rb +23 -0
- data/test/unit/test_match_sets.rb +4 -0
- data/test/unit/test_matcher.rb +44 -0
- data/test/unit/test_result_set.rb +24 -223
- data/test/unit/test_result_sets.rb +4 -0
- data/test/unit/test_runner.rb +122 -17
- data/test/unit/test_runners.rb +4 -0
- data/test/unit/test_score_recorder.rb +25 -0
- data/test/unit/test_score_set.rb +37 -0
- data/test/unit/test_score_sets.rb +4 -0
- metadata +183 -90
- data/Gemfile.lock +0 -92
- data/lib/linkage/comparators/binary.rb +0 -12
- data/lib/linkage/data.rb +0 -175
- data/lib/linkage/decollation.rb +0 -93
- data/lib/linkage/expectation.rb +0 -21
- data/lib/linkage/expectations/exhaustive.rb +0 -63
- data/lib/linkage/expectations/simple.rb +0 -168
- data/lib/linkage/function.rb +0 -148
- data/lib/linkage/functions/binary.rb +0 -30
- data/lib/linkage/functions/cast.rb +0 -54
- data/lib/linkage/functions/length.rb +0 -29
- data/lib/linkage/functions/strftime.rb +0 -33
- data/lib/linkage/functions/trim.rb +0 -30
- data/lib/linkage/group.rb +0 -55
- data/lib/linkage/meta_object.rb +0 -139
- data/lib/linkage/runner/single_threaded.rb +0 -187
- data/lib/linkage/utils.rb +0 -164
- data/lib/linkage/warnings.rb +0 -5
- data/test/integration/test_collation.rb +0 -45
- data/test/integration/test_configuration.rb +0 -268
- data/test/integration/test_dataset.rb +0 -116
- data/test/integration/test_functions.rb +0 -88
- data/test/integration/test_result_set.rb +0 -85
- data/test/integration/test_scoring.rb +0 -84
- data/test/unit/expectations/test_exhaustive.rb +0 -111
- data/test/unit/expectations/test_simple.rb +0 -303
- data/test/unit/functions/test_binary.rb +0 -54
- data/test/unit/functions/test_cast.rb +0 -98
- data/test/unit/functions/test_length.rb +0 -52
- data/test/unit/functions/test_strftime.rb +0 -60
- data/test/unit/functions/test_trim.rb +0 -43
- data/test/unit/runner/test_single_threaded.rb +0 -12
- data/test/unit/test_data.rb +0 -445
- data/test/unit/test_decollation.rb +0 -201
- data/test/unit/test_function.rb +0 -233
- data/test/unit/test_group.rb +0 -38
- data/test/unit/test_meta_object.rb +0 -208
- data/test/unit/test_utils.rb +0 -341
@@ -1,268 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
module IntegrationTests
|
4
|
-
class TestConfiguration < Test::Unit::TestCase
|
5
|
-
test "linkage_type is self when the two datasets are the same" do
|
6
|
-
database_for('sqlite') do |db|
|
7
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
8
|
-
end
|
9
|
-
|
10
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
11
|
-
conf = Linkage::Configuration.new(dataset, dataset)
|
12
|
-
assert_equal :self, conf.linkage_type
|
13
|
-
end
|
14
|
-
|
15
|
-
test "linkage_type is dual when the two datasets are different" do
|
16
|
-
database_for('sqlite') do |db|
|
17
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
18
|
-
db.create_table(:bar) { primary_key(:id); String(:foo); String(:bar) }
|
19
|
-
end
|
20
|
-
|
21
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
22
|
-
dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), "bar")
|
23
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_2)
|
24
|
-
assert_equal :dual, conf.linkage_type
|
25
|
-
end
|
26
|
-
|
27
|
-
test "linkage_type is cross when there's different filters on both sides" do
|
28
|
-
database_for('sqlite') do |db|
|
29
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
30
|
-
end
|
31
|
-
|
32
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
33
|
-
conf = Linkage::Configuration.new(dataset, dataset)
|
34
|
-
conf.configure do
|
35
|
-
lhs[:foo].must == "foo"
|
36
|
-
rhs[:foo].must == "bar"
|
37
|
-
end
|
38
|
-
assert_equal :cross, conf.linkage_type
|
39
|
-
end
|
40
|
-
|
41
|
-
test "linkage_type is self when there's identical static filters on each side" do
|
42
|
-
database_for('sqlite') do |db|
|
43
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
44
|
-
end
|
45
|
-
|
46
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
47
|
-
conf = Linkage::Configuration.new(dataset, dataset)
|
48
|
-
conf.configure do
|
49
|
-
lhs[:foo].must == "foo"
|
50
|
-
rhs[:foo].must == "foo"
|
51
|
-
end
|
52
|
-
assert_equal :self, conf.linkage_type
|
53
|
-
end
|
54
|
-
|
55
|
-
test "linkage_type is cross when exhaustive expectations use different fields" do
|
56
|
-
database_for('sqlite') do |db|
|
57
|
-
db.create_table(:foo) { primary_key(:id); Integer(:foo); Integer(:bar) }
|
58
|
-
end
|
59
|
-
|
60
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
61
|
-
conf = Linkage::Configuration.new(dataset, dataset)
|
62
|
-
conf.configure do
|
63
|
-
lhs[:foo].must(be_within(5).of(rhs[:bar]))
|
64
|
-
end
|
65
|
-
assert_equal :cross, conf.linkage_type
|
66
|
-
end
|
67
|
-
|
68
|
-
test "static expectation" do
|
69
|
-
database_for('sqlite') do |db|
|
70
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
71
|
-
end
|
72
|
-
|
73
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
74
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_1)
|
75
|
-
conf.configure do
|
76
|
-
lhs[:foo].must == "foo"
|
77
|
-
end
|
78
|
-
|
79
|
-
dataset_2, _ = conf.datasets_with_applied_simple_expectations
|
80
|
-
assert_equal dataset_2.obj, dataset_1.filter(:foo => "foo").obj
|
81
|
-
end
|
82
|
-
|
83
|
-
test "complain if an invalid field is accessed" do
|
84
|
-
database_for('sqlite') do |db|
|
85
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
86
|
-
end
|
87
|
-
|
88
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
89
|
-
conf = Linkage::Configuration.new(dataset, dataset)
|
90
|
-
assert_raises(ArgumentError) do
|
91
|
-
conf.configure do
|
92
|
-
lhs[:foo].must == rhs[:non_existant_field]
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
operators = [:>, :<, :>=, :<=]
|
98
|
-
operators.each do |operator|
|
99
|
-
test "DSL #{operator} filter operator" do
|
100
|
-
database_for('sqlite') do |db|
|
101
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
102
|
-
end
|
103
|
-
|
104
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
105
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_1)
|
106
|
-
conf.configure do
|
107
|
-
lhs[:foo].must.send(operator, 123)
|
108
|
-
end
|
109
|
-
|
110
|
-
expr = Sequel::SQL::BooleanExpression.new(operator, Sequel::SQL::Identifier.new(:foo), 123)
|
111
|
-
dataset_2, _ = conf.datasets_with_applied_simple_expectations
|
112
|
-
assert_equal dataset_2.obj, dataset_1.filter(expr).obj
|
113
|
-
end
|
114
|
-
|
115
|
-
test "comparing two data sources with #{operator}" do
|
116
|
-
database_for('sqlite') do |db|
|
117
|
-
db.create_table(:foo) { primary_key(:id); Integer(:foo); Integer(:bar) }
|
118
|
-
end
|
119
|
-
|
120
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
121
|
-
conf = Linkage::Configuration.new(dataset, dataset)
|
122
|
-
conf.configure do
|
123
|
-
lhs[:foo].must.send(operator, rhs[:bar])
|
124
|
-
end
|
125
|
-
assert_equal 1, conf.exhaustive_expectations.length
|
126
|
-
|
127
|
-
comp = conf.exhaustive_expectations[0].comparator
|
128
|
-
assert_instance_of Linkage::Comparators::Compare, comp
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
test "must_not expectation" do
|
133
|
-
database_for('sqlite') do |db|
|
134
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
135
|
-
end
|
136
|
-
|
137
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
138
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_1)
|
139
|
-
conf.configure do
|
140
|
-
lhs[:foo].must_not == "foo"
|
141
|
-
end
|
142
|
-
|
143
|
-
dataset_2, _ = conf.datasets_with_applied_simple_expectations
|
144
|
-
assert_equal dataset_2.obj, dataset_1.filter(~{:foo => "foo"}).obj
|
145
|
-
end
|
146
|
-
|
147
|
-
test "static database function" do
|
148
|
-
database_for('sqlite') do |db|
|
149
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
150
|
-
end
|
151
|
-
|
152
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
153
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_1)
|
154
|
-
conf.configure do
|
155
|
-
lhs[:foo].must == trim("foo")
|
156
|
-
end
|
157
|
-
|
158
|
-
dataset_2, _ = conf.datasets_with_applied_simple_expectations
|
159
|
-
assert_equal dataset_1.filter({:foo => :trim.sql_function("foo")}).obj, dataset_2.obj
|
160
|
-
end
|
161
|
-
|
162
|
-
test "save_results_in" do
|
163
|
-
database_for('sqlite') do |db|
|
164
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
165
|
-
end
|
166
|
-
|
167
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
168
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_1)
|
169
|
-
conf.configure do
|
170
|
-
save_results_in("mysql://localhost/results", {:foo => 'bar'})
|
171
|
-
end
|
172
|
-
assert_equal "mysql://localhost/results", conf.results_uri
|
173
|
-
assert_equal({:foo => 'bar'}, conf.results_uri_options)
|
174
|
-
end
|
175
|
-
|
176
|
-
test "case insensitive field names" do
|
177
|
-
database_for('sqlite') do |db|
|
178
|
-
db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
|
179
|
-
end
|
180
|
-
|
181
|
-
assert_nothing_raised do
|
182
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
|
183
|
-
results_uri = database_options_for('sqlite')
|
184
|
-
conf = dataset.link_with(dataset) do
|
185
|
-
lhs[:Foo].must == rhs[:baR]
|
186
|
-
save_results_in(results_uri)
|
187
|
-
end
|
188
|
-
end
|
189
|
-
end
|
190
|
-
|
191
|
-
test "decollation_needed? is false when the datasets and results dataset all have the same database and collations" do
|
192
|
-
database_for('mysql') do |db|
|
193
|
-
db.create_table!(:foo) { primary_key(:id); String(:foo, :collate => :latin1_swedish_ci) }
|
194
|
-
db.create_table!(:bar) { primary_key(:id); String(:foo, :collate => :latin1_swedish_ci) }
|
195
|
-
end
|
196
|
-
|
197
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
|
198
|
-
dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'bar')
|
199
|
-
conf = dataset_1.link_with(dataset_2) do
|
200
|
-
lhs[:foo].must == rhs[:foo]
|
201
|
-
end
|
202
|
-
conf.results_uri = database_options_for('mysql')
|
203
|
-
assert !conf.decollation_needed?
|
204
|
-
end
|
205
|
-
|
206
|
-
test "decollation_needed? is true when the datasets have different database types" do
|
207
|
-
database_for('mysql') do |db|
|
208
|
-
db.create_table!(:foo) { primary_key(:id); String(:foo) }
|
209
|
-
end
|
210
|
-
|
211
|
-
database_for('sqlite') do |db|
|
212
|
-
db.create_table!(:foo) { primary_key(:id); String(:foo) }
|
213
|
-
end
|
214
|
-
|
215
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
|
216
|
-
dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
|
217
|
-
conf = dataset_1.link_with(dataset_2) do
|
218
|
-
lhs[:foo].must == rhs[:foo]
|
219
|
-
end
|
220
|
-
conf.results_uri = database_options_for('mysql')
|
221
|
-
assert conf.decollation_needed?
|
222
|
-
end
|
223
|
-
|
224
|
-
test "decollation_needed? is true when the result dataset has different database type than the datasets" do
|
225
|
-
database_for('mysql') do |db|
|
226
|
-
db.create_table!(:foo) { primary_key(:id); String(:foo) }
|
227
|
-
db.create_table!(:bar) { primary_key(:id); String(:foo) }
|
228
|
-
end
|
229
|
-
|
230
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
|
231
|
-
dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'bar')
|
232
|
-
conf = dataset_1.link_with(dataset_2) do
|
233
|
-
lhs[:foo].must == rhs[:foo]
|
234
|
-
end
|
235
|
-
conf.results_uri = database_options_for('sqlite')
|
236
|
-
assert conf.decollation_needed?
|
237
|
-
end
|
238
|
-
|
239
|
-
test "decollation_needed? is false when not comparing string columns" do
|
240
|
-
database_for('mysql') do |db|
|
241
|
-
db.create_table!(:foo) { primary_key(:id); Fixnum(:foo) }
|
242
|
-
end
|
243
|
-
|
244
|
-
database_for('sqlite') do |db|
|
245
|
-
db.create_table!(:foo) { primary_key(:id); Fixnum(:foo) }
|
246
|
-
end
|
247
|
-
|
248
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
|
249
|
-
dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
|
250
|
-
conf = dataset_1.link_with(dataset_2) do
|
251
|
-
lhs[:foo].must == rhs[:foo]
|
252
|
-
end
|
253
|
-
conf.results_uri = database_options_for('mysql')
|
254
|
-
assert !conf.decollation_needed?
|
255
|
-
end
|
256
|
-
|
257
|
-
test "creating comparator expectation for within" do
|
258
|
-
database_for('mysql') do |db|
|
259
|
-
db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
|
260
|
-
end
|
261
|
-
dataset = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
|
262
|
-
|
263
|
-
conf = dataset.link_with(dataset) do
|
264
|
-
lhs[:foo].must be_within(5).of(rhs[:foo])
|
265
|
-
end
|
266
|
-
end
|
267
|
-
end
|
268
|
-
end
|
@@ -1,116 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
class IntegrationTests::TestDataset < Test::Unit::TestCase
|
4
|
-
def setup
|
5
|
-
@tmpdir = Dir.mktmpdir('linkage')
|
6
|
-
@tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
|
7
|
-
end
|
8
|
-
|
9
|
-
def database(&block)
|
10
|
-
Sequel.connect(@tmpuri, &block)
|
11
|
-
end
|
12
|
-
|
13
|
-
def teardown
|
14
|
-
FileUtils.remove_entry_secure(@tmpdir)
|
15
|
-
end
|
16
|
-
|
17
|
-
test "methods that clone the dataset" do
|
18
|
-
database do |db|
|
19
|
-
db.create_table(:foo) do
|
20
|
-
primary_key :id
|
21
|
-
String :bar
|
22
|
-
end
|
23
|
-
end
|
24
|
-
ds_1 = Linkage::Dataset.new(@tmpuri, "foo")
|
25
|
-
ds_2 = ds_1.filter(:foo => 'bar')
|
26
|
-
assert_instance_of Linkage::Dataset, ds_2
|
27
|
-
assert_equal ds_2.field_set, ds_1.field_set
|
28
|
-
assert_match /`foo` = 'bar'/, ds_2.sql
|
29
|
-
end
|
30
|
-
|
31
|
-
test "each_group" do
|
32
|
-
database do |db|
|
33
|
-
db.create_table(:foo) do
|
34
|
-
primary_key :id
|
35
|
-
String :bar
|
36
|
-
end
|
37
|
-
db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo'], [3, 'bar'], [4, 'baz']])
|
38
|
-
end
|
39
|
-
|
40
|
-
ds = Linkage::Dataset.new(@tmpuri, "foo")
|
41
|
-
ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
|
42
|
-
ds.each_group do |group|
|
43
|
-
assert_equal({:bar => "foo"}, group.values)
|
44
|
-
assert_equal(2, group.count)
|
45
|
-
end
|
46
|
-
|
47
|
-
groups = []
|
48
|
-
ds.each_group(1) do |group|
|
49
|
-
groups << group
|
50
|
-
end
|
51
|
-
assert_equal 3, groups.length
|
52
|
-
end
|
53
|
-
|
54
|
-
test "each_group with alias" do
|
55
|
-
database do |db|
|
56
|
-
db.create_table(:foo) do
|
57
|
-
primary_key :id
|
58
|
-
String :bar
|
59
|
-
end
|
60
|
-
db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo'], [3, 'bar'], [4, 'baz']])
|
61
|
-
end
|
62
|
-
|
63
|
-
ds = Linkage::Dataset.new(@tmpuri, "foo")
|
64
|
-
ds = ds.group_match({
|
65
|
-
:meta_object => Linkage::MetaObject.new(ds.field_set[:bar]),
|
66
|
-
:alias => :bar_baz
|
67
|
-
})
|
68
|
-
ds.each_group do |group|
|
69
|
-
assert_equal({:bar_baz => "foo"}, group.values)
|
70
|
-
assert_equal(2, group.count)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
test "each_group with filters" do
|
75
|
-
database do |db|
|
76
|
-
db.create_table(:foo) do
|
77
|
-
primary_key :id
|
78
|
-
String :bar
|
79
|
-
Integer :baz
|
80
|
-
end
|
81
|
-
db[:foo].import([:id, :bar, :baz], [[1, 'foo', 1], [2, 'foo', 2], [3, 'bar', 3], [4, 'baz', 4]])
|
82
|
-
end
|
83
|
-
|
84
|
-
ds = Linkage::Dataset.new(@tmpuri, "foo")
|
85
|
-
ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
|
86
|
-
ds = ds.filter { baz >= 3 }
|
87
|
-
groups = []
|
88
|
-
ds.each_group(1) do |group|
|
89
|
-
groups << group
|
90
|
-
end
|
91
|
-
assert_equal 2, groups.length
|
92
|
-
end
|
93
|
-
|
94
|
-
test "each_group with collation" do
|
95
|
-
database_for('mysql') do |db|
|
96
|
-
db.create_table!(:foo) do
|
97
|
-
primary_key :id
|
98
|
-
String :bar, :collate => :latin1_swedish_ci
|
99
|
-
end
|
100
|
-
db[:foo].import([:id, :bar], [[1, 'fOo'], [2, 'foO'], [3, 'bar'], [4, 'baz']])
|
101
|
-
end
|
102
|
-
|
103
|
-
ds = Linkage::Dataset.new(database_options_for('mysql'), "foo")
|
104
|
-
ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
|
105
|
-
groups = []
|
106
|
-
ds.each_group(1) do |group|
|
107
|
-
groups << group
|
108
|
-
end
|
109
|
-
expected = [
|
110
|
-
{:bar => 'BAR'},
|
111
|
-
{:bar => 'BAZ'},
|
112
|
-
{:bar => 'FOO'}
|
113
|
-
]
|
114
|
-
assert_equal expected, groups.collect(&:decollated_values)
|
115
|
-
end
|
116
|
-
end
|
@@ -1,88 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
module IntegrationTests
|
4
|
-
class TestFunctions < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@tmpdir = Dir.mktmpdir('linkage')
|
7
|
-
@tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
|
8
|
-
end
|
9
|
-
|
10
|
-
def database(options = {}, &block)
|
11
|
-
Sequel.connect(@tmpuri, options, &block)
|
12
|
-
end
|
13
|
-
|
14
|
-
def teardown
|
15
|
-
FileUtils.remove_entry_secure(@tmpdir)
|
16
|
-
end
|
17
|
-
|
18
|
-
test "match functions" do
|
19
|
-
# insert the test data
|
20
|
-
database do |db|
|
21
|
-
db.create_table(:foo) { primary_key(:id); String(:bar) }
|
22
|
-
db[:foo].import([:id, :bar],
|
23
|
-
Array.new(100) { |i| [i, "bar%s" % (" " * (i % 10))] })
|
24
|
-
end
|
25
|
-
|
26
|
-
ds = Linkage::Dataset.new(@tmpuri, "foo", :single_threaded => true)
|
27
|
-
tmpuri = @tmpuri
|
28
|
-
conf = ds.link_with(ds) do
|
29
|
-
trim(lhs[:bar]).must == trim(rhs[:bar])
|
30
|
-
save_results_in(tmpuri)
|
31
|
-
end
|
32
|
-
assert_equal :self, conf.linkage_type
|
33
|
-
runner = Linkage::SingleThreadedRunner.new(conf)
|
34
|
-
runner.execute
|
35
|
-
|
36
|
-
database do |db|
|
37
|
-
assert_equal 1, db[:groups].count
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
test "strftime in sqlite" do
|
42
|
-
#logger = Logger.new(STDERR)
|
43
|
-
#database(:logger => logger) do |db|
|
44
|
-
database do |db|
|
45
|
-
db.create_table(:foo) { primary_key(:id); Date(:foo_date) }
|
46
|
-
db.create_table(:bar) { primary_key(:id); String(:bar_string) }
|
47
|
-
db[:foo].insert({:id => 1, :foo_date => Date.today})
|
48
|
-
db[:bar].insert({:id => 1, :bar_string => Date.today.strftime("%Y-%m-%d")})
|
49
|
-
end
|
50
|
-
|
51
|
-
ds_1 = Linkage::Dataset.new(@tmpuri, "foo", :single_threaded => true)
|
52
|
-
ds_2 = Linkage::Dataset.new(@tmpuri, "bar", :single_threaded => true)
|
53
|
-
tmpuri = @tmpuri
|
54
|
-
conf = ds_1.link_with(ds_2) do
|
55
|
-
strftime(lhs[:foo_date], "%Y-%m-%d").must == rhs[:bar_string]
|
56
|
-
save_results_in(tmpuri)
|
57
|
-
end
|
58
|
-
runner = Linkage::SingleThreadedRunner.new(conf)
|
59
|
-
runner.execute
|
60
|
-
|
61
|
-
database do |db|
|
62
|
-
assert_equal 1, db[:groups].count
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
test "binary function with static argument" do
|
67
|
-
database do |db|
|
68
|
-
db.create_table(:foo) { primary_key(:id); String(:bar) }
|
69
|
-
db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo']])
|
70
|
-
end
|
71
|
-
|
72
|
-
ds = Linkage::Dataset.new(@tmpuri, "foo")
|
73
|
-
tmpuri = @tmpuri
|
74
|
-
conf = ds.link_with(ds) do
|
75
|
-
lhs[:bar].must == rhs[:bar]
|
76
|
-
binary(lhs[:bar]).must == binary('foo')
|
77
|
-
binary(rhs[:bar]).must == binary('foo')
|
78
|
-
save_results_in(tmpuri)
|
79
|
-
end
|
80
|
-
runner = Linkage::SingleThreadedRunner.new(conf)
|
81
|
-
runner.execute
|
82
|
-
|
83
|
-
database do |db|
|
84
|
-
assert_equal 1, db[:groups].count
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
@@ -1,85 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
module IntegrationTests
|
4
|
-
class TestResultSet < Test::Unit::TestCase
|
5
|
-
test "#create_tables! creates original_groups table when decollation is needed" do
|
6
|
-
database_for('sqlite') do |db|
|
7
|
-
db.create_table!(:foo) { primary_key(:id); String(:foo) }
|
8
|
-
end
|
9
|
-
|
10
|
-
database_for('mysql') do |db|
|
11
|
-
db.create_table!(:foo) { primary_key(:id); String(:foo) }
|
12
|
-
end
|
13
|
-
|
14
|
-
dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
|
15
|
-
dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
|
16
|
-
results_uri = database_options_for('sqlite')
|
17
|
-
conf = dataset_1.link_with(dataset_2) do
|
18
|
-
lhs[:foo].must == rhs[:foo]
|
19
|
-
save_results_in(results_uri)
|
20
|
-
end
|
21
|
-
conf.result_set.create_tables!
|
22
|
-
assert_include conf.result_set.database.tables, :original_groups
|
23
|
-
end
|
24
|
-
|
25
|
-
test "#create_tables! doesn't create original_groups table when decollation is needed" do
|
26
|
-
database_for('sqlite') do |db|
|
27
|
-
db.create_table!(:foo) { primary_key(:id); String(:foo) }
|
28
|
-
end
|
29
|
-
|
30
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
|
31
|
-
results_uri = database_options_for('sqlite')
|
32
|
-
conf = dataset.link_with(dataset) do
|
33
|
-
lhs[:foo].must == rhs[:foo]
|
34
|
-
save_results_in(results_uri)
|
35
|
-
end
|
36
|
-
conf.result_set.create_tables!
|
37
|
-
assert_not_include conf.result_set.database.tables, :original_groups
|
38
|
-
end
|
39
|
-
|
40
|
-
test "#create_tables! doesn't create groups table when not needed" do
|
41
|
-
database_for('sqlite') do |db|
|
42
|
-
db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
|
43
|
-
end
|
44
|
-
|
45
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
|
46
|
-
results_uri = database_options_for('sqlite')
|
47
|
-
conf = dataset.link_with(dataset) do
|
48
|
-
lhs[:foo].must be_within(5).of(rhs[:foo])
|
49
|
-
save_results_in(results_uri)
|
50
|
-
end
|
51
|
-
conf.result_set.create_tables!
|
52
|
-
assert_not_include conf.result_set.database.tables, :groups
|
53
|
-
end
|
54
|
-
|
55
|
-
test "#create_tables! creates scores table when there are exhaustive expectations" do
|
56
|
-
database_for('sqlite') do |db|
|
57
|
-
db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
|
58
|
-
end
|
59
|
-
|
60
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
|
61
|
-
results_uri = database_options_for('sqlite')
|
62
|
-
conf = dataset.link_with(dataset) do
|
63
|
-
lhs[:foo].must be_within(5).of(rhs[:foo])
|
64
|
-
save_results_in(results_uri)
|
65
|
-
end
|
66
|
-
conf.result_set.create_tables!
|
67
|
-
assert_include conf.result_set.database.tables, :scores
|
68
|
-
end
|
69
|
-
|
70
|
-
test "#create_tables! doesn't create scores table when not needed" do
|
71
|
-
database_for('sqlite') do |db|
|
72
|
-
db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
|
73
|
-
end
|
74
|
-
|
75
|
-
dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
|
76
|
-
results_uri = database_options_for('sqlite')
|
77
|
-
conf = dataset.link_with(dataset) do
|
78
|
-
lhs[:foo].must == rhs[:foo]
|
79
|
-
save_results_in(results_uri)
|
80
|
-
end
|
81
|
-
conf.result_set.create_tables!
|
82
|
-
assert_not_include conf.result_set.database.tables, :scores
|
83
|
-
end
|
84
|
-
end
|
85
|
-
end
|
@@ -1,84 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
module IntegrationTests
|
4
|
-
class TestScoring < Test::Unit::TestCase
|
5
|
-
test "stop scoring if must expectation fails" do
|
6
|
-
database_for('sqlite') do |db|
|
7
|
-
db.create_table(:foo) { primary_key(:id); Integer(:num) }
|
8
|
-
db.create_table(:bar) { primary_key(:id); Integer(:num) }
|
9
|
-
db[:foo].import([:id, :num], [[1, 1]])
|
10
|
-
db[:bar].import([:id, :num], [[1, 5]])
|
11
|
-
end
|
12
|
-
|
13
|
-
db_opts = database_options_for('sqlite')
|
14
|
-
dataset_1 = Linkage::Dataset.new(db_opts, "foo")
|
15
|
-
dataset_2 = Linkage::Dataset.new(db_opts, "bar")
|
16
|
-
conf = dataset_1.link_with(dataset_2) do
|
17
|
-
lhs[:num].must_not be_within(5).of(rhs[:num])
|
18
|
-
lhs[:num].must be_within(5).of(rhs[:num])
|
19
|
-
save_results_in(db_opts)
|
20
|
-
end
|
21
|
-
|
22
|
-
runner = Linkage::SingleThreadedRunner.new(conf)
|
23
|
-
runner.execute
|
24
|
-
|
25
|
-
database_for('sqlite') do |db|
|
26
|
-
assert_equal db[:scores].count, 1
|
27
|
-
record = db[:scores].first
|
28
|
-
assert_equal 1, record[:score]
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
test "scoring phase adds matches as needed" do
|
33
|
-
database_for('sqlite') do |db|
|
34
|
-
db.create_table(:foo) { primary_key(:id); Integer(:num) }
|
35
|
-
db.create_table(:bar) { primary_key(:id); Integer(:num) }
|
36
|
-
db[:foo].import([:id, :num], (0..15).collect { |i| [i, i] })
|
37
|
-
db[:bar].import([:id, :num], (0..15).collect { |i| [i, i] })
|
38
|
-
end
|
39
|
-
|
40
|
-
db_opts = database_options_for('sqlite')
|
41
|
-
dataset_1 = Linkage::Dataset.new(db_opts, "foo")
|
42
|
-
dataset_2 = Linkage::Dataset.new(db_opts, "bar")
|
43
|
-
conf = dataset_1.link_with(dataset_2) do
|
44
|
-
lhs[:num].must be_within(10).of(rhs[:num])
|
45
|
-
lhs[:num].must_not be_within(5).of(rhs[:num])
|
46
|
-
save_results_in(db_opts)
|
47
|
-
end
|
48
|
-
|
49
|
-
runner = Linkage::SingleThreadedRunner.new(conf)
|
50
|
-
runner.execute
|
51
|
-
|
52
|
-
database_for('sqlite') do |db|
|
53
|
-
assert_equal 80, db[:matches].count
|
54
|
-
db[:matches].order(:record_1_id, :record_2_id).each do |row|
|
55
|
-
assert_equal 1, row[:total_score]
|
56
|
-
assert_include 6..10, (row[:record_1_id] - row[:record_2_id]).abs
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
test "optimize scoring for self linkage" do
|
62
|
-
database_for('sqlite') do |db|
|
63
|
-
db.create_table(:foo) { primary_key(:id); Integer(:num) }
|
64
|
-
db[:foo].import([:id, :num], [[1, 1], [2, 5], [3, 10]])
|
65
|
-
end
|
66
|
-
|
67
|
-
db_opts = database_options_for('sqlite')
|
68
|
-
dataset = Linkage::Dataset.new(db_opts, "foo")
|
69
|
-
conf = dataset.link_with(dataset) do
|
70
|
-
lhs[:num].must be_within(5).of(rhs[:num])
|
71
|
-
save_results_in(db_opts)
|
72
|
-
end
|
73
|
-
|
74
|
-
runner = Linkage::SingleThreadedRunner.new(conf)
|
75
|
-
runner.execute
|
76
|
-
|
77
|
-
database_for('sqlite') do |db|
|
78
|
-
assert_equal db[:scores].count, 3
|
79
|
-
scores = db[:scores].order(:record_1_id, :record_2_id).select_map(:score)
|
80
|
-
assert_equal [1, 0, 1], scores
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
end
|