linkage 0.0.8 → 0.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +1 -19
  5. data/Gemfile-java +3 -0
  6. data/README.markdown +88 -34
  7. data/Rakefile +16 -15
  8. data/TODO +4 -0
  9. data/lib/linkage/comparator.rb +139 -144
  10. data/lib/linkage/comparators/compare.rb +236 -29
  11. data/lib/linkage/comparators/strcompare.rb +85 -0
  12. data/lib/linkage/comparators/within.rb +24 -20
  13. data/lib/linkage/configuration.rb +44 -466
  14. data/lib/linkage/dataset.rb +28 -127
  15. data/lib/linkage/exceptions.rb +5 -0
  16. data/lib/linkage/field.rb +6 -37
  17. data/lib/linkage/field_set.rb +3 -3
  18. data/lib/linkage/match_recorder.rb +22 -0
  19. data/lib/linkage/match_set.rb +34 -0
  20. data/lib/linkage/match_sets/csv.rb +39 -0
  21. data/lib/linkage/match_sets/database.rb +45 -0
  22. data/lib/linkage/matcher.rb +30 -0
  23. data/lib/linkage/result_set.rb +25 -110
  24. data/lib/linkage/result_sets/csv.rb +54 -0
  25. data/lib/linkage/result_sets/database.rb +42 -0
  26. data/lib/linkage/runner.rb +57 -16
  27. data/lib/linkage/score_recorder.rb +30 -0
  28. data/lib/linkage/score_set.rb +49 -0
  29. data/lib/linkage/score_sets/csv.rb +64 -0
  30. data/lib/linkage/score_sets/database.rb +77 -0
  31. data/lib/linkage/version.rb +1 -1
  32. data/lib/linkage.rb +14 -17
  33. data/linkage.gemspec +13 -1
  34. data/linkage.gemspec-java +32 -0
  35. data/test/helper.rb +30 -23
  36. data/test/integration/test_cross_linkage.rb +46 -25
  37. data/test/integration/test_database_result_set.rb +55 -0
  38. data/test/integration/test_dual_linkage.rb +19 -94
  39. data/test/integration/test_self_linkage.rb +100 -203
  40. data/test/integration/test_within_comparator.rb +24 -77
  41. data/test/unit/comparators/test_compare.rb +254 -50
  42. data/test/unit/comparators/test_strcompare.rb +45 -0
  43. data/test/unit/comparators/test_within.rb +14 -26
  44. data/test/unit/match_sets/test_csv.rb +78 -0
  45. data/test/unit/match_sets/test_database.rb +63 -0
  46. data/test/unit/result_sets/test_csv.rb +111 -0
  47. data/test/unit/result_sets/test_database.rb +68 -0
  48. data/test/unit/score_sets/test_csv.rb +151 -0
  49. data/test/unit/score_sets/test_database.rb +149 -0
  50. data/test/unit/test_comparator.rb +46 -83
  51. data/test/unit/test_comparators.rb +4 -0
  52. data/test/unit/test_configuration.rb +99 -145
  53. data/test/unit/test_dataset.rb +52 -73
  54. data/test/unit/test_field.rb +4 -55
  55. data/test/unit/test_field_set.rb +6 -6
  56. data/test/unit/test_match_recorder.rb +23 -0
  57. data/test/unit/test_match_set.rb +23 -0
  58. data/test/unit/test_match_sets.rb +4 -0
  59. data/test/unit/test_matcher.rb +44 -0
  60. data/test/unit/test_result_set.rb +24 -223
  61. data/test/unit/test_result_sets.rb +4 -0
  62. data/test/unit/test_runner.rb +122 -17
  63. data/test/unit/test_runners.rb +4 -0
  64. data/test/unit/test_score_recorder.rb +25 -0
  65. data/test/unit/test_score_set.rb +37 -0
  66. data/test/unit/test_score_sets.rb +4 -0
  67. metadata +183 -90
  68. data/Gemfile.lock +0 -92
  69. data/lib/linkage/comparators/binary.rb +0 -12
  70. data/lib/linkage/data.rb +0 -175
  71. data/lib/linkage/decollation.rb +0 -93
  72. data/lib/linkage/expectation.rb +0 -21
  73. data/lib/linkage/expectations/exhaustive.rb +0 -63
  74. data/lib/linkage/expectations/simple.rb +0 -168
  75. data/lib/linkage/function.rb +0 -148
  76. data/lib/linkage/functions/binary.rb +0 -30
  77. data/lib/linkage/functions/cast.rb +0 -54
  78. data/lib/linkage/functions/length.rb +0 -29
  79. data/lib/linkage/functions/strftime.rb +0 -33
  80. data/lib/linkage/functions/trim.rb +0 -30
  81. data/lib/linkage/group.rb +0 -55
  82. data/lib/linkage/meta_object.rb +0 -139
  83. data/lib/linkage/runner/single_threaded.rb +0 -187
  84. data/lib/linkage/utils.rb +0 -164
  85. data/lib/linkage/warnings.rb +0 -5
  86. data/test/integration/test_collation.rb +0 -45
  87. data/test/integration/test_configuration.rb +0 -268
  88. data/test/integration/test_dataset.rb +0 -116
  89. data/test/integration/test_functions.rb +0 -88
  90. data/test/integration/test_result_set.rb +0 -85
  91. data/test/integration/test_scoring.rb +0 -84
  92. data/test/unit/expectations/test_exhaustive.rb +0 -111
  93. data/test/unit/expectations/test_simple.rb +0 -303
  94. data/test/unit/functions/test_binary.rb +0 -54
  95. data/test/unit/functions/test_cast.rb +0 -98
  96. data/test/unit/functions/test_length.rb +0 -52
  97. data/test/unit/functions/test_strftime.rb +0 -60
  98. data/test/unit/functions/test_trim.rb +0 -43
  99. data/test/unit/runner/test_single_threaded.rb +0 -12
  100. data/test/unit/test_data.rb +0 -445
  101. data/test/unit/test_decollation.rb +0 -201
  102. data/test/unit/test_function.rb +0 -233
  103. data/test/unit/test_group.rb +0 -38
  104. data/test/unit/test_meta_object.rb +0 -208
  105. data/test/unit/test_utils.rb +0 -341
@@ -1,268 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestConfiguration < Test::Unit::TestCase
5
- test "linkage_type is self when the two datasets are the same" do
6
- database_for('sqlite') do |db|
7
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
8
- end
9
-
10
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
11
- conf = Linkage::Configuration.new(dataset, dataset)
12
- assert_equal :self, conf.linkage_type
13
- end
14
-
15
- test "linkage_type is dual when the two datasets are different" do
16
- database_for('sqlite') do |db|
17
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
18
- db.create_table(:bar) { primary_key(:id); String(:foo); String(:bar) }
19
- end
20
-
21
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
22
- dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), "bar")
23
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
24
- assert_equal :dual, conf.linkage_type
25
- end
26
-
27
- test "linkage_type is cross when there's different filters on both sides" do
28
- database_for('sqlite') do |db|
29
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
30
- end
31
-
32
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
33
- conf = Linkage::Configuration.new(dataset, dataset)
34
- conf.configure do
35
- lhs[:foo].must == "foo"
36
- rhs[:foo].must == "bar"
37
- end
38
- assert_equal :cross, conf.linkage_type
39
- end
40
-
41
- test "linkage_type is self when there's identical static filters on each side" do
42
- database_for('sqlite') do |db|
43
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
44
- end
45
-
46
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
47
- conf = Linkage::Configuration.new(dataset, dataset)
48
- conf.configure do
49
- lhs[:foo].must == "foo"
50
- rhs[:foo].must == "foo"
51
- end
52
- assert_equal :self, conf.linkage_type
53
- end
54
-
55
- test "linkage_type is cross when exhaustive expectations use different fields" do
56
- database_for('sqlite') do |db|
57
- db.create_table(:foo) { primary_key(:id); Integer(:foo); Integer(:bar) }
58
- end
59
-
60
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
61
- conf = Linkage::Configuration.new(dataset, dataset)
62
- conf.configure do
63
- lhs[:foo].must(be_within(5).of(rhs[:bar]))
64
- end
65
- assert_equal :cross, conf.linkage_type
66
- end
67
-
68
- test "static expectation" do
69
- database_for('sqlite') do |db|
70
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
71
- end
72
-
73
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
74
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
75
- conf.configure do
76
- lhs[:foo].must == "foo"
77
- end
78
-
79
- dataset_2, _ = conf.datasets_with_applied_simple_expectations
80
- assert_equal dataset_2.obj, dataset_1.filter(:foo => "foo").obj
81
- end
82
-
83
- test "complain if an invalid field is accessed" do
84
- database_for('sqlite') do |db|
85
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
86
- end
87
-
88
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
89
- conf = Linkage::Configuration.new(dataset, dataset)
90
- assert_raises(ArgumentError) do
91
- conf.configure do
92
- lhs[:foo].must == rhs[:non_existant_field]
93
- end
94
- end
95
- end
96
-
97
- operators = [:>, :<, :>=, :<=]
98
- operators.each do |operator|
99
- test "DSL #{operator} filter operator" do
100
- database_for('sqlite') do |db|
101
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
102
- end
103
-
104
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
105
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
106
- conf.configure do
107
- lhs[:foo].must.send(operator, 123)
108
- end
109
-
110
- expr = Sequel::SQL::BooleanExpression.new(operator, Sequel::SQL::Identifier.new(:foo), 123)
111
- dataset_2, _ = conf.datasets_with_applied_simple_expectations
112
- assert_equal dataset_2.obj, dataset_1.filter(expr).obj
113
- end
114
-
115
- test "comparing two data sources with #{operator}" do
116
- database_for('sqlite') do |db|
117
- db.create_table(:foo) { primary_key(:id); Integer(:foo); Integer(:bar) }
118
- end
119
-
120
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
121
- conf = Linkage::Configuration.new(dataset, dataset)
122
- conf.configure do
123
- lhs[:foo].must.send(operator, rhs[:bar])
124
- end
125
- assert_equal 1, conf.exhaustive_expectations.length
126
-
127
- comp = conf.exhaustive_expectations[0].comparator
128
- assert_instance_of Linkage::Comparators::Compare, comp
129
- end
130
- end
131
-
132
- test "must_not expectation" do
133
- database_for('sqlite') do |db|
134
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
135
- end
136
-
137
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
138
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
139
- conf.configure do
140
- lhs[:foo].must_not == "foo"
141
- end
142
-
143
- dataset_2, _ = conf.datasets_with_applied_simple_expectations
144
- assert_equal dataset_2.obj, dataset_1.filter(~{:foo => "foo"}).obj
145
- end
146
-
147
- test "static database function" do
148
- database_for('sqlite') do |db|
149
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
150
- end
151
-
152
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
153
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
154
- conf.configure do
155
- lhs[:foo].must == trim("foo")
156
- end
157
-
158
- dataset_2, _ = conf.datasets_with_applied_simple_expectations
159
- assert_equal dataset_1.filter({:foo => :trim.sql_function("foo")}).obj, dataset_2.obj
160
- end
161
-
162
- test "save_results_in" do
163
- database_for('sqlite') do |db|
164
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
165
- end
166
-
167
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
168
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
169
- conf.configure do
170
- save_results_in("mysql://localhost/results", {:foo => 'bar'})
171
- end
172
- assert_equal "mysql://localhost/results", conf.results_uri
173
- assert_equal({:foo => 'bar'}, conf.results_uri_options)
174
- end
175
-
176
- test "case insensitive field names" do
177
- database_for('sqlite') do |db|
178
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
179
- end
180
-
181
- assert_nothing_raised do
182
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
183
- results_uri = database_options_for('sqlite')
184
- conf = dataset.link_with(dataset) do
185
- lhs[:Foo].must == rhs[:baR]
186
- save_results_in(results_uri)
187
- end
188
- end
189
- end
190
-
191
- test "decollation_needed? is false when the datasets and results dataset all have the same database and collations" do
192
- database_for('mysql') do |db|
193
- db.create_table!(:foo) { primary_key(:id); String(:foo, :collate => :latin1_swedish_ci) }
194
- db.create_table!(:bar) { primary_key(:id); String(:foo, :collate => :latin1_swedish_ci) }
195
- end
196
-
197
- dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
198
- dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'bar')
199
- conf = dataset_1.link_with(dataset_2) do
200
- lhs[:foo].must == rhs[:foo]
201
- end
202
- conf.results_uri = database_options_for('mysql')
203
- assert !conf.decollation_needed?
204
- end
205
-
206
- test "decollation_needed? is true when the datasets have different database types" do
207
- database_for('mysql') do |db|
208
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
209
- end
210
-
211
- database_for('sqlite') do |db|
212
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
213
- end
214
-
215
- dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
216
- dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
217
- conf = dataset_1.link_with(dataset_2) do
218
- lhs[:foo].must == rhs[:foo]
219
- end
220
- conf.results_uri = database_options_for('mysql')
221
- assert conf.decollation_needed?
222
- end
223
-
224
- test "decollation_needed? is true when the result dataset has different database type than the datasets" do
225
- database_for('mysql') do |db|
226
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
227
- db.create_table!(:bar) { primary_key(:id); String(:foo) }
228
- end
229
-
230
- dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
231
- dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'bar')
232
- conf = dataset_1.link_with(dataset_2) do
233
- lhs[:foo].must == rhs[:foo]
234
- end
235
- conf.results_uri = database_options_for('sqlite')
236
- assert conf.decollation_needed?
237
- end
238
-
239
- test "decollation_needed? is false when not comparing string columns" do
240
- database_for('mysql') do |db|
241
- db.create_table!(:foo) { primary_key(:id); Fixnum(:foo) }
242
- end
243
-
244
- database_for('sqlite') do |db|
245
- db.create_table!(:foo) { primary_key(:id); Fixnum(:foo) }
246
- end
247
-
248
- dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
249
- dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
250
- conf = dataset_1.link_with(dataset_2) do
251
- lhs[:foo].must == rhs[:foo]
252
- end
253
- conf.results_uri = database_options_for('mysql')
254
- assert !conf.decollation_needed?
255
- end
256
-
257
- test "creating comparator expectation for within" do
258
- database_for('mysql') do |db|
259
- db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
260
- end
261
- dataset = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
262
-
263
- conf = dataset.link_with(dataset) do
264
- lhs[:foo].must be_within(5).of(rhs[:foo])
265
- end
266
- end
267
- end
268
- end
@@ -1,116 +0,0 @@
1
- require 'helper'
2
-
3
- class IntegrationTests::TestDataset < Test::Unit::TestCase
4
- def setup
5
- @tmpdir = Dir.mktmpdir('linkage')
6
- @tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
7
- end
8
-
9
- def database(&block)
10
- Sequel.connect(@tmpuri, &block)
11
- end
12
-
13
- def teardown
14
- FileUtils.remove_entry_secure(@tmpdir)
15
- end
16
-
17
- test "methods that clone the dataset" do
18
- database do |db|
19
- db.create_table(:foo) do
20
- primary_key :id
21
- String :bar
22
- end
23
- end
24
- ds_1 = Linkage::Dataset.new(@tmpuri, "foo")
25
- ds_2 = ds_1.filter(:foo => 'bar')
26
- assert_instance_of Linkage::Dataset, ds_2
27
- assert_equal ds_2.field_set, ds_1.field_set
28
- assert_match /`foo` = 'bar'/, ds_2.sql
29
- end
30
-
31
- test "each_group" do
32
- database do |db|
33
- db.create_table(:foo) do
34
- primary_key :id
35
- String :bar
36
- end
37
- db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo'], [3, 'bar'], [4, 'baz']])
38
- end
39
-
40
- ds = Linkage::Dataset.new(@tmpuri, "foo")
41
- ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
42
- ds.each_group do |group|
43
- assert_equal({:bar => "foo"}, group.values)
44
- assert_equal(2, group.count)
45
- end
46
-
47
- groups = []
48
- ds.each_group(1) do |group|
49
- groups << group
50
- end
51
- assert_equal 3, groups.length
52
- end
53
-
54
- test "each_group with alias" do
55
- database do |db|
56
- db.create_table(:foo) do
57
- primary_key :id
58
- String :bar
59
- end
60
- db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo'], [3, 'bar'], [4, 'baz']])
61
- end
62
-
63
- ds = Linkage::Dataset.new(@tmpuri, "foo")
64
- ds = ds.group_match({
65
- :meta_object => Linkage::MetaObject.new(ds.field_set[:bar]),
66
- :alias => :bar_baz
67
- })
68
- ds.each_group do |group|
69
- assert_equal({:bar_baz => "foo"}, group.values)
70
- assert_equal(2, group.count)
71
- end
72
- end
73
-
74
- test "each_group with filters" do
75
- database do |db|
76
- db.create_table(:foo) do
77
- primary_key :id
78
- String :bar
79
- Integer :baz
80
- end
81
- db[:foo].import([:id, :bar, :baz], [[1, 'foo', 1], [2, 'foo', 2], [3, 'bar', 3], [4, 'baz', 4]])
82
- end
83
-
84
- ds = Linkage::Dataset.new(@tmpuri, "foo")
85
- ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
86
- ds = ds.filter { baz >= 3 }
87
- groups = []
88
- ds.each_group(1) do |group|
89
- groups << group
90
- end
91
- assert_equal 2, groups.length
92
- end
93
-
94
- test "each_group with collation" do
95
- database_for('mysql') do |db|
96
- db.create_table!(:foo) do
97
- primary_key :id
98
- String :bar, :collate => :latin1_swedish_ci
99
- end
100
- db[:foo].import([:id, :bar], [[1, 'fOo'], [2, 'foO'], [3, 'bar'], [4, 'baz']])
101
- end
102
-
103
- ds = Linkage::Dataset.new(database_options_for('mysql'), "foo")
104
- ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
105
- groups = []
106
- ds.each_group(1) do |group|
107
- groups << group
108
- end
109
- expected = [
110
- {:bar => 'BAR'},
111
- {:bar => 'BAZ'},
112
- {:bar => 'FOO'}
113
- ]
114
- assert_equal expected, groups.collect(&:decollated_values)
115
- end
116
- end
@@ -1,88 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestFunctions < Test::Unit::TestCase
5
- def setup
6
- @tmpdir = Dir.mktmpdir('linkage')
7
- @tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
8
- end
9
-
10
- def database(options = {}, &block)
11
- Sequel.connect(@tmpuri, options, &block)
12
- end
13
-
14
- def teardown
15
- FileUtils.remove_entry_secure(@tmpdir)
16
- end
17
-
18
- test "match functions" do
19
- # insert the test data
20
- database do |db|
21
- db.create_table(:foo) { primary_key(:id); String(:bar) }
22
- db[:foo].import([:id, :bar],
23
- Array.new(100) { |i| [i, "bar%s" % (" " * (i % 10))] })
24
- end
25
-
26
- ds = Linkage::Dataset.new(@tmpuri, "foo", :single_threaded => true)
27
- tmpuri = @tmpuri
28
- conf = ds.link_with(ds) do
29
- trim(lhs[:bar]).must == trim(rhs[:bar])
30
- save_results_in(tmpuri)
31
- end
32
- assert_equal :self, conf.linkage_type
33
- runner = Linkage::SingleThreadedRunner.new(conf)
34
- runner.execute
35
-
36
- database do |db|
37
- assert_equal 1, db[:groups].count
38
- end
39
- end
40
-
41
- test "strftime in sqlite" do
42
- #logger = Logger.new(STDERR)
43
- #database(:logger => logger) do |db|
44
- database do |db|
45
- db.create_table(:foo) { primary_key(:id); Date(:foo_date) }
46
- db.create_table(:bar) { primary_key(:id); String(:bar_string) }
47
- db[:foo].insert({:id => 1, :foo_date => Date.today})
48
- db[:bar].insert({:id => 1, :bar_string => Date.today.strftime("%Y-%m-%d")})
49
- end
50
-
51
- ds_1 = Linkage::Dataset.new(@tmpuri, "foo", :single_threaded => true)
52
- ds_2 = Linkage::Dataset.new(@tmpuri, "bar", :single_threaded => true)
53
- tmpuri = @tmpuri
54
- conf = ds_1.link_with(ds_2) do
55
- strftime(lhs[:foo_date], "%Y-%m-%d").must == rhs[:bar_string]
56
- save_results_in(tmpuri)
57
- end
58
- runner = Linkage::SingleThreadedRunner.new(conf)
59
- runner.execute
60
-
61
- database do |db|
62
- assert_equal 1, db[:groups].count
63
- end
64
- end
65
-
66
- test "binary function with static argument" do
67
- database do |db|
68
- db.create_table(:foo) { primary_key(:id); String(:bar) }
69
- db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo']])
70
- end
71
-
72
- ds = Linkage::Dataset.new(@tmpuri, "foo")
73
- tmpuri = @tmpuri
74
- conf = ds.link_with(ds) do
75
- lhs[:bar].must == rhs[:bar]
76
- binary(lhs[:bar]).must == binary('foo')
77
- binary(rhs[:bar]).must == binary('foo')
78
- save_results_in(tmpuri)
79
- end
80
- runner = Linkage::SingleThreadedRunner.new(conf)
81
- runner.execute
82
-
83
- database do |db|
84
- assert_equal 1, db[:groups].count
85
- end
86
- end
87
- end
88
- end
@@ -1,85 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestResultSet < Test::Unit::TestCase
5
- test "#create_tables! creates original_groups table when decollation is needed" do
6
- database_for('sqlite') do |db|
7
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
8
- end
9
-
10
- database_for('mysql') do |db|
11
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
12
- end
13
-
14
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
15
- dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
16
- results_uri = database_options_for('sqlite')
17
- conf = dataset_1.link_with(dataset_2) do
18
- lhs[:foo].must == rhs[:foo]
19
- save_results_in(results_uri)
20
- end
21
- conf.result_set.create_tables!
22
- assert_include conf.result_set.database.tables, :original_groups
23
- end
24
-
25
- test "#create_tables! doesn't create original_groups table when decollation is needed" do
26
- database_for('sqlite') do |db|
27
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
28
- end
29
-
30
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
31
- results_uri = database_options_for('sqlite')
32
- conf = dataset.link_with(dataset) do
33
- lhs[:foo].must == rhs[:foo]
34
- save_results_in(results_uri)
35
- end
36
- conf.result_set.create_tables!
37
- assert_not_include conf.result_set.database.tables, :original_groups
38
- end
39
-
40
- test "#create_tables! doesn't create groups table when not needed" do
41
- database_for('sqlite') do |db|
42
- db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
43
- end
44
-
45
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
46
- results_uri = database_options_for('sqlite')
47
- conf = dataset.link_with(dataset) do
48
- lhs[:foo].must be_within(5).of(rhs[:foo])
49
- save_results_in(results_uri)
50
- end
51
- conf.result_set.create_tables!
52
- assert_not_include conf.result_set.database.tables, :groups
53
- end
54
-
55
- test "#create_tables! creates scores table when there are exhaustive expectations" do
56
- database_for('sqlite') do |db|
57
- db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
58
- end
59
-
60
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
61
- results_uri = database_options_for('sqlite')
62
- conf = dataset.link_with(dataset) do
63
- lhs[:foo].must be_within(5).of(rhs[:foo])
64
- save_results_in(results_uri)
65
- end
66
- conf.result_set.create_tables!
67
- assert_include conf.result_set.database.tables, :scores
68
- end
69
-
70
- test "#create_tables! doesn't create scores table when not needed" do
71
- database_for('sqlite') do |db|
72
- db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
73
- end
74
-
75
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
76
- results_uri = database_options_for('sqlite')
77
- conf = dataset.link_with(dataset) do
78
- lhs[:foo].must == rhs[:foo]
79
- save_results_in(results_uri)
80
- end
81
- conf.result_set.create_tables!
82
- assert_not_include conf.result_set.database.tables, :scores
83
- end
84
- end
85
- end
@@ -1,84 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestScoring < Test::Unit::TestCase
5
- test "stop scoring if must expectation fails" do
6
- database_for('sqlite') do |db|
7
- db.create_table(:foo) { primary_key(:id); Integer(:num) }
8
- db.create_table(:bar) { primary_key(:id); Integer(:num) }
9
- db[:foo].import([:id, :num], [[1, 1]])
10
- db[:bar].import([:id, :num], [[1, 5]])
11
- end
12
-
13
- db_opts = database_options_for('sqlite')
14
- dataset_1 = Linkage::Dataset.new(db_opts, "foo")
15
- dataset_2 = Linkage::Dataset.new(db_opts, "bar")
16
- conf = dataset_1.link_with(dataset_2) do
17
- lhs[:num].must_not be_within(5).of(rhs[:num])
18
- lhs[:num].must be_within(5).of(rhs[:num])
19
- save_results_in(db_opts)
20
- end
21
-
22
- runner = Linkage::SingleThreadedRunner.new(conf)
23
- runner.execute
24
-
25
- database_for('sqlite') do |db|
26
- assert_equal db[:scores].count, 1
27
- record = db[:scores].first
28
- assert_equal 1, record[:score]
29
- end
30
- end
31
-
32
- test "scoring phase adds matches as needed" do
33
- database_for('sqlite') do |db|
34
- db.create_table(:foo) { primary_key(:id); Integer(:num) }
35
- db.create_table(:bar) { primary_key(:id); Integer(:num) }
36
- db[:foo].import([:id, :num], (0..15).collect { |i| [i, i] })
37
- db[:bar].import([:id, :num], (0..15).collect { |i| [i, i] })
38
- end
39
-
40
- db_opts = database_options_for('sqlite')
41
- dataset_1 = Linkage::Dataset.new(db_opts, "foo")
42
- dataset_2 = Linkage::Dataset.new(db_opts, "bar")
43
- conf = dataset_1.link_with(dataset_2) do
44
- lhs[:num].must be_within(10).of(rhs[:num])
45
- lhs[:num].must_not be_within(5).of(rhs[:num])
46
- save_results_in(db_opts)
47
- end
48
-
49
- runner = Linkage::SingleThreadedRunner.new(conf)
50
- runner.execute
51
-
52
- database_for('sqlite') do |db|
53
- assert_equal 80, db[:matches].count
54
- db[:matches].order(:record_1_id, :record_2_id).each do |row|
55
- assert_equal 1, row[:total_score]
56
- assert_include 6..10, (row[:record_1_id] - row[:record_2_id]).abs
57
- end
58
- end
59
- end
60
-
61
- test "optimize scoring for self linkage" do
62
- database_for('sqlite') do |db|
63
- db.create_table(:foo) { primary_key(:id); Integer(:num) }
64
- db[:foo].import([:id, :num], [[1, 1], [2, 5], [3, 10]])
65
- end
66
-
67
- db_opts = database_options_for('sqlite')
68
- dataset = Linkage::Dataset.new(db_opts, "foo")
69
- conf = dataset.link_with(dataset) do
70
- lhs[:num].must be_within(5).of(rhs[:num])
71
- save_results_in(db_opts)
72
- end
73
-
74
- runner = Linkage::SingleThreadedRunner.new(conf)
75
- runner.execute
76
-
77
- database_for('sqlite') do |db|
78
- assert_equal db[:scores].count, 3
79
- scores = db[:scores].order(:record_1_id, :record_2_id).select_map(:score)
80
- assert_equal [1, 0, 1], scores
81
- end
82
- end
83
- end
84
- end