linkage 0.0.8 → 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +1 -19
  5. data/Gemfile-java +3 -0
  6. data/README.markdown +88 -34
  7. data/Rakefile +16 -15
  8. data/TODO +4 -0
  9. data/lib/linkage/comparator.rb +139 -144
  10. data/lib/linkage/comparators/compare.rb +236 -29
  11. data/lib/linkage/comparators/strcompare.rb +85 -0
  12. data/lib/linkage/comparators/within.rb +24 -20
  13. data/lib/linkage/configuration.rb +44 -466
  14. data/lib/linkage/dataset.rb +28 -127
  15. data/lib/linkage/exceptions.rb +5 -0
  16. data/lib/linkage/field.rb +6 -37
  17. data/lib/linkage/field_set.rb +3 -3
  18. data/lib/linkage/match_recorder.rb +22 -0
  19. data/lib/linkage/match_set.rb +34 -0
  20. data/lib/linkage/match_sets/csv.rb +39 -0
  21. data/lib/linkage/match_sets/database.rb +45 -0
  22. data/lib/linkage/matcher.rb +30 -0
  23. data/lib/linkage/result_set.rb +25 -110
  24. data/lib/linkage/result_sets/csv.rb +54 -0
  25. data/lib/linkage/result_sets/database.rb +42 -0
  26. data/lib/linkage/runner.rb +57 -16
  27. data/lib/linkage/score_recorder.rb +30 -0
  28. data/lib/linkage/score_set.rb +49 -0
  29. data/lib/linkage/score_sets/csv.rb +64 -0
  30. data/lib/linkage/score_sets/database.rb +77 -0
  31. data/lib/linkage/version.rb +1 -1
  32. data/lib/linkage.rb +14 -17
  33. data/linkage.gemspec +13 -1
  34. data/linkage.gemspec-java +32 -0
  35. data/test/helper.rb +30 -23
  36. data/test/integration/test_cross_linkage.rb +46 -25
  37. data/test/integration/test_database_result_set.rb +55 -0
  38. data/test/integration/test_dual_linkage.rb +19 -94
  39. data/test/integration/test_self_linkage.rb +100 -203
  40. data/test/integration/test_within_comparator.rb +24 -77
  41. data/test/unit/comparators/test_compare.rb +254 -50
  42. data/test/unit/comparators/test_strcompare.rb +45 -0
  43. data/test/unit/comparators/test_within.rb +14 -26
  44. data/test/unit/match_sets/test_csv.rb +78 -0
  45. data/test/unit/match_sets/test_database.rb +63 -0
  46. data/test/unit/result_sets/test_csv.rb +111 -0
  47. data/test/unit/result_sets/test_database.rb +68 -0
  48. data/test/unit/score_sets/test_csv.rb +151 -0
  49. data/test/unit/score_sets/test_database.rb +149 -0
  50. data/test/unit/test_comparator.rb +46 -83
  51. data/test/unit/test_comparators.rb +4 -0
  52. data/test/unit/test_configuration.rb +99 -145
  53. data/test/unit/test_dataset.rb +52 -73
  54. data/test/unit/test_field.rb +4 -55
  55. data/test/unit/test_field_set.rb +6 -6
  56. data/test/unit/test_match_recorder.rb +23 -0
  57. data/test/unit/test_match_set.rb +23 -0
  58. data/test/unit/test_match_sets.rb +4 -0
  59. data/test/unit/test_matcher.rb +44 -0
  60. data/test/unit/test_result_set.rb +24 -223
  61. data/test/unit/test_result_sets.rb +4 -0
  62. data/test/unit/test_runner.rb +122 -17
  63. data/test/unit/test_runners.rb +4 -0
  64. data/test/unit/test_score_recorder.rb +25 -0
  65. data/test/unit/test_score_set.rb +37 -0
  66. data/test/unit/test_score_sets.rb +4 -0
  67. metadata +183 -90
  68. data/Gemfile.lock +0 -92
  69. data/lib/linkage/comparators/binary.rb +0 -12
  70. data/lib/linkage/data.rb +0 -175
  71. data/lib/linkage/decollation.rb +0 -93
  72. data/lib/linkage/expectation.rb +0 -21
  73. data/lib/linkage/expectations/exhaustive.rb +0 -63
  74. data/lib/linkage/expectations/simple.rb +0 -168
  75. data/lib/linkage/function.rb +0 -148
  76. data/lib/linkage/functions/binary.rb +0 -30
  77. data/lib/linkage/functions/cast.rb +0 -54
  78. data/lib/linkage/functions/length.rb +0 -29
  79. data/lib/linkage/functions/strftime.rb +0 -33
  80. data/lib/linkage/functions/trim.rb +0 -30
  81. data/lib/linkage/group.rb +0 -55
  82. data/lib/linkage/meta_object.rb +0 -139
  83. data/lib/linkage/runner/single_threaded.rb +0 -187
  84. data/lib/linkage/utils.rb +0 -164
  85. data/lib/linkage/warnings.rb +0 -5
  86. data/test/integration/test_collation.rb +0 -45
  87. data/test/integration/test_configuration.rb +0 -268
  88. data/test/integration/test_dataset.rb +0 -116
  89. data/test/integration/test_functions.rb +0 -88
  90. data/test/integration/test_result_set.rb +0 -85
  91. data/test/integration/test_scoring.rb +0 -84
  92. data/test/unit/expectations/test_exhaustive.rb +0 -111
  93. data/test/unit/expectations/test_simple.rb +0 -303
  94. data/test/unit/functions/test_binary.rb +0 -54
  95. data/test/unit/functions/test_cast.rb +0 -98
  96. data/test/unit/functions/test_length.rb +0 -52
  97. data/test/unit/functions/test_strftime.rb +0 -60
  98. data/test/unit/functions/test_trim.rb +0 -43
  99. data/test/unit/runner/test_single_threaded.rb +0 -12
  100. data/test/unit/test_data.rb +0 -445
  101. data/test/unit/test_decollation.rb +0 -201
  102. data/test/unit/test_function.rb +0 -233
  103. data/test/unit/test_group.rb +0 -38
  104. data/test/unit/test_meta_object.rb +0 -208
  105. data/test/unit/test_utils.rb +0 -341
@@ -1,268 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestConfiguration < Test::Unit::TestCase
5
- test "linkage_type is self when the two datasets are the same" do
6
- database_for('sqlite') do |db|
7
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
8
- end
9
-
10
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
11
- conf = Linkage::Configuration.new(dataset, dataset)
12
- assert_equal :self, conf.linkage_type
13
- end
14
-
15
- test "linkage_type is dual when the two datasets are different" do
16
- database_for('sqlite') do |db|
17
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
18
- db.create_table(:bar) { primary_key(:id); String(:foo); String(:bar) }
19
- end
20
-
21
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
22
- dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), "bar")
23
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
24
- assert_equal :dual, conf.linkage_type
25
- end
26
-
27
- test "linkage_type is cross when there's different filters on both sides" do
28
- database_for('sqlite') do |db|
29
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
30
- end
31
-
32
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
33
- conf = Linkage::Configuration.new(dataset, dataset)
34
- conf.configure do
35
- lhs[:foo].must == "foo"
36
- rhs[:foo].must == "bar"
37
- end
38
- assert_equal :cross, conf.linkage_type
39
- end
40
-
41
- test "linkage_type is self when there's identical static filters on each side" do
42
- database_for('sqlite') do |db|
43
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
44
- end
45
-
46
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
47
- conf = Linkage::Configuration.new(dataset, dataset)
48
- conf.configure do
49
- lhs[:foo].must == "foo"
50
- rhs[:foo].must == "foo"
51
- end
52
- assert_equal :self, conf.linkage_type
53
- end
54
-
55
- test "linkage_type is cross when exhaustive expectations use different fields" do
56
- database_for('sqlite') do |db|
57
- db.create_table(:foo) { primary_key(:id); Integer(:foo); Integer(:bar) }
58
- end
59
-
60
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
61
- conf = Linkage::Configuration.new(dataset, dataset)
62
- conf.configure do
63
- lhs[:foo].must(be_within(5).of(rhs[:bar]))
64
- end
65
- assert_equal :cross, conf.linkage_type
66
- end
67
-
68
- test "static expectation" do
69
- database_for('sqlite') do |db|
70
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
71
- end
72
-
73
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
74
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
75
- conf.configure do
76
- lhs[:foo].must == "foo"
77
- end
78
-
79
- dataset_2, _ = conf.datasets_with_applied_simple_expectations
80
- assert_equal dataset_2.obj, dataset_1.filter(:foo => "foo").obj
81
- end
82
-
83
- test "complain if an invalid field is accessed" do
84
- database_for('sqlite') do |db|
85
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
86
- end
87
-
88
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
89
- conf = Linkage::Configuration.new(dataset, dataset)
90
- assert_raises(ArgumentError) do
91
- conf.configure do
92
- lhs[:foo].must == rhs[:non_existant_field]
93
- end
94
- end
95
- end
96
-
97
- operators = [:>, :<, :>=, :<=]
98
- operators.each do |operator|
99
- test "DSL #{operator} filter operator" do
100
- database_for('sqlite') do |db|
101
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
102
- end
103
-
104
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
105
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
106
- conf.configure do
107
- lhs[:foo].must.send(operator, 123)
108
- end
109
-
110
- expr = Sequel::SQL::BooleanExpression.new(operator, Sequel::SQL::Identifier.new(:foo), 123)
111
- dataset_2, _ = conf.datasets_with_applied_simple_expectations
112
- assert_equal dataset_2.obj, dataset_1.filter(expr).obj
113
- end
114
-
115
- test "comparing two data sources with #{operator}" do
116
- database_for('sqlite') do |db|
117
- db.create_table(:foo) { primary_key(:id); Integer(:foo); Integer(:bar) }
118
- end
119
-
120
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
121
- conf = Linkage::Configuration.new(dataset, dataset)
122
- conf.configure do
123
- lhs[:foo].must.send(operator, rhs[:bar])
124
- end
125
- assert_equal 1, conf.exhaustive_expectations.length
126
-
127
- comp = conf.exhaustive_expectations[0].comparator
128
- assert_instance_of Linkage::Comparators::Compare, comp
129
- end
130
- end
131
-
132
- test "must_not expectation" do
133
- database_for('sqlite') do |db|
134
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
135
- end
136
-
137
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
138
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
139
- conf.configure do
140
- lhs[:foo].must_not == "foo"
141
- end
142
-
143
- dataset_2, _ = conf.datasets_with_applied_simple_expectations
144
- assert_equal dataset_2.obj, dataset_1.filter(~{:foo => "foo"}).obj
145
- end
146
-
147
- test "static database function" do
148
- database_for('sqlite') do |db|
149
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
150
- end
151
-
152
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
153
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
154
- conf.configure do
155
- lhs[:foo].must == trim("foo")
156
- end
157
-
158
- dataset_2, _ = conf.datasets_with_applied_simple_expectations
159
- assert_equal dataset_1.filter({:foo => :trim.sql_function("foo")}).obj, dataset_2.obj
160
- end
161
-
162
- test "save_results_in" do
163
- database_for('sqlite') do |db|
164
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
165
- end
166
-
167
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
168
- conf = Linkage::Configuration.new(dataset_1, dataset_1)
169
- conf.configure do
170
- save_results_in("mysql://localhost/results", {:foo => 'bar'})
171
- end
172
- assert_equal "mysql://localhost/results", conf.results_uri
173
- assert_equal({:foo => 'bar'}, conf.results_uri_options)
174
- end
175
-
176
- test "case insensitive field names" do
177
- database_for('sqlite') do |db|
178
- db.create_table(:foo) { primary_key(:id); String(:foo); String(:bar) }
179
- end
180
-
181
- assert_nothing_raised do
182
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), "foo")
183
- results_uri = database_options_for('sqlite')
184
- conf = dataset.link_with(dataset) do
185
- lhs[:Foo].must == rhs[:baR]
186
- save_results_in(results_uri)
187
- end
188
- end
189
- end
190
-
191
- test "decollation_needed? is false when the datasets and results dataset all have the same database and collations" do
192
- database_for('mysql') do |db|
193
- db.create_table!(:foo) { primary_key(:id); String(:foo, :collate => :latin1_swedish_ci) }
194
- db.create_table!(:bar) { primary_key(:id); String(:foo, :collate => :latin1_swedish_ci) }
195
- end
196
-
197
- dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
198
- dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'bar')
199
- conf = dataset_1.link_with(dataset_2) do
200
- lhs[:foo].must == rhs[:foo]
201
- end
202
- conf.results_uri = database_options_for('mysql')
203
- assert !conf.decollation_needed?
204
- end
205
-
206
- test "decollation_needed? is true when the datasets have different database types" do
207
- database_for('mysql') do |db|
208
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
209
- end
210
-
211
- database_for('sqlite') do |db|
212
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
213
- end
214
-
215
- dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
216
- dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
217
- conf = dataset_1.link_with(dataset_2) do
218
- lhs[:foo].must == rhs[:foo]
219
- end
220
- conf.results_uri = database_options_for('mysql')
221
- assert conf.decollation_needed?
222
- end
223
-
224
- test "decollation_needed? is true when the result dataset has different database type than the datasets" do
225
- database_for('mysql') do |db|
226
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
227
- db.create_table!(:bar) { primary_key(:id); String(:foo) }
228
- end
229
-
230
- dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
231
- dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'bar')
232
- conf = dataset_1.link_with(dataset_2) do
233
- lhs[:foo].must == rhs[:foo]
234
- end
235
- conf.results_uri = database_options_for('sqlite')
236
- assert conf.decollation_needed?
237
- end
238
-
239
- test "decollation_needed? is false when not comparing string columns" do
240
- database_for('mysql') do |db|
241
- db.create_table!(:foo) { primary_key(:id); Fixnum(:foo) }
242
- end
243
-
244
- database_for('sqlite') do |db|
245
- db.create_table!(:foo) { primary_key(:id); Fixnum(:foo) }
246
- end
247
-
248
- dataset_1 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
249
- dataset_2 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
250
- conf = dataset_1.link_with(dataset_2) do
251
- lhs[:foo].must == rhs[:foo]
252
- end
253
- conf.results_uri = database_options_for('mysql')
254
- assert !conf.decollation_needed?
255
- end
256
-
257
- test "creating comparator expectation for within" do
258
- database_for('mysql') do |db|
259
- db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
260
- end
261
- dataset = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
262
-
263
- conf = dataset.link_with(dataset) do
264
- lhs[:foo].must be_within(5).of(rhs[:foo])
265
- end
266
- end
267
- end
268
- end
@@ -1,116 +0,0 @@
1
- require 'helper'
2
-
3
- class IntegrationTests::TestDataset < Test::Unit::TestCase
4
- def setup
5
- @tmpdir = Dir.mktmpdir('linkage')
6
- @tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
7
- end
8
-
9
- def database(&block)
10
- Sequel.connect(@tmpuri, &block)
11
- end
12
-
13
- def teardown
14
- FileUtils.remove_entry_secure(@tmpdir)
15
- end
16
-
17
- test "methods that clone the dataset" do
18
- database do |db|
19
- db.create_table(:foo) do
20
- primary_key :id
21
- String :bar
22
- end
23
- end
24
- ds_1 = Linkage::Dataset.new(@tmpuri, "foo")
25
- ds_2 = ds_1.filter(:foo => 'bar')
26
- assert_instance_of Linkage::Dataset, ds_2
27
- assert_equal ds_2.field_set, ds_1.field_set
28
- assert_match /`foo` = 'bar'/, ds_2.sql
29
- end
30
-
31
- test "each_group" do
32
- database do |db|
33
- db.create_table(:foo) do
34
- primary_key :id
35
- String :bar
36
- end
37
- db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo'], [3, 'bar'], [4, 'baz']])
38
- end
39
-
40
- ds = Linkage::Dataset.new(@tmpuri, "foo")
41
- ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
42
- ds.each_group do |group|
43
- assert_equal({:bar => "foo"}, group.values)
44
- assert_equal(2, group.count)
45
- end
46
-
47
- groups = []
48
- ds.each_group(1) do |group|
49
- groups << group
50
- end
51
- assert_equal 3, groups.length
52
- end
53
-
54
- test "each_group with alias" do
55
- database do |db|
56
- db.create_table(:foo) do
57
- primary_key :id
58
- String :bar
59
- end
60
- db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo'], [3, 'bar'], [4, 'baz']])
61
- end
62
-
63
- ds = Linkage::Dataset.new(@tmpuri, "foo")
64
- ds = ds.group_match({
65
- :meta_object => Linkage::MetaObject.new(ds.field_set[:bar]),
66
- :alias => :bar_baz
67
- })
68
- ds.each_group do |group|
69
- assert_equal({:bar_baz => "foo"}, group.values)
70
- assert_equal(2, group.count)
71
- end
72
- end
73
-
74
- test "each_group with filters" do
75
- database do |db|
76
- db.create_table(:foo) do
77
- primary_key :id
78
- String :bar
79
- Integer :baz
80
- end
81
- db[:foo].import([:id, :bar, :baz], [[1, 'foo', 1], [2, 'foo', 2], [3, 'bar', 3], [4, 'baz', 4]])
82
- end
83
-
84
- ds = Linkage::Dataset.new(@tmpuri, "foo")
85
- ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
86
- ds = ds.filter { baz >= 3 }
87
- groups = []
88
- ds.each_group(1) do |group|
89
- groups << group
90
- end
91
- assert_equal 2, groups.length
92
- end
93
-
94
- test "each_group with collation" do
95
- database_for('mysql') do |db|
96
- db.create_table!(:foo) do
97
- primary_key :id
98
- String :bar, :collate => :latin1_swedish_ci
99
- end
100
- db[:foo].import([:id, :bar], [[1, 'fOo'], [2, 'foO'], [3, 'bar'], [4, 'baz']])
101
- end
102
-
103
- ds = Linkage::Dataset.new(database_options_for('mysql'), "foo")
104
- ds = ds.group_match(Linkage::MetaObject.new(ds.field_set[:bar]))
105
- groups = []
106
- ds.each_group(1) do |group|
107
- groups << group
108
- end
109
- expected = [
110
- {:bar => 'BAR'},
111
- {:bar => 'BAZ'},
112
- {:bar => 'FOO'}
113
- ]
114
- assert_equal expected, groups.collect(&:decollated_values)
115
- end
116
- end
@@ -1,88 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestFunctions < Test::Unit::TestCase
5
- def setup
6
- @tmpdir = Dir.mktmpdir('linkage')
7
- @tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
8
- end
9
-
10
- def database(options = {}, &block)
11
- Sequel.connect(@tmpuri, options, &block)
12
- end
13
-
14
- def teardown
15
- FileUtils.remove_entry_secure(@tmpdir)
16
- end
17
-
18
- test "match functions" do
19
- # insert the test data
20
- database do |db|
21
- db.create_table(:foo) { primary_key(:id); String(:bar) }
22
- db[:foo].import([:id, :bar],
23
- Array.new(100) { |i| [i, "bar%s" % (" " * (i % 10))] })
24
- end
25
-
26
- ds = Linkage::Dataset.new(@tmpuri, "foo", :single_threaded => true)
27
- tmpuri = @tmpuri
28
- conf = ds.link_with(ds) do
29
- trim(lhs[:bar]).must == trim(rhs[:bar])
30
- save_results_in(tmpuri)
31
- end
32
- assert_equal :self, conf.linkage_type
33
- runner = Linkage::SingleThreadedRunner.new(conf)
34
- runner.execute
35
-
36
- database do |db|
37
- assert_equal 1, db[:groups].count
38
- end
39
- end
40
-
41
- test "strftime in sqlite" do
42
- #logger = Logger.new(STDERR)
43
- #database(:logger => logger) do |db|
44
- database do |db|
45
- db.create_table(:foo) { primary_key(:id); Date(:foo_date) }
46
- db.create_table(:bar) { primary_key(:id); String(:bar_string) }
47
- db[:foo].insert({:id => 1, :foo_date => Date.today})
48
- db[:bar].insert({:id => 1, :bar_string => Date.today.strftime("%Y-%m-%d")})
49
- end
50
-
51
- ds_1 = Linkage::Dataset.new(@tmpuri, "foo", :single_threaded => true)
52
- ds_2 = Linkage::Dataset.new(@tmpuri, "bar", :single_threaded => true)
53
- tmpuri = @tmpuri
54
- conf = ds_1.link_with(ds_2) do
55
- strftime(lhs[:foo_date], "%Y-%m-%d").must == rhs[:bar_string]
56
- save_results_in(tmpuri)
57
- end
58
- runner = Linkage::SingleThreadedRunner.new(conf)
59
- runner.execute
60
-
61
- database do |db|
62
- assert_equal 1, db[:groups].count
63
- end
64
- end
65
-
66
- test "binary function with static argument" do
67
- database do |db|
68
- db.create_table(:foo) { primary_key(:id); String(:bar) }
69
- db[:foo].import([:id, :bar], [[1, 'foo'], [2, 'foo']])
70
- end
71
-
72
- ds = Linkage::Dataset.new(@tmpuri, "foo")
73
- tmpuri = @tmpuri
74
- conf = ds.link_with(ds) do
75
- lhs[:bar].must == rhs[:bar]
76
- binary(lhs[:bar]).must == binary('foo')
77
- binary(rhs[:bar]).must == binary('foo')
78
- save_results_in(tmpuri)
79
- end
80
- runner = Linkage::SingleThreadedRunner.new(conf)
81
- runner.execute
82
-
83
- database do |db|
84
- assert_equal 1, db[:groups].count
85
- end
86
- end
87
- end
88
- end
@@ -1,85 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestResultSet < Test::Unit::TestCase
5
- test "#create_tables! creates original_groups table when decollation is needed" do
6
- database_for('sqlite') do |db|
7
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
8
- end
9
-
10
- database_for('mysql') do |db|
11
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
12
- end
13
-
14
- dataset_1 = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
15
- dataset_2 = Linkage::Dataset.new(database_options_for('mysql'), 'foo')
16
- results_uri = database_options_for('sqlite')
17
- conf = dataset_1.link_with(dataset_2) do
18
- lhs[:foo].must == rhs[:foo]
19
- save_results_in(results_uri)
20
- end
21
- conf.result_set.create_tables!
22
- assert_include conf.result_set.database.tables, :original_groups
23
- end
24
-
25
- test "#create_tables! doesn't create original_groups table when decollation is needed" do
26
- database_for('sqlite') do |db|
27
- db.create_table!(:foo) { primary_key(:id); String(:foo) }
28
- end
29
-
30
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
31
- results_uri = database_options_for('sqlite')
32
- conf = dataset.link_with(dataset) do
33
- lhs[:foo].must == rhs[:foo]
34
- save_results_in(results_uri)
35
- end
36
- conf.result_set.create_tables!
37
- assert_not_include conf.result_set.database.tables, :original_groups
38
- end
39
-
40
- test "#create_tables! doesn't create groups table when not needed" do
41
- database_for('sqlite') do |db|
42
- db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
43
- end
44
-
45
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
46
- results_uri = database_options_for('sqlite')
47
- conf = dataset.link_with(dataset) do
48
- lhs[:foo].must be_within(5).of(rhs[:foo])
49
- save_results_in(results_uri)
50
- end
51
- conf.result_set.create_tables!
52
- assert_not_include conf.result_set.database.tables, :groups
53
- end
54
-
55
- test "#create_tables! creates scores table when there are exhaustive expectations" do
56
- database_for('sqlite') do |db|
57
- db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
58
- end
59
-
60
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
61
- results_uri = database_options_for('sqlite')
62
- conf = dataset.link_with(dataset) do
63
- lhs[:foo].must be_within(5).of(rhs[:foo])
64
- save_results_in(results_uri)
65
- end
66
- conf.result_set.create_tables!
67
- assert_include conf.result_set.database.tables, :scores
68
- end
69
-
70
- test "#create_tables! doesn't create scores table when not needed" do
71
- database_for('sqlite') do |db|
72
- db.create_table!(:foo) { primary_key(:id); Integer(:foo) }
73
- end
74
-
75
- dataset = Linkage::Dataset.new(database_options_for('sqlite'), 'foo')
76
- results_uri = database_options_for('sqlite')
77
- conf = dataset.link_with(dataset) do
78
- lhs[:foo].must == rhs[:foo]
79
- save_results_in(results_uri)
80
- end
81
- conf.result_set.create_tables!
82
- assert_not_include conf.result_set.database.tables, :scores
83
- end
84
- end
85
- end
@@ -1,84 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestScoring < Test::Unit::TestCase
5
- test "stop scoring if must expectation fails" do
6
- database_for('sqlite') do |db|
7
- db.create_table(:foo) { primary_key(:id); Integer(:num) }
8
- db.create_table(:bar) { primary_key(:id); Integer(:num) }
9
- db[:foo].import([:id, :num], [[1, 1]])
10
- db[:bar].import([:id, :num], [[1, 5]])
11
- end
12
-
13
- db_opts = database_options_for('sqlite')
14
- dataset_1 = Linkage::Dataset.new(db_opts, "foo")
15
- dataset_2 = Linkage::Dataset.new(db_opts, "bar")
16
- conf = dataset_1.link_with(dataset_2) do
17
- lhs[:num].must_not be_within(5).of(rhs[:num])
18
- lhs[:num].must be_within(5).of(rhs[:num])
19
- save_results_in(db_opts)
20
- end
21
-
22
- runner = Linkage::SingleThreadedRunner.new(conf)
23
- runner.execute
24
-
25
- database_for('sqlite') do |db|
26
- assert_equal db[:scores].count, 1
27
- record = db[:scores].first
28
- assert_equal 1, record[:score]
29
- end
30
- end
31
-
32
- test "scoring phase adds matches as needed" do
33
- database_for('sqlite') do |db|
34
- db.create_table(:foo) { primary_key(:id); Integer(:num) }
35
- db.create_table(:bar) { primary_key(:id); Integer(:num) }
36
- db[:foo].import([:id, :num], (0..15).collect { |i| [i, i] })
37
- db[:bar].import([:id, :num], (0..15).collect { |i| [i, i] })
38
- end
39
-
40
- db_opts = database_options_for('sqlite')
41
- dataset_1 = Linkage::Dataset.new(db_opts, "foo")
42
- dataset_2 = Linkage::Dataset.new(db_opts, "bar")
43
- conf = dataset_1.link_with(dataset_2) do
44
- lhs[:num].must be_within(10).of(rhs[:num])
45
- lhs[:num].must_not be_within(5).of(rhs[:num])
46
- save_results_in(db_opts)
47
- end
48
-
49
- runner = Linkage::SingleThreadedRunner.new(conf)
50
- runner.execute
51
-
52
- database_for('sqlite') do |db|
53
- assert_equal 80, db[:matches].count
54
- db[:matches].order(:record_1_id, :record_2_id).each do |row|
55
- assert_equal 1, row[:total_score]
56
- assert_include 6..10, (row[:record_1_id] - row[:record_2_id]).abs
57
- end
58
- end
59
- end
60
-
61
- test "optimize scoring for self linkage" do
62
- database_for('sqlite') do |db|
63
- db.create_table(:foo) { primary_key(:id); Integer(:num) }
64
- db[:foo].import([:id, :num], [[1, 1], [2, 5], [3, 10]])
65
- end
66
-
67
- db_opts = database_options_for('sqlite')
68
- dataset = Linkage::Dataset.new(db_opts, "foo")
69
- conf = dataset.link_with(dataset) do
70
- lhs[:num].must be_within(5).of(rhs[:num])
71
- save_results_in(db_opts)
72
- end
73
-
74
- runner = Linkage::SingleThreadedRunner.new(conf)
75
- runner.execute
76
-
77
- database_for('sqlite') do |db|
78
- assert_equal db[:scores].count, 3
79
- scores = db[:scores].order(:record_1_id, :record_2_id).select_map(:score)
80
- assert_equal [1, 0, 1], scores
81
- end
82
- end
83
- end
84
- end