linkage 0.0.8 → 0.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.yardopts +1 -0
- data/Gemfile +1 -19
- data/Gemfile-java +3 -0
- data/README.markdown +88 -34
- data/Rakefile +16 -15
- data/TODO +4 -0
- data/lib/linkage/comparator.rb +139 -144
- data/lib/linkage/comparators/compare.rb +236 -29
- data/lib/linkage/comparators/strcompare.rb +85 -0
- data/lib/linkage/comparators/within.rb +24 -20
- data/lib/linkage/configuration.rb +44 -466
- data/lib/linkage/dataset.rb +28 -127
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +6 -37
- data/lib/linkage/field_set.rb +3 -3
- data/lib/linkage/match_recorder.rb +22 -0
- data/lib/linkage/match_set.rb +34 -0
- data/lib/linkage/match_sets/csv.rb +39 -0
- data/lib/linkage/match_sets/database.rb +45 -0
- data/lib/linkage/matcher.rb +30 -0
- data/lib/linkage/result_set.rb +25 -110
- data/lib/linkage/result_sets/csv.rb +54 -0
- data/lib/linkage/result_sets/database.rb +42 -0
- data/lib/linkage/runner.rb +57 -16
- data/lib/linkage/score_recorder.rb +30 -0
- data/lib/linkage/score_set.rb +49 -0
- data/lib/linkage/score_sets/csv.rb +64 -0
- data/lib/linkage/score_sets/database.rb +77 -0
- data/lib/linkage/version.rb +1 -1
- data/lib/linkage.rb +14 -17
- data/linkage.gemspec +13 -1
- data/linkage.gemspec-java +32 -0
- data/test/helper.rb +30 -23
- data/test/integration/test_cross_linkage.rb +46 -25
- data/test/integration/test_database_result_set.rb +55 -0
- data/test/integration/test_dual_linkage.rb +19 -94
- data/test/integration/test_self_linkage.rb +100 -203
- data/test/integration/test_within_comparator.rb +24 -77
- data/test/unit/comparators/test_compare.rb +254 -50
- data/test/unit/comparators/test_strcompare.rb +45 -0
- data/test/unit/comparators/test_within.rb +14 -26
- data/test/unit/match_sets/test_csv.rb +78 -0
- data/test/unit/match_sets/test_database.rb +63 -0
- data/test/unit/result_sets/test_csv.rb +111 -0
- data/test/unit/result_sets/test_database.rb +68 -0
- data/test/unit/score_sets/test_csv.rb +151 -0
- data/test/unit/score_sets/test_database.rb +149 -0
- data/test/unit/test_comparator.rb +46 -83
- data/test/unit/test_comparators.rb +4 -0
- data/test/unit/test_configuration.rb +99 -145
- data/test/unit/test_dataset.rb +52 -73
- data/test/unit/test_field.rb +4 -55
- data/test/unit/test_field_set.rb +6 -6
- data/test/unit/test_match_recorder.rb +23 -0
- data/test/unit/test_match_set.rb +23 -0
- data/test/unit/test_match_sets.rb +4 -0
- data/test/unit/test_matcher.rb +44 -0
- data/test/unit/test_result_set.rb +24 -223
- data/test/unit/test_result_sets.rb +4 -0
- data/test/unit/test_runner.rb +122 -17
- data/test/unit/test_runners.rb +4 -0
- data/test/unit/test_score_recorder.rb +25 -0
- data/test/unit/test_score_set.rb +37 -0
- data/test/unit/test_score_sets.rb +4 -0
- metadata +183 -90
- data/Gemfile.lock +0 -92
- data/lib/linkage/comparators/binary.rb +0 -12
- data/lib/linkage/data.rb +0 -175
- data/lib/linkage/decollation.rb +0 -93
- data/lib/linkage/expectation.rb +0 -21
- data/lib/linkage/expectations/exhaustive.rb +0 -63
- data/lib/linkage/expectations/simple.rb +0 -168
- data/lib/linkage/function.rb +0 -148
- data/lib/linkage/functions/binary.rb +0 -30
- data/lib/linkage/functions/cast.rb +0 -54
- data/lib/linkage/functions/length.rb +0 -29
- data/lib/linkage/functions/strftime.rb +0 -33
- data/lib/linkage/functions/trim.rb +0 -30
- data/lib/linkage/group.rb +0 -55
- data/lib/linkage/meta_object.rb +0 -139
- data/lib/linkage/runner/single_threaded.rb +0 -187
- data/lib/linkage/utils.rb +0 -164
- data/lib/linkage/warnings.rb +0 -5
- data/test/integration/test_collation.rb +0 -45
- data/test/integration/test_configuration.rb +0 -268
- data/test/integration/test_dataset.rb +0 -116
- data/test/integration/test_functions.rb +0 -88
- data/test/integration/test_result_set.rb +0 -85
- data/test/integration/test_scoring.rb +0 -84
- data/test/unit/expectations/test_exhaustive.rb +0 -111
- data/test/unit/expectations/test_simple.rb +0 -303
- data/test/unit/functions/test_binary.rb +0 -54
- data/test/unit/functions/test_cast.rb +0 -98
- data/test/unit/functions/test_length.rb +0 -52
- data/test/unit/functions/test_strftime.rb +0 -60
- data/test/unit/functions/test_trim.rb +0 -43
- data/test/unit/runner/test_single_threaded.rb +0 -12
- data/test/unit/test_data.rb +0 -445
- data/test/unit/test_decollation.rb +0 -201
- data/test/unit/test_function.rb +0 -233
- data/test/unit/test_group.rb +0 -38
- data/test/unit/test_meta_object.rb +0 -208
- data/test/unit/test_utils.rb +0 -341
@@ -1,172 +1,126 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
3
|
class UnitTests::TestConfiguration < Test::Unit::TestCase
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
def setup
|
5
|
+
@pk_1 = stub('primary key 1', :name => :id)
|
6
|
+
@field_1 = stub('field 1')
|
7
|
+
@field_set_1 = stub('field set 1', :primary_key => @pk_1, :[] => @field_1)
|
8
|
+
@dataset_1 = stub('dataset 1', :field_set => @field_set_1)
|
9
|
+
@pk_2 = stub('primary key 2', :name => :id)
|
10
|
+
@field_2 = stub('field 2')
|
11
|
+
@field_set_2 = stub('field set 2', :primary_key => @pk_2, :[] => @field_2)
|
12
|
+
@dataset_2 = stub('dataset 2', :field_set => @field_set_2)
|
13
|
+
@score_set = stub('score set')
|
14
|
+
@match_set = stub('match set')
|
15
|
+
@result_set = stub('result set', :score_set => @score_set, :match_set => @match_set)
|
16
|
+
@compare = stub('compare')
|
17
|
+
Linkage::Comparators::Compare.stubs(:new).returns(@compare)
|
12
18
|
end
|
13
19
|
|
14
|
-
test "
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
20
|
+
test "init with single dataset and result set" do
|
21
|
+
config = Linkage::Configuration.new(@dataset_1, @result_set)
|
22
|
+
assert_equal @dataset_1, config.dataset_1
|
23
|
+
assert_nil config.dataset_2
|
24
|
+
assert_equal @result_set, config.result_set
|
19
25
|
end
|
20
26
|
|
21
|
-
test "
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
conf.add_simple_expectation(exp)
|
27
|
-
assert conf.groups_table_needed?
|
27
|
+
test "init with two datasets and result set" do
|
28
|
+
config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
|
29
|
+
assert_equal @dataset_1, config.dataset_1
|
30
|
+
assert_equal @dataset_2, config.dataset_2
|
31
|
+
assert_equal @result_set, config.result_set
|
28
32
|
end
|
29
33
|
|
30
|
-
test "
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
34
|
+
test "adding comparator with set arguments and two datasets" do
|
35
|
+
config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
|
36
|
+
|
37
|
+
@field_set_1.expects(:[]).with(:foo).returns(@field_1)
|
38
|
+
@field_set_2.expects(:[]).with(:foo).returns(@field_2)
|
39
|
+
Linkage::Comparators::Compare.expects(:new).with([@field_1], [@field_2], :equal).returns(@compare)
|
40
|
+
config.compare([:foo], [:foo], :equal)
|
41
|
+
assert_equal @compare, config.comparators[0]
|
35
42
|
end
|
36
43
|
|
37
|
-
test "
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
test "adding comparator with set arguments and one datasets" do
|
45
|
+
config = Linkage::Configuration.new(@dataset_1, @result_set)
|
46
|
+
|
47
|
+
@field_set_1.expects(:[]).with(:foo).returns(@field_1)
|
48
|
+
@field_set_1.expects(:[]).with(:bar).returns(@field_2)
|
49
|
+
Linkage::Comparators::Compare.expects(:new).with([@field_1], [@field_2], :equal).returns(@compare)
|
50
|
+
config.compare([:foo], [:bar], :equal)
|
51
|
+
assert_equal @compare, config.comparators[0]
|
44
52
|
end
|
45
53
|
|
46
|
-
test "
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
:field_set => stub('field set 2', {
|
56
|
-
:primary_key => stub('primary key 2', {
|
57
|
-
:ruby_type => {:type => String, :opts => {:size => 10}}
|
58
|
-
})
|
59
|
-
})
|
60
|
-
})
|
61
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_2)
|
62
|
-
exp_1 = stub('exhaustive expectation 1')
|
63
|
-
exp_2 = stub('exhaustive expectation 2')
|
64
|
-
conf.add_exhaustive_expectation(exp_1)
|
65
|
-
conf.add_exhaustive_expectation(exp_2)
|
66
|
-
|
67
|
-
expected = [
|
68
|
-
[:id, Integer, {:primary_key => true}],
|
69
|
-
[:comparator_id, Integer, {}],
|
70
|
-
[:record_1_id, Integer, {}],
|
71
|
-
[:record_2_id, String, {:size => 10}],
|
72
|
-
[:score, Integer, {}],
|
73
|
-
]
|
74
|
-
assert_equal expected, conf.scores_table_schema
|
54
|
+
test "adding comparator with scalar arguments and two datasets" do
|
55
|
+
config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
|
56
|
+
|
57
|
+
@field_set_1.expects(:[]).with(:foo).returns(@field_1)
|
58
|
+
@field_set_2.expects(:[]).with(:foo).returns(@field_2)
|
59
|
+
within = stub('within')
|
60
|
+
Linkage::Comparators::Within.expects(:new).with(@field_1, @field_2, 5).returns(within)
|
61
|
+
config.within(:foo, :foo, 5)
|
62
|
+
assert_equal within, config.comparators[0]
|
75
63
|
end
|
76
64
|
|
77
|
-
test "
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
:to_expr => :bar_id
|
89
|
-
})
|
90
|
-
})
|
91
|
-
})
|
92
|
-
dataset_1a = stub('dataset 1a')
|
93
|
-
dataset_2a = stub('dataset 2a')
|
94
|
-
dataset_1b = stub('dataset 1b')
|
95
|
-
dataset_2b = stub('dataset 2b')
|
96
|
-
dataset_1c = stub('dataset 1c')
|
97
|
-
dataset_2c = stub('dataset 2c')
|
98
|
-
exp_1 = stub('exhaustive expectation 1')
|
99
|
-
exp_2 = stub('exhaustive expectation 2')
|
100
|
-
|
101
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_2)
|
102
|
-
conf.add_exhaustive_expectation(exp_1)
|
103
|
-
conf.add_exhaustive_expectation(exp_2)
|
104
|
-
|
105
|
-
dataset_1.expects(:select).with(:foo_id).returns(dataset_1a)
|
106
|
-
dataset_2.expects(:select).with(:bar_id).returns(dataset_2a)
|
107
|
-
exp_1.expects(:apply_to).with(dataset_1a, :lhs).returns(dataset_1b)
|
108
|
-
exp_1.expects(:apply_to).with(dataset_2a, :rhs).returns(dataset_2b)
|
109
|
-
exp_2.expects(:apply_to).with(dataset_1b, :lhs).returns(dataset_1c)
|
110
|
-
exp_2.expects(:apply_to).with(dataset_2b, :rhs).returns(dataset_2c)
|
111
|
-
|
112
|
-
assert_equal [dataset_1c, dataset_2c], conf.datasets_with_applied_exhaustive_expectations
|
65
|
+
test "score_recorder with two datasets" do
|
66
|
+
config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
|
67
|
+
config.compare([:foo], [:foo], :equal)
|
68
|
+
|
69
|
+
@field_set_1.expects(:primary_key).returns(@pk_1)
|
70
|
+
@field_set_2.expects(:primary_key).returns(@pk_2)
|
71
|
+
@pk_1.expects(:name).returns(:id_1)
|
72
|
+
@pk_2.expects(:name).returns(:id_2)
|
73
|
+
score_recorder = stub('recorder')
|
74
|
+
Linkage::ScoreRecorder.expects(:new).with([@compare], @score_set, [:id_1, :id_2]).returns(score_recorder)
|
75
|
+
assert_same score_recorder, config.score_recorder
|
113
76
|
end
|
114
77
|
|
115
|
-
test "
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
:field_set => stub('field set 2', {
|
125
|
-
:primary_key => stub('primary key 2', {
|
126
|
-
:ruby_type => {:type => String, :opts => {:size => 10}}
|
127
|
-
})
|
128
|
-
})
|
129
|
-
})
|
130
|
-
conf = Linkage::Configuration.new(dataset_1, dataset_2)
|
131
|
-
|
132
|
-
expected = [
|
133
|
-
[:id, Integer, {:primary_key => true}],
|
134
|
-
[:record_1_id, Integer, {}],
|
135
|
-
[:record_2_id, String, {:size => 10}],
|
136
|
-
[:total_score, Integer, {}],
|
137
|
-
]
|
138
|
-
assert_equal expected, conf.matches_table_schema
|
78
|
+
test "score_recorder with one dataset" do
|
79
|
+
config = Linkage::Configuration.new(@dataset_1, @result_set)
|
80
|
+
config.compare([:foo], [:bar], :equal)
|
81
|
+
|
82
|
+
@field_set_1.expects(:primary_key).returns(@pk_1)
|
83
|
+
@pk_1.expects(:name).returns(:id_1)
|
84
|
+
score_recorder = stub('score recorder')
|
85
|
+
Linkage::ScoreRecorder.expects(:new).with([@compare], @score_set, [:id_1, :id_1]).returns(score_recorder)
|
86
|
+
assert_same score_recorder, config.score_recorder
|
139
87
|
end
|
140
88
|
|
141
|
-
test "
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
89
|
+
test "default matcher" do
|
90
|
+
config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
|
91
|
+
config.compare([:foo], [:bar], :equal)
|
92
|
+
|
93
|
+
matcher = stub('matcher')
|
94
|
+
Linkage::Matcher.expects(:new).with([@compare], @score_set, :mean, 0.5).returns(matcher)
|
95
|
+
assert_equal matcher, config.matcher
|
147
96
|
end
|
148
97
|
|
149
|
-
test "
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
98
|
+
test "matcher with explicit algorithm" do
|
99
|
+
config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
|
100
|
+
config.compare([:foo], [:bar], :equal)
|
101
|
+
config.algorithm = :foo
|
102
|
+
|
103
|
+
matcher = stub('matcher')
|
104
|
+
Linkage::Matcher.expects(:new).with([@compare], @score_set, :foo, 0.5).returns(matcher)
|
105
|
+
assert_equal matcher, config.matcher
|
155
106
|
end
|
156
107
|
|
157
|
-
test "
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
108
|
+
test "matcher with explicit threshold" do
|
109
|
+
config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
|
110
|
+
config.compare([:foo], [:bar], :equal)
|
111
|
+
config.threshold = 0.9
|
112
|
+
|
113
|
+
matcher = stub('matcher')
|
114
|
+
Linkage::Matcher.expects(:new).with([@compare], @score_set, :mean, 0.9).returns(matcher)
|
115
|
+
assert_equal matcher, config.matcher
|
163
116
|
end
|
164
117
|
|
165
|
-
test "
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
118
|
+
test "match_recorder" do
|
119
|
+
config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
|
120
|
+
|
121
|
+
matcher = stub('matcher')
|
122
|
+
match_recorder = stub('match recorder')
|
123
|
+
Linkage::MatchRecorder.expects(:new).with(matcher, @match_set).returns(match_recorder)
|
124
|
+
assert_equal match_recorder, config.match_recorder(matcher)
|
171
125
|
end
|
172
126
|
end
|
data/test/unit/test_dataset.rb
CHANGED
@@ -8,36 +8,45 @@ class UnitTests::TestDataset < Test::Unit::TestCase
|
|
8
8
|
[:first_name, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :ruby_default=>nil}],
|
9
9
|
[:last_name, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :ruby_default=>nil}]
|
10
10
|
]
|
11
|
+
|
11
12
|
@dataset = stub('Sequel dataset', :first_source_table => :foo)
|
12
|
-
@
|
13
|
+
@dataset.responds_like_instance_of(Sequel::Dataset)
|
14
|
+
@dataset.stubs(:kind_of?).with(Sequel::Dataset).returns(true)
|
15
|
+
|
16
|
+
@database = stub('database', :schema => @schema, :[] => @dataset)
|
17
|
+
@database.responds_like_instance_of(Sequel::Database)
|
18
|
+
@database.stubs(:kind_of?).with(Sequel::Database).returns(true)
|
13
19
|
@dataset.stubs(:db).returns(@database)
|
14
20
|
Sequel.stubs(:connect).returns(@database)
|
21
|
+
|
15
22
|
@field_set = stub("field set")
|
16
23
|
Linkage::FieldSet.stubs(:new).returns(@field_set)
|
17
24
|
end
|
18
25
|
|
19
26
|
test "initialize with uri and table name" do
|
20
27
|
Sequel.expects(:connect).with('foo:/bar', {:foo => 'bar'}).returns(@database)
|
21
|
-
@database.expects(:extend).with(Sequel::Collation)
|
22
28
|
@database.expects(:[]).with(:foo).returns(@dataset)
|
23
29
|
Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
|
24
30
|
ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
|
31
|
+
assert_equal @field_set, ds.field_set
|
25
32
|
end
|
26
33
|
|
27
34
|
test "initialize with sequel dataset" do
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
test "extend Sequel::Collation when initializing with sequel dataset" do
|
32
|
-
@database.stubs(:kind_of?).with(Sequel::Collation).returns(false)
|
33
|
-
@database.expects(:extend).with(Sequel::Collation)
|
35
|
+
@dataset.expects(:first_source_table).returns(:foo)
|
36
|
+
@dataset.expects(:db).returns(@database)
|
37
|
+
Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
|
34
38
|
ds = Linkage::Dataset.new(@dataset)
|
39
|
+
assert_equal :foo, ds.table_name
|
40
|
+
assert_equal @field_set, ds.field_set
|
35
41
|
end
|
36
42
|
|
37
|
-
test "
|
38
|
-
|
39
|
-
|
40
|
-
|
43
|
+
test "initialize with sequel database and table name" do
|
44
|
+
Sequel.unstub(:connect)
|
45
|
+
Sequel.expects(:connect).never
|
46
|
+
@database.expects(:[]).with(:foo).returns(@dataset)
|
47
|
+
Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
|
48
|
+
ds = Linkage::Dataset.new(@database, "foo")
|
49
|
+
assert_equal @field_set, ds.field_set
|
41
50
|
end
|
42
51
|
|
43
52
|
test "table_name" do
|
@@ -69,72 +78,42 @@ class UnitTests::TestDataset < Test::Unit::TestCase
|
|
69
78
|
assert_equal :foo, ds.database_type
|
70
79
|
end
|
71
80
|
|
72
|
-
test "
|
73
|
-
ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
|
74
|
-
@dataset.expects(:clone).returns(@dataset)
|
75
|
-
meta_object = stub_instance(Linkage::MetaObject)
|
76
|
-
ds_2 = ds_1.group_match(meta_object)
|
77
|
-
assert_not_same ds_1, ds_2
|
78
|
-
assert_not_equal ds_1.instance_variable_get(:@linkage_options),
|
79
|
-
ds_2.instance_variable_get(:@linkage_options)
|
80
|
-
end
|
81
|
-
|
82
|
-
test "subsequent group_match replaces old options" do
|
83
|
-
ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
|
84
|
-
@dataset.expects(:clone).at_least_once.returns(@dataset)
|
85
|
-
meta_object_1 = stub_instance(Linkage::MetaObject)
|
86
|
-
ds_2 = ds_1.group_match(meta_object_1)
|
87
|
-
assert_equal([{:meta_object => meta_object_1}], ds_2.linkage_options[:group_match])
|
88
|
-
|
89
|
-
meta_object_2 = stub_instance(Linkage::MetaObject)
|
90
|
-
ds_3 = ds_2.group_match(meta_object_2)
|
91
|
-
assert_equal([{:meta_object => meta_object_2}], ds_3.linkage_options[:group_match])
|
92
|
-
end
|
93
|
-
|
94
|
-
test "group_match_more appends to group_match options" do
|
95
|
-
ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
|
96
|
-
@dataset.expects(:clone).at_least_once.returns(@dataset)
|
97
|
-
meta_object_1 = stub_instance(Linkage::MetaObject)
|
98
|
-
ds_2 = ds_1.group_match(meta_object_1)
|
99
|
-
assert_equal([{:meta_object => meta_object_1}], ds_2.linkage_options[:group_match])
|
100
|
-
|
101
|
-
meta_object_2 = stub_instance(Linkage::MetaObject)
|
102
|
-
ds_3 = ds_2.group_match_more(meta_object_2)
|
103
|
-
assert_equal([{:meta_object => meta_object_1}, {:meta_object => meta_object_2}], ds_3.linkage_options[:group_match])
|
104
|
-
end
|
105
|
-
|
106
|
-
test "group_by_matches" do
|
81
|
+
test "primary key" do
|
107
82
|
ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
|
108
|
-
|
109
|
-
@
|
110
|
-
|
111
|
-
ds = ds.group_match(meta_object)
|
112
|
-
@dataset.expects(:group).with(:foo).returns(@dataset)
|
113
|
-
|
114
|
-
ds.group_by_matches
|
83
|
+
pk = stub('primary key field')
|
84
|
+
@field_set.expects(:primary_key).returns(pk)
|
85
|
+
assert_equal pk, ds.primary_key
|
115
86
|
end
|
116
87
|
|
117
|
-
test "
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
assert_equal
|
88
|
+
test "link_with other" do
|
89
|
+
ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
|
90
|
+
ds_2 = Linkage::Dataset.new('foo:/bar', "bar", {:foo => 'bar'})
|
91
|
+
result_set = stub('result set')
|
92
|
+
conf = stub('configuration')
|
93
|
+
Linkage::Configuration.expects(:new).with(ds_1, ds_2, result_set).returns(conf)
|
94
|
+
actual = ds_1.link_with(ds_2, result_set) do |arg|
|
95
|
+
assert_equal conf, arg
|
96
|
+
end
|
97
|
+
assert_equal actual, conf
|
127
98
|
end
|
128
99
|
|
129
|
-
test "
|
100
|
+
test "link_with self" do
|
130
101
|
ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
|
131
|
-
|
132
|
-
|
133
|
-
ds
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
102
|
+
result_set = stub('result set')
|
103
|
+
conf = stub('configuration')
|
104
|
+
Linkage::Configuration.expects(:new).with(ds, nil, result_set).returns(conf)
|
105
|
+
actual = ds.link_with(ds, result_set) do |arg|
|
106
|
+
assert_equal conf, arg
|
107
|
+
end
|
108
|
+
assert_equal actual, conf
|
109
|
+
end
|
110
|
+
|
111
|
+
test "delegating" do
|
112
|
+
dataset_2 = Sequel::Dataset.allocate
|
113
|
+
@dataset.expects(:filter).with(:foo => 123).returns(dataset_2)
|
114
|
+
ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
|
115
|
+
ds_2 = ds_1.filter(:foo => 123)
|
116
|
+
assert_kind_of Linkage::Dataset, ds_2
|
117
|
+
assert_same dataset_2, ds_2.obj
|
139
118
|
end
|
140
119
|
end
|
data/test/unit/test_field.rb
CHANGED
@@ -1,74 +1,23 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
3
|
class UnitTests::TestField < Test::Unit::TestCase
|
4
|
-
test "subclass of data" do
|
5
|
-
assert_equal Linkage::Data, Linkage::Field.superclass
|
6
|
-
end
|
7
|
-
|
8
4
|
test "initialize with schema info" do
|
9
|
-
dataset = stub('dataset')
|
10
5
|
schema = {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil}
|
11
|
-
field = Linkage::Field.new(
|
6
|
+
field = Linkage::Field.new(:id, schema)
|
12
7
|
assert_equal :id, field.name
|
13
8
|
assert_equal schema, field.schema
|
14
|
-
assert_equal dataset, field.dataset
|
15
|
-
end
|
16
|
-
|
17
|
-
test "static? is always false" do
|
18
|
-
dataset = stub('dataset')
|
19
|
-
schema = {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil}
|
20
|
-
field = Linkage::Field.new(dataset, :id, schema)
|
21
|
-
assert !field.static?
|
22
9
|
end
|
23
10
|
|
24
11
|
test "ruby_type for integer" do
|
25
|
-
|
26
|
-
field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
|
12
|
+
field = Linkage::Field.new(:id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
|
27
13
|
assert_equal({:type => Integer}, field.ruby_type)
|
28
14
|
end
|
29
15
|
|
30
16
|
test "primary_key? returns true if primary key" do
|
31
|
-
|
32
|
-
field_1 = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
|
17
|
+
field_1 = Linkage::Field.new(:id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
|
33
18
|
assert field_1.primary_key?
|
34
19
|
|
35
|
-
field_2 = Linkage::Field.new(
|
20
|
+
field_2 = Linkage::Field.new(:foo, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
|
36
21
|
assert !field_2.primary_key?
|
37
22
|
end
|
38
|
-
|
39
|
-
test "to_expr returns name" do
|
40
|
-
dataset = stub('dataset')
|
41
|
-
field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
|
42
|
-
assert_equal :id, field.to_expr
|
43
|
-
end
|
44
|
-
|
45
|
-
test "to_expr ignores adapter argument" do
|
46
|
-
dataset = stub('dataset')
|
47
|
-
field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
|
48
|
-
assert_equal :id, field.to_expr(:foo)
|
49
|
-
end
|
50
|
-
|
51
|
-
test "collation" do
|
52
|
-
dataset = stub('dataset')
|
53
|
-
field = Linkage::Field.new(dataset, :foo, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :collation=>"latin1_general_cs", :ruby_default=>nil})
|
54
|
-
assert_equal "latin1_general_cs", field.collation
|
55
|
-
assert_equal "latin1_general_cs", field.ruby_type[:opts][:collate]
|
56
|
-
end
|
57
|
-
|
58
|
-
test "initialize MergeField with ruby type" do
|
59
|
-
info = {:type => Integer}
|
60
|
-
field = Linkage::MergeField.new(:id, info)
|
61
|
-
assert_equal :id, field.name
|
62
|
-
assert_equal info, field.ruby_type
|
63
|
-
assert_nil field.schema
|
64
|
-
assert_nil field.dataset
|
65
|
-
end
|
66
|
-
|
67
|
-
test "MergeField#database_type accessor" do
|
68
|
-
field_1 = Linkage::MergeField.new(:id, {:type => Integer})
|
69
|
-
assert_nil field_1.database_type
|
70
|
-
|
71
|
-
field_2 = Linkage::MergeField.new(:id, {:type => Integer}, :mysql)
|
72
|
-
assert_equal :mysql, field_2.database_type
|
73
|
-
end
|
74
23
|
end
|
data/test/unit/test_field_set.rb
CHANGED
@@ -20,9 +20,9 @@ class UnitTests::TestFieldSet < Test::Unit::TestCase
|
|
20
20
|
field_1 = stub('id field')
|
21
21
|
field_2 = stub('first_name field')
|
22
22
|
field_3 = stub('last_name field')
|
23
|
-
Linkage::Field.expects(:new).with(
|
24
|
-
Linkage::Field.expects(:new).with(
|
25
|
-
Linkage::Field.expects(:new).with(
|
23
|
+
Linkage::Field.expects(:new).with(:id, @schema[:id]).returns(field_1)
|
24
|
+
Linkage::Field.expects(:new).with(:first_name, @schema[:first_name]).returns(field_2)
|
25
|
+
Linkage::Field.expects(:new).with(:last_name, @schema[:last_name]).returns(field_3)
|
26
26
|
|
27
27
|
fs = Linkage::FieldSet.new(dataset)
|
28
28
|
assert_equal field_1, fs.primary_key
|
@@ -37,9 +37,9 @@ class UnitTests::TestFieldSet < Test::Unit::TestCase
|
|
37
37
|
field_1 = stub('id field')
|
38
38
|
field_2 = stub('first_name field')
|
39
39
|
field_3 = stub('last_name field')
|
40
|
-
Linkage::Field.stubs(:new).with(
|
41
|
-
Linkage::Field.stubs(:new).with(
|
42
|
-
Linkage::Field.stubs(:new).with(
|
40
|
+
Linkage::Field.stubs(:new).with(:id, @schema[:id]).returns(field_1)
|
41
|
+
Linkage::Field.stubs(:new).with(:first_name, @schema[:first_name]).returns(field_2)
|
42
|
+
Linkage::Field.stubs(:new).with(:last_name, @schema[:last_name]).returns(field_3)
|
43
43
|
|
44
44
|
fs = Linkage::FieldSet.new(dataset)
|
45
45
|
assert_equal field_1, fs.primary_key
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class UnitTests::TestMatchRecorder < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
@match_set = stub('match set')
|
6
|
+
@matcher = stub('matcher')
|
7
|
+
end
|
8
|
+
|
9
|
+
test "recording events from a matcher" do
|
10
|
+
match_recorder = Linkage::MatchRecorder.new(@matcher, @match_set)
|
11
|
+
|
12
|
+
@matcher.expects(:add_observer).with(match_recorder)
|
13
|
+
@match_set.expects(:open_for_writing)
|
14
|
+
match_recorder.start
|
15
|
+
|
16
|
+
@match_set.expects(:add_match).with(123, 456, 1)
|
17
|
+
match_recorder.update(123, 456, 1)
|
18
|
+
|
19
|
+
@matcher.expects(:delete_observer).with(match_recorder)
|
20
|
+
@match_set.expects(:close)
|
21
|
+
match_recorder.stop
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestMatchSet < Test::Unit::TestCase
|
4
|
+
test "add_match raises NotImplementedError" do
|
5
|
+
match_set = Linkage::MatchSet.new
|
6
|
+
assert_raises(NotImplementedError) do
|
7
|
+
match_set.add_match('foo', 'bar', 'baz')
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
test "getting a registered class" do
|
12
|
+
klass = new_match_set
|
13
|
+
Linkage::MatchSet.register('foo', klass)
|
14
|
+
assert_equal klass, Linkage::MatchSet['foo']
|
15
|
+
end
|
16
|
+
|
17
|
+
test "registered classes required to define add_match" do
|
18
|
+
klass = new_match_set do
|
19
|
+
remove_method :add_match
|
20
|
+
end
|
21
|
+
assert_raises(ArgumentError) { Linkage::MatchSet.register('foo', klass) }
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class UnitTests::TestMatcher < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
@score_set = stub('score set')
|
6
|
+
@comparators = [stub('comparator 1'), stub('comparator 2'), stub('comparator 3')]
|
7
|
+
end
|
8
|
+
|
9
|
+
test "finding matches with mean and threshold" do
|
10
|
+
matcher = Linkage::Matcher.new(@comparators, @score_set, :mean, 0.5)
|
11
|
+
observer = stub('observer')
|
12
|
+
observer.expects(:update).with(3, 4, 2.0 / 3)
|
13
|
+
observer.expects(:update).with(4, 5, 1.0)
|
14
|
+
matcher.add_observer(observer)
|
15
|
+
|
16
|
+
pairs = [
|
17
|
+
[1, 2, {1 => 1, 2 => 0, 3 => 0}],
|
18
|
+
[2, 3, {1 => 0, 2 => 0, 3 => 0}],
|
19
|
+
[3, 4, {1 => 0, 2 => 1, 3 => 1}],
|
20
|
+
[4, 5, {1 => 1, 2 => 1, 3 => 1}]
|
21
|
+
]
|
22
|
+
@score_set.expects(:each_pair).multiple_yields(*pairs)
|
23
|
+
|
24
|
+
matcher.run
|
25
|
+
end
|
26
|
+
|
27
|
+
test "finding matches with mean and threshold with missing scores" do
|
28
|
+
matcher = Linkage::Matcher.new(@comparators, @score_set, :mean, 0.5)
|
29
|
+
observer = stub('observer')
|
30
|
+
observer.expects(:update).with(3, 4, 2.0 / 3)
|
31
|
+
observer.expects(:update).with(4, 5, 1.0)
|
32
|
+
matcher.add_observer(observer)
|
33
|
+
|
34
|
+
pairs = [
|
35
|
+
[1, 2, {1 => 1, 3 => 0}],
|
36
|
+
[2, 3, {1 => 0, 2 => 0, 3 => 0}],
|
37
|
+
[3, 4, {2 => 1, 3 => 1}],
|
38
|
+
[4, 5, {1 => 1, 2 => 1, 3 => 1}]
|
39
|
+
]
|
40
|
+
@score_set.expects(:each_pair).multiple_yields(*pairs)
|
41
|
+
|
42
|
+
matcher.run
|
43
|
+
end
|
44
|
+
end
|