linkage 0.1.0.pre → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/Guardfile +0 -1
- data/TODO +2 -0
- data/lib/linkage.rb +1 -0
- data/lib/linkage/comparator.rb +12 -2
- data/lib/linkage/comparators/strcompare.rb +68 -16
- data/lib/linkage/configuration.rb +112 -8
- data/lib/linkage/dataset.rb +124 -9
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +55 -18
- data/lib/linkage/field_set.rb +20 -0
- data/lib/linkage/helpers.rb +7 -0
- data/lib/linkage/helpers/csv.rb +28 -0
- data/lib/linkage/helpers/database.rb +47 -0
- data/lib/linkage/import_buffer.rb +3 -3
- data/lib/linkage/match_recorder.rb +4 -0
- data/lib/linkage/match_set.rb +51 -13
- data/lib/linkage/match_sets/csv.rb +36 -9
- data/lib/linkage/match_sets/database.rb +43 -2
- data/lib/linkage/matcher.rb +49 -3
- data/lib/linkage/result_set.rb +60 -22
- data/lib/linkage/result_sets/csv.rb +46 -28
- data/lib/linkage/result_sets/database.rb +44 -26
- data/lib/linkage/runner.rb +10 -0
- data/lib/linkage/score_recorder.rb +5 -0
- data/lib/linkage/score_set.rb +78 -20
- data/lib/linkage/score_sets/csv.rb +41 -15
- data/lib/linkage/score_sets/database.rb +43 -5
- data/lib/linkage/version.rb +1 -1
- data/linkage.gemspec +2 -0
- data/misc/uml/linkage.dia +0 -0
- data/misc/uml/linkage.png +0 -0
- data/misc/uml/linkage.svg +197 -0
- data/test/helper.rb +2 -11
- data/test/integration/test_database_result_set.rb +4 -2
- data/test/unit/comparators/test_strcompare.rb +29 -0
- data/test/unit/match_sets/test_csv.rb +44 -13
- data/test/unit/match_sets/test_database.rb +42 -1
- data/test/unit/result_sets/test_csv.rb +9 -69
- data/test/unit/result_sets/test_database.rb +20 -11
- data/test/unit/score_sets/test_csv.rb +68 -25
- data/test/unit/score_sets/test_database.rb +57 -1
- data/test/unit/test_comparator.rb +8 -0
- data/test/unit/test_configuration.rb +33 -6
- data/test/unit/test_dataset.rb +0 -7
- data/test/unit/test_matcher.rb +52 -3
- data/test/unit/test_result_set.rb +8 -14
- metadata +66 -32
@@ -2,20 +2,52 @@ require 'csv'
|
|
2
2
|
|
3
3
|
module Linkage
|
4
4
|
module ScoreSets
|
5
|
+
# {CSV ScoreSets::CSV} is an implementation of {ScoreSet} for saving scores
|
6
|
+
# in a CSV file.
|
7
|
+
#
|
8
|
+
# There are three options available:
|
9
|
+
#
|
10
|
+
# * `:filename` - which file to store scores in; can be an absolute path
|
11
|
+
# or relative path
|
12
|
+
# * `:dir` - which directory to put the file in; used if `:filename` is a
|
13
|
+
# relative path
|
14
|
+
# * `:overwrite` - indicate whether or not to overwrite an existing file
|
15
|
+
#
|
16
|
+
# By default, `:filename` is `'scores.csv'`, and the other options are
|
17
|
+
# blank. This means that it will write scores to the `'scores.csv'` file in
|
18
|
+
# the current working directory and will raise an error if the file already
|
19
|
+
# exists.
|
20
|
+
#
|
21
|
+
# If you specify `:dir`, that path will be created if it doesn't exist yet.
|
22
|
+
#
|
23
|
+
# The resulting file looks like this:
|
24
|
+
#
|
25
|
+
# comparator_id,id_1,id_2,score
|
26
|
+
# 1,123,456,1
|
27
|
+
# 1,124,457,0.5
|
28
|
+
# 2,123,456,0
|
29
|
+
#
|
30
|
+
# @see Helpers::CSV
|
5
31
|
class CSV < ScoreSet
|
6
|
-
|
7
|
-
|
8
|
-
|
32
|
+
include Linkage::Helpers::CSV
|
33
|
+
|
34
|
+
DEFAULT_OPTIONS = {
|
35
|
+
:filename => 'scores.csv'
|
36
|
+
}
|
37
|
+
|
38
|
+
# @param [Hash] options
|
39
|
+
# @option options [String] :filename
|
40
|
+
# @option options [String] :dir
|
41
|
+
# @option options [Boolean] :overwrite
|
42
|
+
def initialize(options = {})
|
43
|
+
@options = DEFAULT_OPTIONS.merge(options.reject { |k, v| v.nil? })
|
9
44
|
end
|
10
45
|
|
11
46
|
def open_for_reading
|
12
47
|
raise "already open for writing, try closing first" if @mode == :write
|
13
48
|
return if @mode == :read
|
14
49
|
|
15
|
-
|
16
|
-
raise MissingError, "#{@filename} does not exist"
|
17
|
-
end
|
18
|
-
@csv = ::CSV.open(@filename, 'rb', :headers => true)
|
50
|
+
@csv = open_csv_for_reading(@options)
|
19
51
|
@mode = :read
|
20
52
|
end
|
21
53
|
|
@@ -23,11 +55,7 @@ module Linkage
|
|
23
55
|
raise "already open for reading, try closing first" if @mode == :read
|
24
56
|
return if @mode == :write
|
25
57
|
|
26
|
-
|
27
|
-
raise ExistsError, "#{@filename} exists and not in overwrite mode"
|
28
|
-
end
|
29
|
-
|
30
|
-
@csv = ::CSV.open(@filename, 'wb')
|
58
|
+
@csv = open_csv_for_writing(@options)
|
31
59
|
@csv << %w{comparator_id id_1 id_2 score}
|
32
60
|
@mode = :write
|
33
61
|
end
|
@@ -38,7 +66,7 @@ module Linkage
|
|
38
66
|
end
|
39
67
|
|
40
68
|
def each_pair
|
41
|
-
|
69
|
+
raise "not in read mode" if @mode != :read
|
42
70
|
|
43
71
|
pairs = Hash.new { |h, k| h[k] = {} }
|
44
72
|
@csv.each do |row|
|
@@ -49,8 +77,6 @@ module Linkage
|
|
49
77
|
pairs.each_pair do |pair, scores|
|
50
78
|
yield pair[0], pair[1], scores
|
51
79
|
end
|
52
|
-
|
53
|
-
close
|
54
80
|
end
|
55
81
|
|
56
82
|
def close
|
@@ -1,8 +1,48 @@
|
|
1
1
|
module Linkage
|
2
2
|
module ScoreSets
|
3
|
+
# {Database ScoreSets::Database} is an implementation of {ScoreSet} for saving
|
4
|
+
# scores in a relational database.
|
5
|
+
#
|
6
|
+
# Scores are saved in a database table with the following columns:
|
7
|
+
# - comparator_id (integer)
|
8
|
+
# - id_1 (string)
|
9
|
+
# - id_2 (string)
|
10
|
+
# - score (float)
|
11
|
+
#
|
12
|
+
# You can set up a database connection in a few different ways. By default, a
|
13
|
+
# SQLite database with the filename of `scores.db` will be created in the
|
14
|
+
# current working directory. If you want something different, you can either
|
15
|
+
# specify a Sequel-style URI, provide connection options for
|
16
|
+
# `Sequel.connect`, or you can just specify a `Sequel::Database` object to
|
17
|
+
# use.
|
18
|
+
#
|
19
|
+
# There are a couple of non-Sequel connection options:
|
20
|
+
# * `:filename` - specify filename to use for a SQLite database
|
21
|
+
# * `:dir` - specify the parent directory for a SQLite database
|
22
|
+
#
|
23
|
+
# In addition to connection options, there are behavioral options you can
|
24
|
+
# set. By default, the table name used is called `scores`, but you can change
|
25
|
+
# that by setting the `:table_name` option in the second options hash. If
|
26
|
+
# the table already exists, an {ExistsError} will be raised unless you set
|
27
|
+
# the `:overwrite` option to a truthy value in the second options hash.
|
3
28
|
class Database < ScoreSet
|
4
|
-
|
5
|
-
|
29
|
+
include Helpers::Database
|
30
|
+
|
31
|
+
DEFAULT_OPTIONS = {
|
32
|
+
:filename => 'scores.db'
|
33
|
+
}
|
34
|
+
|
35
|
+
# @overload initialize(connection_options = {}, options = {})
|
36
|
+
# @param connection_options [Hash]
|
37
|
+
# @param options [Hash]
|
38
|
+
# @overload initialize(uri, options = {})
|
39
|
+
# @param uri [String]
|
40
|
+
# @param options [Hash]
|
41
|
+
# @overload initialize(database, options = {})
|
42
|
+
# @param database [Sequel::Database]
|
43
|
+
# @param options [Hash]
|
44
|
+
def initialize(connection_options = {}, options = {})
|
45
|
+
@database = database_connection(connection_options, DEFAULT_OPTIONS)
|
6
46
|
@table_name = options[:table_name] || :scores
|
7
47
|
@overwrite = options[:overwrite]
|
8
48
|
end
|
@@ -51,7 +91,7 @@ module Linkage
|
|
51
91
|
end
|
52
92
|
|
53
93
|
def each_pair
|
54
|
-
|
94
|
+
raise "not in read mode" if @mode != :read
|
55
95
|
|
56
96
|
current_pair = nil
|
57
97
|
@dataset.order(:id_1, :id_2, :comparator_id).each do |row|
|
@@ -63,8 +103,6 @@ module Linkage
|
|
63
103
|
scores[row[:comparator_id]] = row[:score]
|
64
104
|
end
|
65
105
|
yield(*current_pair) unless current_pair.nil?
|
66
|
-
|
67
|
-
close
|
68
106
|
end
|
69
107
|
|
70
108
|
def close
|
data/lib/linkage/version.rb
CHANGED
data/linkage.gemspec
CHANGED
@@ -28,6 +28,8 @@ Gem::Specification.new do |gem|
|
|
28
28
|
gem.add_development_dependency "mysql2"
|
29
29
|
gem.add_development_dependency "guard-test"
|
30
30
|
gem.add_development_dependency "guard-yard"
|
31
|
+
gem.add_development_dependency "redcarpet"
|
32
|
+
gem.add_development_dependency "yard-redcarpet-ext"
|
31
33
|
|
32
34
|
gem.required_ruby_version = '>= 1.9'
|
33
35
|
end
|
Binary file
|
Binary file
|
@@ -0,0 +1,197 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
2
|
+
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd">
|
3
|
+
<svg width="45cm" height="28cm" viewBox="10 8 891 557" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
4
|
+
<g>
|
5
|
+
<rect style="fill: #ffffff" x="386" y="413" width="102.4" height="28"/>
|
6
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="413" width="102.4" height="28"/>
|
7
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="437.2" y="432">Dataset</text>
|
8
|
+
<rect style="fill: #ffffff" x="386" y="441" width="102.4" height="20"/>
|
9
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="441" width="102.4" height="20"/>
|
10
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="389" y="455">+table_name</text>
|
11
|
+
<rect style="fill: #ffffff" x="386" y="461" width="102.4" height="20"/>
|
12
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="461" width="102.4" height="20"/>
|
13
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="389" y="475">+link_with()</text>
|
14
|
+
</g>
|
15
|
+
<g>
|
16
|
+
<rect style="fill: #ffffff" x="75" y="433" width="154.9" height="28"/>
|
17
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="75" y="433" width="154.9" height="28"/>
|
18
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="152.45" y="452">Sequel::Dataset</text>
|
19
|
+
</g>
|
20
|
+
<g>
|
21
|
+
<rect style="fill: #ffffff" x="574" y="500" width="84.25" height="28"/>
|
22
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="574" y="500" width="84.25" height="28"/>
|
23
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="616.125" y="519">FieldSet</text>
|
24
|
+
</g>
|
25
|
+
<g>
|
26
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="514.577,447 542.201,447 542.201,514 573.008,514 "/>
|
27
|
+
<polygon style="fill: #000000" points="489.405,447 503.405,442.2 517.405,447 503.405,451.8 "/>
|
28
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="489.405,447 503.405,442.2 517.405,447 503.405,451.8 "/>
|
29
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="521.405" y="443"/>
|
30
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="569.008" y="510"/>
|
31
|
+
</g>
|
32
|
+
<g>
|
33
|
+
<rect style="fill: #ffffff" x="774" y="464" width="125.5" height="28"/>
|
34
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="464" width="125.5" height="28"/>
|
35
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="836.75" y="483">Field</text>
|
36
|
+
<rect style="fill: #ffffff" x="774" y="492" width="125.5" height="36"/>
|
37
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="492" width="125.5" height="36"/>
|
38
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="506">+name</text>
|
39
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="522">+schema</text>
|
40
|
+
<rect style="fill: #ffffff" x="774" y="528" width="125.5" height="36"/>
|
41
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="528" width="125.5" height="36"/>
|
42
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="542">+ruby_type()</text>
|
43
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="558">+primary_key?()</text>
|
44
|
+
</g>
|
45
|
+
<g>
|
46
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="684.427,514 674.255,514 771.992,514 772.992,514 "/>
|
47
|
+
<polygon style="fill: #ffffff" points="659.255,514 673.255,509.2 687.255,514 673.255,518.8 "/>
|
48
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="659.255,514 673.255,509.2 687.255,514 673.255,518.8 "/>
|
49
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="691.255" y="510"/>
|
50
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="768.992" y="510">1..*</text>
|
51
|
+
</g>
|
52
|
+
<g>
|
53
|
+
<rect style="fill: #ffffff" x="11" y="237" width="97.2" height="28"/>
|
54
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="11" y="237" width="97.2" height="28"/>
|
55
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="59.6" y="256">ResultSet</text>
|
56
|
+
</g>
|
57
|
+
<g>
|
58
|
+
<rect style="fill: #ffffff" x="332" y="9" width="102.4" height="28"/>
|
59
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="332" y="9" width="102.4" height="28"/>
|
60
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="383.2" y="28">ScoreSet</text>
|
61
|
+
<rect style="fill: #ffffff" x="332" y="37" width="102.4" height="36"/>
|
62
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="332" y="37" width="102.4" height="36"/>
|
63
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="335" y="51">+add_score()</text>
|
64
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="335" y="67">+each_pair()</text>
|
65
|
+
</g>
|
66
|
+
<g>
|
67
|
+
<rect style="fill: #ffffff" x="168.6" y="226.3" width="102.4" height="28"/>
|
68
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="168.6" y="226.3" width="102.4" height="28"/>
|
69
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="219.8" y="245.3">MatchSet</text>
|
70
|
+
<rect style="fill: #ffffff" x="168.6" y="254.3" width="102.4" height="20"/>
|
71
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="168.6" y="254.3" width="102.4" height="20"/>
|
72
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="171.6" y="268.3">+add_match()</text>
|
73
|
+
</g>
|
74
|
+
<g>
|
75
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="59.6,211.828 59.6,48.75 332,48.75 332,47 "/>
|
76
|
+
<polygon style="fill: #ffffff" points="59.6,237 54.8,223 59.6,209 64.4,223 "/>
|
77
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="59.6,237 54.8,223 59.6,209 64.4,223 "/>
|
78
|
+
</g>
|
79
|
+
<g>
|
80
|
+
<rect style="fill: #ffffff" x="434.002" y="234" width="133.1" height="28"/>
|
81
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="434.002" y="234" width="133.1" height="28"/>
|
82
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="500.552" y="253">Configuration</text>
|
83
|
+
</g>
|
84
|
+
<g>
|
85
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,288.157 500.552,315.75 59.6,315.75 59.6,266.002 "/>
|
86
|
+
<polygon style="fill: #ffffff" points="500.552,262.986 505.352,276.986 500.552,290.986 495.752,276.986 "/>
|
87
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,262.986 505.352,276.986 500.552,290.986 495.752,276.986 "/>
|
88
|
+
</g>
|
89
|
+
<g>
|
90
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,288.179 500.552,344.499 437.2,344.499 437.2,411.991 "/>
|
91
|
+
<polygon style="fill: #ffffff" points="500.552,263.007 505.352,277.007 500.552,291.007 495.752,277.007 "/>
|
92
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,263.007 505.352,277.007 500.552,291.007 495.752,277.007 "/>
|
93
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="441.2" y="407.991">1..2</text>
|
94
|
+
</g>
|
95
|
+
<g>
|
96
|
+
<rect style="fill: #ffffff" x="699.002" y="189" width="116.9" height="28"/>
|
97
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="189" width="116.9" height="28"/>
|
98
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="757.452" y="208">Comparator</text>
|
99
|
+
<rect style="fill: #ffffff" x="699.002" y="217" width="116.9" height="36"/>
|
100
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="217" width="116.9" height="36"/>
|
101
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="231">+weight</text>
|
102
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="247">+type</text>
|
103
|
+
<rect style="fill: #ffffff" x="699.002" y="253" width="116.9" height="20"/>
|
104
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="253" width="116.9" height="20"/>
|
105
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="267">+score()</text>
|
106
|
+
</g>
|
107
|
+
<g>
|
108
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="593.282,248 640.052,248 640.052,231 697.995,231 "/>
|
109
|
+
<polygon style="fill: #000000" points="568.11,248 582.11,243.2 596.11,248 582.11,252.8 "/>
|
110
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="568.11,248 582.11,243.2 596.11,248 582.11,252.8 "/>
|
111
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="693.995" y="227">1..*</text>
|
112
|
+
</g>
|
113
|
+
<g>
|
114
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="757.452,299.177 757.452,375.5 836.75,375.5 836.75,462.994 "/>
|
115
|
+
<polygon style="fill: #ffffff" points="757.452,274.005 762.252,288.005 757.452,302.005 752.652,288.005 "/>
|
116
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="757.452,274.005 762.252,288.005 757.452,302.005 752.652,288.005 "/>
|
117
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="840.75" y="458.994">2</text>
|
118
|
+
</g>
|
119
|
+
<g>
|
120
|
+
<rect style="fill: #ffffff" x="498.002" y="102" width="142.5" height="28"/>
|
121
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="498.002" y="102" width="142.5" height="28"/>
|
122
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="569.252" y="121">ScoreRecorder</text>
|
123
|
+
</g>
|
124
|
+
<g>
|
125
|
+
<line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="608.305" y1="139.864" x2="698.042" y2="194.698"/>
|
126
|
+
<polygon style="fill: #000000" points="610.913,135.597 594.653,131.521 605.698,144.13 "/>
|
127
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="610.913,135.597 594.653,131.521 605.698,144.13 "/>
|
128
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="645.921" y="172.849">update</text>
|
129
|
+
</g>
|
130
|
+
<g>
|
131
|
+
<line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="451.169" y1="68.3993" x2="532.096" y2="101.022"/>
|
132
|
+
<polygon style="fill: #000000" points="453.039,63.7619 436.33,62.4173 449.3,73.0367 "/>
|
133
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="453.039,63.7619 436.33,62.4173 449.3,73.0367 "/>
|
134
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="483.749" y="91.5327">add_score</text>
|
135
|
+
</g>
|
136
|
+
<g>
|
137
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,207.821 500.552,175 569.252,175 569.252,131.007 "/>
|
138
|
+
<polygon style="fill: #000000" points="500.552,232.993 495.752,218.993 500.552,204.993 505.352,218.993 "/>
|
139
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,232.993 495.752,218.993 500.552,204.993 505.352,218.993 "/>
|
140
|
+
</g>
|
141
|
+
<g>
|
142
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="134.378,251 145.4,251 145.4,250.3 167.594,250.3 "/>
|
143
|
+
<polygon style="fill: #ffffff" points="109.206,251 123.206,246.2 137.206,251 123.206,255.8 "/>
|
144
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="109.206,251 123.206,246.2 137.206,251 123.206,255.8 "/>
|
145
|
+
</g>
|
146
|
+
<g>
|
147
|
+
<rect style="fill: #ffffff" x="316.002" y="209.75" width="84" height="28"/>
|
148
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="316.002" y="209.75" width="84" height="28"/>
|
149
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="358.002" y="228.75">Matcher</text>
|
150
|
+
</g>
|
151
|
+
<g>
|
152
|
+
<rect style="fill: #ffffff" x="240.002" y="124.75" width="147" height="28"/>
|
153
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="240.002" y="124.75" width="147" height="28"/>
|
154
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="313.502" y="143.75">MatchRecorder</text>
|
155
|
+
</g>
|
156
|
+
<g>
|
157
|
+
<line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="329.236" y1="168.804" x2="350.153" y2="208.757"/>
|
158
|
+
<polygon style="fill: #000000" points="333.666,166.485 321.815,154.629 324.807,171.123 "/>
|
159
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="333.666,166.485 321.815,154.629 324.807,171.123 "/>
|
160
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="335.752" y="191.25">update</text>
|
161
|
+
</g>
|
162
|
+
<g>
|
163
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="408.779,248 377.002,248 377.002,237.75 358.002,237.75 "/>
|
164
|
+
<polygon style="fill: #000000" points="433.951,248 419.951,252.8 405.951,248 419.951,243.2 "/>
|
165
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="433.951,248 419.951,252.8 405.951,248 419.951,243.2 "/>
|
166
|
+
</g>
|
167
|
+
<g>
|
168
|
+
<line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="251.712" y1="212.31" x2="300.909" y2="153.742"/>
|
169
|
+
<polygon style="fill: #000000" points="247.883,209.094 241.421,224.561 255.541,215.526 "/>
|
170
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="247.883,209.094 241.421,224.561 255.541,215.526 "/>
|
171
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="270.843" y="199.534">add_match</text>
|
172
|
+
</g>
|
173
|
+
<g>
|
174
|
+
<rect style="fill: #ffffff" x="595.002" y="347.75" width="87" height="28"/>
|
175
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="595.002" y="347.75" width="87" height="28"/>
|
176
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="638.502" y="366.75">Runner</text>
|
177
|
+
<rect style="fill: #ffffff" x="595.002" y="375.75" width="87" height="20"/>
|
178
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="595.002" y="375.75" width="87" height="20"/>
|
179
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="598.002" y="389.75">+execute()</text>
|
180
|
+
</g>
|
181
|
+
<g>
|
182
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="638.502,321.595 638.502,294.75 567.102,294.75 567.102,262 "/>
|
183
|
+
<polygon style="fill: #ffffff" points="638.502,346.766 633.702,332.766 638.502,318.766 643.302,332.766 "/>
|
184
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="638.502,346.766 633.702,332.766 638.502,318.766 643.302,332.766 "/>
|
185
|
+
</g>
|
186
|
+
<g>
|
187
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="434.002,208.828 434.002,186.375 387.002,186.375 387.002,152.75 "/>
|
188
|
+
<polygon style="fill: #000000" points="434.002,234 429.202,220 434.002,206 438.802,220 "/>
|
189
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="434.002,234 429.202,220 434.002,206 438.802,220 "/>
|
190
|
+
</g>
|
191
|
+
<g>
|
192
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="359.855,447 294.447,447 294.447,447 229.9,447 "/>
|
193
|
+
<polygon style="fill: #ffffff" points="385.027,447 371.027,451.8 357.027,447 371.027,442.2 "/>
|
194
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="385.027,447 371.027,451.8 357.027,447 371.027,442.2 "/>
|
195
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:monospace;font-style:normal;font-weight:normal" x="294.447" y="443">&lt;&lt;delegate&gt;&gt;</text>
|
196
|
+
</g>
|
197
|
+
</svg>
|
data/test/helper.rb
CHANGED
@@ -38,13 +38,8 @@ class Test::Unit::TestCase
|
|
38
38
|
@@database_config
|
39
39
|
end
|
40
40
|
|
41
|
-
def
|
42
|
-
|
43
|
-
f.stubs({:static? => false}.merge(options))
|
44
|
-
if block
|
45
|
-
f.send(:instance_eval, &block)
|
46
|
-
end
|
47
|
-
f
|
41
|
+
def stub_dataset(options = {}, &block)
|
42
|
+
stub_instance(Linkage::Dataset, options, &block)
|
48
43
|
end
|
49
44
|
|
50
45
|
def stub_instance(klass, options = {}, &block)
|
@@ -89,10 +84,6 @@ class Test::Unit::TestCase
|
|
89
84
|
|
90
85
|
def new_result_set(&block)
|
91
86
|
klass = Class.new(Linkage::ResultSet)
|
92
|
-
klass.send(:define_method, :score_set) do
|
93
|
-
end
|
94
|
-
klass.send(:define_method, :match_set) do
|
95
|
-
end
|
96
87
|
if block_given?
|
97
88
|
klass.class_eval(&block)
|
98
89
|
end
|
@@ -41,14 +41,16 @@ class IntegrationTests::TestDatabaseResultSet < Test::Unit::TestCase
|
|
41
41
|
db[:scores].order(:id_1, :id_2).each do |row|
|
42
42
|
assert_equal row[:id_1], row[:id_2]
|
43
43
|
assert_equal 1, row[:comparator_id]
|
44
|
-
|
44
|
+
assert_kind_of Float, row[:score]
|
45
|
+
assert_equal 1.0, row[:score]
|
45
46
|
end
|
46
47
|
|
47
48
|
assert db.table_exists?(:matches)
|
48
49
|
assert_equal 10, db[:matches].count
|
49
50
|
db[:matches].order(:id_1, :id_2).each do |row|
|
50
51
|
assert_equal row[:id_1], row[:id_2]
|
51
|
-
|
52
|
+
assert_kind_of Float, row[:score]
|
53
|
+
assert_equal 1.0, row[:score]
|
52
54
|
end
|
53
55
|
end
|
54
56
|
end
|
@@ -36,6 +36,35 @@ class UnitTests::TestComparators::TestStrcompare < Test::Unit::TestCase
|
|
36
36
|
assert_equal 0.961, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
|
37
37
|
assert_equal 0.840, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
|
38
38
|
assert_equal 0.813, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
|
39
|
+
assert_equal 0.783, comp.score({:foo => 'erin'}, {:bar => 'afrin'})
|
40
|
+
assert_equal 0.939, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'REICHENBERG'})
|
41
|
+
assert_equal 0.823, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'EEICHENBERG'})
|
42
|
+
assert_equal 0.775, comp.score({:foo => 'airplane'}, {:bar => 'plane'})
|
43
|
+
assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
|
44
|
+
end
|
45
|
+
|
46
|
+
test "score for reverse-jarowinkler" do
|
47
|
+
field_1 = stub('field 1', :name => :foo, :ruby_type => { :type => String })
|
48
|
+
field_2 = stub('field 2', :name => :bar, :ruby_type => { :type => String })
|
49
|
+
comp = Strcompare.new(field_1, field_2, :reverse_jarowinkler)
|
50
|
+
assert_equal 0.950, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
|
51
|
+
assert_equal 0.858, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
|
52
|
+
assert_equal 0.775, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
|
53
|
+
assert_equal 0.848, comp.score({:foo => 'erin'}, {:bar => 'afrin'})
|
54
|
+
assert_equal 0.964, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'REICHENBERG'})
|
55
|
+
assert_equal 0.964, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'EEICHENBERG'})
|
56
|
+
assert_equal 0.925, comp.score({:foo => 'airplane'}, {:bar => 'plane'})
|
57
|
+
assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
|
58
|
+
end
|
59
|
+
|
60
|
+
test "score for damerau-levenshtein" do
|
61
|
+
field_1 = stub('field 1', :name => :foo, :ruby_type => { :type => String })
|
62
|
+
field_2 = stub('field 2', :name => :bar, :ruby_type => { :type => String })
|
63
|
+
comp = Strcompare.new(field_1, field_2, :damerau_levenshtein)
|
64
|
+
assert_equal 0.833, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
|
65
|
+
assert_equal 0.750, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
|
66
|
+
assert_equal 0.688, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
|
67
|
+
assert_equal 0.889, comp.score({:foo => 'perfect'}, {:bar => 'perfect10'})
|
39
68
|
assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
|
40
69
|
end
|
41
70
|
|