linkage 0.1.0.pre → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/Guardfile +0 -1
- data/TODO +2 -0
- data/lib/linkage.rb +1 -0
- data/lib/linkage/comparator.rb +12 -2
- data/lib/linkage/comparators/strcompare.rb +68 -16
- data/lib/linkage/configuration.rb +112 -8
- data/lib/linkage/dataset.rb +124 -9
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +55 -18
- data/lib/linkage/field_set.rb +20 -0
- data/lib/linkage/helpers.rb +7 -0
- data/lib/linkage/helpers/csv.rb +28 -0
- data/lib/linkage/helpers/database.rb +47 -0
- data/lib/linkage/import_buffer.rb +3 -3
- data/lib/linkage/match_recorder.rb +4 -0
- data/lib/linkage/match_set.rb +51 -13
- data/lib/linkage/match_sets/csv.rb +36 -9
- data/lib/linkage/match_sets/database.rb +43 -2
- data/lib/linkage/matcher.rb +49 -3
- data/lib/linkage/result_set.rb +60 -22
- data/lib/linkage/result_sets/csv.rb +46 -28
- data/lib/linkage/result_sets/database.rb +44 -26
- data/lib/linkage/runner.rb +10 -0
- data/lib/linkage/score_recorder.rb +5 -0
- data/lib/linkage/score_set.rb +78 -20
- data/lib/linkage/score_sets/csv.rb +41 -15
- data/lib/linkage/score_sets/database.rb +43 -5
- data/lib/linkage/version.rb +1 -1
- data/linkage.gemspec +2 -0
- data/misc/uml/linkage.dia +0 -0
- data/misc/uml/linkage.png +0 -0
- data/misc/uml/linkage.svg +197 -0
- data/test/helper.rb +2 -11
- data/test/integration/test_database_result_set.rb +4 -2
- data/test/unit/comparators/test_strcompare.rb +29 -0
- data/test/unit/match_sets/test_csv.rb +44 -13
- data/test/unit/match_sets/test_database.rb +42 -1
- data/test/unit/result_sets/test_csv.rb +9 -69
- data/test/unit/result_sets/test_database.rb +20 -11
- data/test/unit/score_sets/test_csv.rb +68 -25
- data/test/unit/score_sets/test_database.rb +57 -1
- data/test/unit/test_comparator.rb +8 -0
- data/test/unit/test_configuration.rb +33 -6
- data/test/unit/test_dataset.rb +0 -7
- data/test/unit/test_matcher.rb +52 -3
- data/test/unit/test_result_set.rb +8 -14
- metadata +66 -32
@@ -2,20 +2,52 @@ require 'csv'
|
|
2
2
|
|
3
3
|
module Linkage
|
4
4
|
module ScoreSets
|
5
|
+
# {CSV ScoreSets::CSV} is an implementation of {ScoreSet} for saving scores
|
6
|
+
# in a CSV file.
|
7
|
+
#
|
8
|
+
# There are three options available:
|
9
|
+
#
|
10
|
+
# * `:filename` - which file to store scores in; can be an absolute path
|
11
|
+
# or relative path
|
12
|
+
# * `:dir` - which directory to put the file in; used if `:filename` is a
|
13
|
+
# relative path
|
14
|
+
# * `:overwrite` - indicate whether or not to overwrite an existing file
|
15
|
+
#
|
16
|
+
# By default, `:filename` is `'scores.csv'`, and the other options are
|
17
|
+
# blank. This means that it will write scores to the `'scores.csv'` file in
|
18
|
+
# the current working directory and will raise an error if the file already
|
19
|
+
# exists.
|
20
|
+
#
|
21
|
+
# If you specify `:dir`, that path will be created if it doesn't exist yet.
|
22
|
+
#
|
23
|
+
# The resulting file looks like this:
|
24
|
+
#
|
25
|
+
# comparator_id,id_1,id_2,score
|
26
|
+
# 1,123,456,1
|
27
|
+
# 1,124,457,0.5
|
28
|
+
# 2,123,456,0
|
29
|
+
#
|
30
|
+
# @see Helpers::CSV
|
5
31
|
class CSV < ScoreSet
|
6
|
-
|
7
|
-
|
8
|
-
|
32
|
+
include Linkage::Helpers::CSV
|
33
|
+
|
34
|
+
DEFAULT_OPTIONS = {
|
35
|
+
:filename => 'scores.csv'
|
36
|
+
}
|
37
|
+
|
38
|
+
# @param [Hash] options
|
39
|
+
# @option options [String] :filename
|
40
|
+
# @option options [String] :dir
|
41
|
+
# @option options [Boolean] :overwrite
|
42
|
+
def initialize(options = {})
|
43
|
+
@options = DEFAULT_OPTIONS.merge(options.reject { |k, v| v.nil? })
|
9
44
|
end
|
10
45
|
|
11
46
|
def open_for_reading
|
12
47
|
raise "already open for writing, try closing first" if @mode == :write
|
13
48
|
return if @mode == :read
|
14
49
|
|
15
|
-
|
16
|
-
raise MissingError, "#{@filename} does not exist"
|
17
|
-
end
|
18
|
-
@csv = ::CSV.open(@filename, 'rb', :headers => true)
|
50
|
+
@csv = open_csv_for_reading(@options)
|
19
51
|
@mode = :read
|
20
52
|
end
|
21
53
|
|
@@ -23,11 +55,7 @@ module Linkage
|
|
23
55
|
raise "already open for reading, try closing first" if @mode == :read
|
24
56
|
return if @mode == :write
|
25
57
|
|
26
|
-
|
27
|
-
raise ExistsError, "#{@filename} exists and not in overwrite mode"
|
28
|
-
end
|
29
|
-
|
30
|
-
@csv = ::CSV.open(@filename, 'wb')
|
58
|
+
@csv = open_csv_for_writing(@options)
|
31
59
|
@csv << %w{comparator_id id_1 id_2 score}
|
32
60
|
@mode = :write
|
33
61
|
end
|
@@ -38,7 +66,7 @@ module Linkage
|
|
38
66
|
end
|
39
67
|
|
40
68
|
def each_pair
|
41
|
-
|
69
|
+
raise "not in read mode" if @mode != :read
|
42
70
|
|
43
71
|
pairs = Hash.new { |h, k| h[k] = {} }
|
44
72
|
@csv.each do |row|
|
@@ -49,8 +77,6 @@ module Linkage
|
|
49
77
|
pairs.each_pair do |pair, scores|
|
50
78
|
yield pair[0], pair[1], scores
|
51
79
|
end
|
52
|
-
|
53
|
-
close
|
54
80
|
end
|
55
81
|
|
56
82
|
def close
|
@@ -1,8 +1,48 @@
|
|
1
1
|
module Linkage
|
2
2
|
module ScoreSets
|
3
|
+
# {Database ScoreSets::Database} is an implementation of {ScoreSet} for saving
|
4
|
+
# scores in a relational database.
|
5
|
+
#
|
6
|
+
# Scores are saved in a database table with the following columns:
|
7
|
+
# - comparator_id (integer)
|
8
|
+
# - id_1 (string)
|
9
|
+
# - id_2 (string)
|
10
|
+
# - score (float)
|
11
|
+
#
|
12
|
+
# You can set up a database connection in a few different ways. By default, a
|
13
|
+
# SQLite database with the filename of `scores.db` will be created in the
|
14
|
+
# current working directory. If you want something different, you can either
|
15
|
+
# specify a Sequel-style URI, provide connection options for
|
16
|
+
# `Sequel.connect`, or you can just specify a `Sequel::Database` object to
|
17
|
+
# use.
|
18
|
+
#
|
19
|
+
# There are a couple of non-Sequel connection options:
|
20
|
+
# * `:filename` - specify filename to use for a SQLite database
|
21
|
+
# * `:dir` - specify the parent directory for a SQLite database
|
22
|
+
#
|
23
|
+
# In addition to connection options, there are behavioral options you can
|
24
|
+
# set. By default, the table name used is called `scores`, but you can change
|
25
|
+
# that by setting the `:table_name` option in the second options hash. If
|
26
|
+
# the table already exists, an {ExistsError} will be raised unless you set
|
27
|
+
# the `:overwrite` option to a truthy value in the second options hash.
|
3
28
|
class Database < ScoreSet
|
4
|
-
|
5
|
-
|
29
|
+
include Helpers::Database
|
30
|
+
|
31
|
+
DEFAULT_OPTIONS = {
|
32
|
+
:filename => 'scores.db'
|
33
|
+
}
|
34
|
+
|
35
|
+
# @overload initialize(connection_options = {}, options = {})
|
36
|
+
# @param connection_options [Hash]
|
37
|
+
# @param options [Hash]
|
38
|
+
# @overload initialize(uri, options = {})
|
39
|
+
# @param uri [String]
|
40
|
+
# @param options [Hash]
|
41
|
+
# @overload initialize(database, options = {})
|
42
|
+
# @param database [Sequel::Database]
|
43
|
+
# @param options [Hash]
|
44
|
+
def initialize(connection_options = {}, options = {})
|
45
|
+
@database = database_connection(connection_options, DEFAULT_OPTIONS)
|
6
46
|
@table_name = options[:table_name] || :scores
|
7
47
|
@overwrite = options[:overwrite]
|
8
48
|
end
|
@@ -51,7 +91,7 @@ module Linkage
|
|
51
91
|
end
|
52
92
|
|
53
93
|
def each_pair
|
54
|
-
|
94
|
+
raise "not in read mode" if @mode != :read
|
55
95
|
|
56
96
|
current_pair = nil
|
57
97
|
@dataset.order(:id_1, :id_2, :comparator_id).each do |row|
|
@@ -63,8 +103,6 @@ module Linkage
|
|
63
103
|
scores[row[:comparator_id]] = row[:score]
|
64
104
|
end
|
65
105
|
yield(*current_pair) unless current_pair.nil?
|
66
|
-
|
67
|
-
close
|
68
106
|
end
|
69
107
|
|
70
108
|
def close
|
data/lib/linkage/version.rb
CHANGED
data/linkage.gemspec
CHANGED
@@ -28,6 +28,8 @@ Gem::Specification.new do |gem|
|
|
28
28
|
gem.add_development_dependency "mysql2"
|
29
29
|
gem.add_development_dependency "guard-test"
|
30
30
|
gem.add_development_dependency "guard-yard"
|
31
|
+
gem.add_development_dependency "redcarpet"
|
32
|
+
gem.add_development_dependency "yard-redcarpet-ext"
|
31
33
|
|
32
34
|
gem.required_ruby_version = '>= 1.9'
|
33
35
|
end
|
Binary file
|
Binary file
|
@@ -0,0 +1,197 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
2
|
+
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd">
|
3
|
+
<svg width="45cm" height="28cm" viewBox="10 8 891 557" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
4
|
+
<g>
|
5
|
+
<rect style="fill: #ffffff" x="386" y="413" width="102.4" height="28"/>
|
6
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="413" width="102.4" height="28"/>
|
7
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="437.2" y="432">Dataset</text>
|
8
|
+
<rect style="fill: #ffffff" x="386" y="441" width="102.4" height="20"/>
|
9
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="441" width="102.4" height="20"/>
|
10
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="389" y="455">+table_name</text>
|
11
|
+
<rect style="fill: #ffffff" x="386" y="461" width="102.4" height="20"/>
|
12
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="461" width="102.4" height="20"/>
|
13
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="389" y="475">+link_with()</text>
|
14
|
+
</g>
|
15
|
+
<g>
|
16
|
+
<rect style="fill: #ffffff" x="75" y="433" width="154.9" height="28"/>
|
17
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="75" y="433" width="154.9" height="28"/>
|
18
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="152.45" y="452">Sequel::Dataset</text>
|
19
|
+
</g>
|
20
|
+
<g>
|
21
|
+
<rect style="fill: #ffffff" x="574" y="500" width="84.25" height="28"/>
|
22
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="574" y="500" width="84.25" height="28"/>
|
23
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="616.125" y="519">FieldSet</text>
|
24
|
+
</g>
|
25
|
+
<g>
|
26
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="514.577,447 542.201,447 542.201,514 573.008,514 "/>
|
27
|
+
<polygon style="fill: #000000" points="489.405,447 503.405,442.2 517.405,447 503.405,451.8 "/>
|
28
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="489.405,447 503.405,442.2 517.405,447 503.405,451.8 "/>
|
29
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="521.405" y="443"/>
|
30
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="569.008" y="510"/>
|
31
|
+
</g>
|
32
|
+
<g>
|
33
|
+
<rect style="fill: #ffffff" x="774" y="464" width="125.5" height="28"/>
|
34
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="464" width="125.5" height="28"/>
|
35
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="836.75" y="483">Field</text>
|
36
|
+
<rect style="fill: #ffffff" x="774" y="492" width="125.5" height="36"/>
|
37
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="492" width="125.5" height="36"/>
|
38
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="506">+name</text>
|
39
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="522">+schema</text>
|
40
|
+
<rect style="fill: #ffffff" x="774" y="528" width="125.5" height="36"/>
|
41
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="528" width="125.5" height="36"/>
|
42
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="542">+ruby_type()</text>
|
43
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="558">+primary_key?()</text>
|
44
|
+
</g>
|
45
|
+
<g>
|
46
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="684.427,514 674.255,514 771.992,514 772.992,514 "/>
|
47
|
+
<polygon style="fill: #ffffff" points="659.255,514 673.255,509.2 687.255,514 673.255,518.8 "/>
|
48
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="659.255,514 673.255,509.2 687.255,514 673.255,518.8 "/>
|
49
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="691.255" y="510"/>
|
50
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="768.992" y="510">1..*</text>
|
51
|
+
</g>
|
52
|
+
<g>
|
53
|
+
<rect style="fill: #ffffff" x="11" y="237" width="97.2" height="28"/>
|
54
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="11" y="237" width="97.2" height="28"/>
|
55
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="59.6" y="256">ResultSet</text>
|
56
|
+
</g>
|
57
|
+
<g>
|
58
|
+
<rect style="fill: #ffffff" x="332" y="9" width="102.4" height="28"/>
|
59
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="332" y="9" width="102.4" height="28"/>
|
60
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="383.2" y="28">ScoreSet</text>
|
61
|
+
<rect style="fill: #ffffff" x="332" y="37" width="102.4" height="36"/>
|
62
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="332" y="37" width="102.4" height="36"/>
|
63
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="335" y="51">+add_score()</text>
|
64
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="335" y="67">+each_pair()</text>
|
65
|
+
</g>
|
66
|
+
<g>
|
67
|
+
<rect style="fill: #ffffff" x="168.6" y="226.3" width="102.4" height="28"/>
|
68
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="168.6" y="226.3" width="102.4" height="28"/>
|
69
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="219.8" y="245.3">MatchSet</text>
|
70
|
+
<rect style="fill: #ffffff" x="168.6" y="254.3" width="102.4" height="20"/>
|
71
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="168.6" y="254.3" width="102.4" height="20"/>
|
72
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="171.6" y="268.3">+add_match()</text>
|
73
|
+
</g>
|
74
|
+
<g>
|
75
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="59.6,211.828 59.6,48.75 332,48.75 332,47 "/>
|
76
|
+
<polygon style="fill: #ffffff" points="59.6,237 54.8,223 59.6,209 64.4,223 "/>
|
77
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="59.6,237 54.8,223 59.6,209 64.4,223 "/>
|
78
|
+
</g>
|
79
|
+
<g>
|
80
|
+
<rect style="fill: #ffffff" x="434.002" y="234" width="133.1" height="28"/>
|
81
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="434.002" y="234" width="133.1" height="28"/>
|
82
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="500.552" y="253">Configuration</text>
|
83
|
+
</g>
|
84
|
+
<g>
|
85
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,288.157 500.552,315.75 59.6,315.75 59.6,266.002 "/>
|
86
|
+
<polygon style="fill: #ffffff" points="500.552,262.986 505.352,276.986 500.552,290.986 495.752,276.986 "/>
|
87
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,262.986 505.352,276.986 500.552,290.986 495.752,276.986 "/>
|
88
|
+
</g>
|
89
|
+
<g>
|
90
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,288.179 500.552,344.499 437.2,344.499 437.2,411.991 "/>
|
91
|
+
<polygon style="fill: #ffffff" points="500.552,263.007 505.352,277.007 500.552,291.007 495.752,277.007 "/>
|
92
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,263.007 505.352,277.007 500.552,291.007 495.752,277.007 "/>
|
93
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="441.2" y="407.991">1..2</text>
|
94
|
+
</g>
|
95
|
+
<g>
|
96
|
+
<rect style="fill: #ffffff" x="699.002" y="189" width="116.9" height="28"/>
|
97
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="189" width="116.9" height="28"/>
|
98
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="757.452" y="208">Comparator</text>
|
99
|
+
<rect style="fill: #ffffff" x="699.002" y="217" width="116.9" height="36"/>
|
100
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="217" width="116.9" height="36"/>
|
101
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="231">+weight</text>
|
102
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="247">+type</text>
|
103
|
+
<rect style="fill: #ffffff" x="699.002" y="253" width="116.9" height="20"/>
|
104
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="253" width="116.9" height="20"/>
|
105
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="267">+score()</text>
|
106
|
+
</g>
|
107
|
+
<g>
|
108
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="593.282,248 640.052,248 640.052,231 697.995,231 "/>
|
109
|
+
<polygon style="fill: #000000" points="568.11,248 582.11,243.2 596.11,248 582.11,252.8 "/>
|
110
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="568.11,248 582.11,243.2 596.11,248 582.11,252.8 "/>
|
111
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="693.995" y="227">1..*</text>
|
112
|
+
</g>
|
113
|
+
<g>
|
114
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="757.452,299.177 757.452,375.5 836.75,375.5 836.75,462.994 "/>
|
115
|
+
<polygon style="fill: #ffffff" points="757.452,274.005 762.252,288.005 757.452,302.005 752.652,288.005 "/>
|
116
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="757.452,274.005 762.252,288.005 757.452,302.005 752.652,288.005 "/>
|
117
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="840.75" y="458.994">2</text>
|
118
|
+
</g>
|
119
|
+
<g>
|
120
|
+
<rect style="fill: #ffffff" x="498.002" y="102" width="142.5" height="28"/>
|
121
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="498.002" y="102" width="142.5" height="28"/>
|
122
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="569.252" y="121">ScoreRecorder</text>
|
123
|
+
</g>
|
124
|
+
<g>
|
125
|
+
<line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="608.305" y1="139.864" x2="698.042" y2="194.698"/>
|
126
|
+
<polygon style="fill: #000000" points="610.913,135.597 594.653,131.521 605.698,144.13 "/>
|
127
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="610.913,135.597 594.653,131.521 605.698,144.13 "/>
|
128
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="645.921" y="172.849">update</text>
|
129
|
+
</g>
|
130
|
+
<g>
|
131
|
+
<line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="451.169" y1="68.3993" x2="532.096" y2="101.022"/>
|
132
|
+
<polygon style="fill: #000000" points="453.039,63.7619 436.33,62.4173 449.3,73.0367 "/>
|
133
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="453.039,63.7619 436.33,62.4173 449.3,73.0367 "/>
|
134
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="483.749" y="91.5327">add_score</text>
|
135
|
+
</g>
|
136
|
+
<g>
|
137
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,207.821 500.552,175 569.252,175 569.252,131.007 "/>
|
138
|
+
<polygon style="fill: #000000" points="500.552,232.993 495.752,218.993 500.552,204.993 505.352,218.993 "/>
|
139
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,232.993 495.752,218.993 500.552,204.993 505.352,218.993 "/>
|
140
|
+
</g>
|
141
|
+
<g>
|
142
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="134.378,251 145.4,251 145.4,250.3 167.594,250.3 "/>
|
143
|
+
<polygon style="fill: #ffffff" points="109.206,251 123.206,246.2 137.206,251 123.206,255.8 "/>
|
144
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="109.206,251 123.206,246.2 137.206,251 123.206,255.8 "/>
|
145
|
+
</g>
|
146
|
+
<g>
|
147
|
+
<rect style="fill: #ffffff" x="316.002" y="209.75" width="84" height="28"/>
|
148
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="316.002" y="209.75" width="84" height="28"/>
|
149
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="358.002" y="228.75">Matcher</text>
|
150
|
+
</g>
|
151
|
+
<g>
|
152
|
+
<rect style="fill: #ffffff" x="240.002" y="124.75" width="147" height="28"/>
|
153
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="240.002" y="124.75" width="147" height="28"/>
|
154
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="313.502" y="143.75">MatchRecorder</text>
|
155
|
+
</g>
|
156
|
+
<g>
|
157
|
+
<line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="329.236" y1="168.804" x2="350.153" y2="208.757"/>
|
158
|
+
<polygon style="fill: #000000" points="333.666,166.485 321.815,154.629 324.807,171.123 "/>
|
159
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="333.666,166.485 321.815,154.629 324.807,171.123 "/>
|
160
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="335.752" y="191.25">update</text>
|
161
|
+
</g>
|
162
|
+
<g>
|
163
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="408.779,248 377.002,248 377.002,237.75 358.002,237.75 "/>
|
164
|
+
<polygon style="fill: #000000" points="433.951,248 419.951,252.8 405.951,248 419.951,243.2 "/>
|
165
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="433.951,248 419.951,252.8 405.951,248 419.951,243.2 "/>
|
166
|
+
</g>
|
167
|
+
<g>
|
168
|
+
<line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="251.712" y1="212.31" x2="300.909" y2="153.742"/>
|
169
|
+
<polygon style="fill: #000000" points="247.883,209.094 241.421,224.561 255.541,215.526 "/>
|
170
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="247.883,209.094 241.421,224.561 255.541,215.526 "/>
|
171
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="270.843" y="199.534">add_match</text>
|
172
|
+
</g>
|
173
|
+
<g>
|
174
|
+
<rect style="fill: #ffffff" x="595.002" y="347.75" width="87" height="28"/>
|
175
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="595.002" y="347.75" width="87" height="28"/>
|
176
|
+
<text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="638.502" y="366.75">Runner</text>
|
177
|
+
<rect style="fill: #ffffff" x="595.002" y="375.75" width="87" height="20"/>
|
178
|
+
<rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="595.002" y="375.75" width="87" height="20"/>
|
179
|
+
<text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="598.002" y="389.75">+execute()</text>
|
180
|
+
</g>
|
181
|
+
<g>
|
182
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="638.502,321.595 638.502,294.75 567.102,294.75 567.102,262 "/>
|
183
|
+
<polygon style="fill: #ffffff" points="638.502,346.766 633.702,332.766 638.502,318.766 643.302,332.766 "/>
|
184
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="638.502,346.766 633.702,332.766 638.502,318.766 643.302,332.766 "/>
|
185
|
+
</g>
|
186
|
+
<g>
|
187
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="434.002,208.828 434.002,186.375 387.002,186.375 387.002,152.75 "/>
|
188
|
+
<polygon style="fill: #000000" points="434.002,234 429.202,220 434.002,206 438.802,220 "/>
|
189
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="434.002,234 429.202,220 434.002,206 438.802,220 "/>
|
190
|
+
</g>
|
191
|
+
<g>
|
192
|
+
<polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="359.855,447 294.447,447 294.447,447 229.9,447 "/>
|
193
|
+
<polygon style="fill: #ffffff" points="385.027,447 371.027,451.8 357.027,447 371.027,442.2 "/>
|
194
|
+
<polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="385.027,447 371.027,451.8 357.027,447 371.027,442.2 "/>
|
195
|
+
<text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:monospace;font-style:normal;font-weight:normal" x="294.447" y="443"><<delegate>></text>
|
196
|
+
</g>
|
197
|
+
</svg>
|
data/test/helper.rb
CHANGED
@@ -38,13 +38,8 @@ class Test::Unit::TestCase
|
|
38
38
|
@@database_config
|
39
39
|
end
|
40
40
|
|
41
|
-
def
|
42
|
-
|
43
|
-
f.stubs({:static? => false}.merge(options))
|
44
|
-
if block
|
45
|
-
f.send(:instance_eval, &block)
|
46
|
-
end
|
47
|
-
f
|
41
|
+
def stub_dataset(options = {}, &block)
|
42
|
+
stub_instance(Linkage::Dataset, options, &block)
|
48
43
|
end
|
49
44
|
|
50
45
|
def stub_instance(klass, options = {}, &block)
|
@@ -89,10 +84,6 @@ class Test::Unit::TestCase
|
|
89
84
|
|
90
85
|
def new_result_set(&block)
|
91
86
|
klass = Class.new(Linkage::ResultSet)
|
92
|
-
klass.send(:define_method, :score_set) do
|
93
|
-
end
|
94
|
-
klass.send(:define_method, :match_set) do
|
95
|
-
end
|
96
87
|
if block_given?
|
97
88
|
klass.class_eval(&block)
|
98
89
|
end
|
@@ -41,14 +41,16 @@ class IntegrationTests::TestDatabaseResultSet < Test::Unit::TestCase
|
|
41
41
|
db[:scores].order(:id_1, :id_2).each do |row|
|
42
42
|
assert_equal row[:id_1], row[:id_2]
|
43
43
|
assert_equal 1, row[:comparator_id]
|
44
|
-
|
44
|
+
assert_kind_of Float, row[:score]
|
45
|
+
assert_equal 1.0, row[:score]
|
45
46
|
end
|
46
47
|
|
47
48
|
assert db.table_exists?(:matches)
|
48
49
|
assert_equal 10, db[:matches].count
|
49
50
|
db[:matches].order(:id_1, :id_2).each do |row|
|
50
51
|
assert_equal row[:id_1], row[:id_2]
|
51
|
-
|
52
|
+
assert_kind_of Float, row[:score]
|
53
|
+
assert_equal 1.0, row[:score]
|
52
54
|
end
|
53
55
|
end
|
54
56
|
end
|
@@ -36,6 +36,35 @@ class UnitTests::TestComparators::TestStrcompare < Test::Unit::TestCase
|
|
36
36
|
assert_equal 0.961, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
|
37
37
|
assert_equal 0.840, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
|
38
38
|
assert_equal 0.813, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
|
39
|
+
assert_equal 0.783, comp.score({:foo => 'erin'}, {:bar => 'afrin'})
|
40
|
+
assert_equal 0.939, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'REICHENBERG'})
|
41
|
+
assert_equal 0.823, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'EEICHENBERG'})
|
42
|
+
assert_equal 0.775, comp.score({:foo => 'airplane'}, {:bar => 'plane'})
|
43
|
+
assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
|
44
|
+
end
|
45
|
+
|
46
|
+
test "score for reverse-jarowinkler" do
|
47
|
+
field_1 = stub('field 1', :name => :foo, :ruby_type => { :type => String })
|
48
|
+
field_2 = stub('field 2', :name => :bar, :ruby_type => { :type => String })
|
49
|
+
comp = Strcompare.new(field_1, field_2, :reverse_jarowinkler)
|
50
|
+
assert_equal 0.950, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
|
51
|
+
assert_equal 0.858, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
|
52
|
+
assert_equal 0.775, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
|
53
|
+
assert_equal 0.848, comp.score({:foo => 'erin'}, {:bar => 'afrin'})
|
54
|
+
assert_equal 0.964, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'REICHENBERG'})
|
55
|
+
assert_equal 0.964, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'EEICHENBERG'})
|
56
|
+
assert_equal 0.925, comp.score({:foo => 'airplane'}, {:bar => 'plane'})
|
57
|
+
assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
|
58
|
+
end
|
59
|
+
|
60
|
+
test "score for damerau-levenshtein" do
|
61
|
+
field_1 = stub('field 1', :name => :foo, :ruby_type => { :type => String })
|
62
|
+
field_2 = stub('field 2', :name => :bar, :ruby_type => { :type => String })
|
63
|
+
comp = Strcompare.new(field_1, field_2, :damerau_levenshtein)
|
64
|
+
assert_equal 0.833, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
|
65
|
+
assert_equal 0.750, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
|
66
|
+
assert_equal 0.688, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
|
67
|
+
assert_equal 0.889, comp.score({:foo => 'perfect'}, {:bar => 'perfect10'})
|
39
68
|
assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
|
40
69
|
end
|
41
70
|
|