linkage 0.1.0.pre → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/Guardfile +0 -1
- data/TODO +2 -0
- data/lib/linkage.rb +1 -0
- data/lib/linkage/comparator.rb +12 -2
- data/lib/linkage/comparators/strcompare.rb +68 -16
- data/lib/linkage/configuration.rb +112 -8
- data/lib/linkage/dataset.rb +124 -9
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +55 -18
- data/lib/linkage/field_set.rb +20 -0
- data/lib/linkage/helpers.rb +7 -0
- data/lib/linkage/helpers/csv.rb +28 -0
- data/lib/linkage/helpers/database.rb +47 -0
- data/lib/linkage/import_buffer.rb +3 -3
- data/lib/linkage/match_recorder.rb +4 -0
- data/lib/linkage/match_set.rb +51 -13
- data/lib/linkage/match_sets/csv.rb +36 -9
- data/lib/linkage/match_sets/database.rb +43 -2
- data/lib/linkage/matcher.rb +49 -3
- data/lib/linkage/result_set.rb +60 -22
- data/lib/linkage/result_sets/csv.rb +46 -28
- data/lib/linkage/result_sets/database.rb +44 -26
- data/lib/linkage/runner.rb +10 -0
- data/lib/linkage/score_recorder.rb +5 -0
- data/lib/linkage/score_set.rb +78 -20
- data/lib/linkage/score_sets/csv.rb +41 -15
- data/lib/linkage/score_sets/database.rb +43 -5
- data/lib/linkage/version.rb +1 -1
- data/linkage.gemspec +2 -0
- data/misc/uml/linkage.dia +0 -0
- data/misc/uml/linkage.png +0 -0
- data/misc/uml/linkage.svg +197 -0
- data/test/helper.rb +2 -11
- data/test/integration/test_database_result_set.rb +4 -2
- data/test/unit/comparators/test_strcompare.rb +29 -0
- data/test/unit/match_sets/test_csv.rb +44 -13
- data/test/unit/match_sets/test_database.rb +42 -1
- data/test/unit/result_sets/test_csv.rb +9 -69
- data/test/unit/result_sets/test_database.rb +20 -11
- data/test/unit/score_sets/test_csv.rb +68 -25
- data/test/unit/score_sets/test_database.rb +57 -1
- data/test/unit/test_comparator.rb +8 -0
- data/test/unit/test_configuration.rb +33 -6
- data/test/unit/test_dataset.rb +0 -7
- data/test/unit/test_matcher.rb +52 -3
- data/test/unit/test_result_set.rb +8 -14
- metadata +66 -32
data/lib/linkage/matcher.rb
CHANGED
@@ -1,9 +1,29 @@
|
|
1
1
|
module Linkage
|
2
|
+
# {Matcher} is responsible for combining scores from a {ScoreSet} and deciding
|
3
|
+
# which pairs of records match. There are two parameters you can use to
|
4
|
+
# determine how {Matcher} does this: `algorithm` and `threshold`.
|
5
|
+
#
|
6
|
+
# There are currently two algorithm options: `:mean` and `:sum`. The mean
|
7
|
+
# algorithm will create a mean score for each pair of records. The sum
|
8
|
+
# algorithm will create a total score for each pair of records.
|
9
|
+
#
|
10
|
+
# The `threshold` parameter determines what is considered a match. If the
|
11
|
+
# result score for a pair of records (depending on the algorithm used) is
|
12
|
+
# greater than or equal to the threshold, then the pair is considered to be a
|
13
|
+
# match.
|
14
|
+
#
|
15
|
+
# Whenever {Matcher} finds a match, it uses the observer pattern to notify
|
16
|
+
# other objects that a match has been found. Usually the only observer is a
|
17
|
+
# {MatchRecorder}.
|
2
18
|
class Matcher
|
3
19
|
include Observable
|
4
20
|
|
5
21
|
attr_reader :comparators, :score_set, :algorithm, :threshold
|
6
22
|
|
23
|
+
# @param comparators [Array<Comparator>]
|
24
|
+
# @param score_set [ScoreSet]
|
25
|
+
# @param algorithm [Symbol] `:mean` or `:sum`
|
26
|
+
# @param threshold [Numeric]
|
7
27
|
def initialize(comparators, score_set, algorithm, threshold)
|
8
28
|
@comparators = comparators
|
9
29
|
@score_set = score_set
|
@@ -11,20 +31,46 @@ module Linkage
|
|
11
31
|
@threshold = threshold
|
12
32
|
end
|
13
33
|
|
34
|
+
# Find matches.
|
14
35
|
def run
|
15
36
|
send(@algorithm)
|
16
37
|
end
|
17
38
|
|
18
|
-
|
19
|
-
|
39
|
+
# Combine scores for each pair of records via mean, then compare the
|
40
|
+
# combined score to the threshold. Notify observers if there's a match.
|
20
41
|
def mean
|
42
|
+
w = @comparators.collect { |comparator| comparator.weight || 1 }
|
43
|
+
@score_set.open_for_reading
|
21
44
|
@score_set.each_pair do |id_1, id_2, scores|
|
22
|
-
|
45
|
+
sum = 0
|
46
|
+
scores.each do |key, value|
|
47
|
+
sum += value * w[key-1]
|
48
|
+
end
|
49
|
+
mean = sum / @comparators.length.to_f
|
23
50
|
if mean >= @threshold
|
24
51
|
changed
|
25
52
|
notify_observers(id_1, id_2, mean)
|
26
53
|
end
|
27
54
|
end
|
55
|
+
@score_set.close
|
56
|
+
end
|
57
|
+
|
58
|
+
# Combine scores for each pair of records via sum, then compare the
|
59
|
+
# combined score to the threshold. Notify observers if there's a match.
|
60
|
+
def sum
|
61
|
+
w = @comparators.collect { |comparator| comparator.weight || 1 }
|
62
|
+
@score_set.open_for_reading
|
63
|
+
@score_set.each_pair do |id_1, id_2, scores|
|
64
|
+
sum = 0
|
65
|
+
scores.each do |key, value|
|
66
|
+
sum += value * w[key-1]
|
67
|
+
end
|
68
|
+
if sum >= @threshold
|
69
|
+
changed
|
70
|
+
notify_observers(id_1, id_2, sum)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
@score_set.close
|
28
74
|
end
|
29
75
|
end
|
30
76
|
end
|
data/lib/linkage/result_set.rb
CHANGED
@@ -1,37 +1,75 @@
|
|
1
1
|
module Linkage
|
2
|
+
# A {ResultSet} is a convenience class for wrapping a {ScoreSet} and a
|
3
|
+
# {MatchSet}. Most of the time, you'll want to use the same storage format for
|
4
|
+
# both scores and matches. {ResultSet} provides a way to group both sets
|
5
|
+
# together.
|
6
|
+
#
|
7
|
+
# The default implementation of {ResultSet} merely returns whatever {ScoreSet}
|
8
|
+
# and {MatchSet} you pass to it during creation (see {#initialize}). However,
|
9
|
+
# {ResultSet} can be subclassed to provide easy initialization of sets of the
|
10
|
+
# same format. Currently there are two subclasses:
|
11
|
+
#
|
12
|
+
# * CSV ({ResultSets::CSV})
|
13
|
+
# * Database ({ResultSets::Database})
|
14
|
+
#
|
15
|
+
# If you want to implement a custom {ResultSet}, create a class that inherits
|
16
|
+
# {ResultSet} and defines both {#score_set} and {#match_set} to return a
|
17
|
+
# {ScoreSet} and {MatchSet} respectively. You can then register that class via
|
18
|
+
# {.register} to make it easier to use.
|
2
19
|
class ResultSet
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
20
|
+
class << self
|
21
|
+
# Register a new result set. Subclasses must define {#score_set} and
|
22
|
+
# {#match_set}. Otherwise, an `ArgumentError` will be raised when you try
|
23
|
+
# to call {.register}.
|
24
|
+
#
|
25
|
+
# @param [String] name Result set name used in {.klass_for}
|
26
|
+
# @param [Class] klass ResultSet subclass
|
27
|
+
def register(name, klass)
|
28
|
+
methods = klass.instance_methods
|
29
|
+
missing = []
|
30
|
+
unless methods.include?(:score_set)
|
31
|
+
missing.push("#score_set")
|
32
|
+
end
|
33
|
+
unless methods.include?(:match_set)
|
34
|
+
missing.push("#match_set")
|
35
|
+
end
|
36
|
+
unless missing.empty?
|
37
|
+
raise ArgumentError, "class must define #{missing.join(" and ")}"
|
38
|
+
end
|
39
|
+
|
40
|
+
@result_set ||= {}
|
41
|
+
@result_set[name] = klass
|
17
42
|
end
|
18
43
|
|
19
|
-
|
20
|
-
|
44
|
+
# Return a registered ResultSet subclass or `nil` if it doesn't exist.
|
45
|
+
#
|
46
|
+
# @param [String] name of registered result set
|
47
|
+
# @return [Class, nil]
|
48
|
+
def klass_for(name)
|
49
|
+
@result_set ? @result_set[name] : nil
|
50
|
+
end
|
51
|
+
alias :[] :klass_for
|
21
52
|
end
|
22
53
|
|
23
|
-
|
24
|
-
|
54
|
+
# @param [ScoreSet] score_set
|
55
|
+
# @param [MatchSet] match_set
|
56
|
+
def initialize(score_set, match_set)
|
57
|
+
@score_set = score_set
|
58
|
+
@match_set = match_set
|
25
59
|
end
|
26
60
|
|
27
|
-
#
|
61
|
+
# Returns a {ScoreSet}.
|
62
|
+
#
|
63
|
+
# @return [ScoreSet]
|
28
64
|
def score_set
|
29
|
-
|
65
|
+
@score_set
|
30
66
|
end
|
31
67
|
|
32
|
-
#
|
68
|
+
# Returns a {MatchSet}.
|
69
|
+
#
|
70
|
+
# @return [MatchSet]
|
33
71
|
def match_set
|
34
|
-
|
72
|
+
@match_set
|
35
73
|
end
|
36
74
|
end
|
37
75
|
end
|
@@ -1,8 +1,51 @@
|
|
1
1
|
module Linkage
|
2
2
|
module ResultSets
|
3
|
+
# {CSV ResultSets::CSV} is a subclass of {ResultSet ResultSet} that makes it
|
4
|
+
# convenient to set up a {ScoreSets::CSV} and {MatchSets::CSV} at the same
|
5
|
+
# time. For example:
|
6
|
+
#
|
7
|
+
# ```ruby
|
8
|
+
# result_set = Linkage::ResultSets::CSV.new('/some/path')
|
9
|
+
# ```
|
10
|
+
#
|
11
|
+
# Or by using {ResultSet.[] ResultSet.[]}:
|
12
|
+
#
|
13
|
+
# ```ruby
|
14
|
+
# result_set = Linkage::ResultSet['csv'].new('/some/path')
|
15
|
+
# ```
|
16
|
+
#
|
17
|
+
# {#initialize ResultSets::CSV.new} takes either a directory name as its
|
18
|
+
# argument or a Hash of options. Passing in a directory name is equivalent
|
19
|
+
# to passing in a Hash with the `:dir` key. For example:
|
20
|
+
#
|
21
|
+
# ```ruby
|
22
|
+
# result_set = Linkage::ResultSet['csv'].new('/some/path')
|
23
|
+
# ```
|
24
|
+
#
|
25
|
+
# is the same as:
|
26
|
+
#
|
27
|
+
# ```ruby
|
28
|
+
# result_set = Linkage::ResultSet['csv'].new({:dir => '/some/path'})
|
29
|
+
# ```
|
30
|
+
#
|
31
|
+
# The `:dir` option lets you specify the parent directory for the score set
|
32
|
+
# and match set files (which are `scores.csv` and `matches.csv` by default).
|
33
|
+
#
|
34
|
+
# The only other relevant option is `:overwrite`, which controls whether or
|
35
|
+
# not overwriting existing files is permitted.
|
36
|
+
#
|
37
|
+
# @see ScoreSets::CSV
|
38
|
+
# @see MatchSets::CSV
|
3
39
|
class CSV < ResultSet
|
40
|
+
# @overload initialize(dir)
|
41
|
+
# @param [String] dir parent directory of CSV files
|
42
|
+
# @overload initialize(options)
|
43
|
+
# @param [Hash] options
|
44
|
+
# @option options [String] :dir parent directory of CSV files
|
45
|
+
# @option options [Boolean] :overwrite (false) whether or not to allow
|
46
|
+
# overwriting existing files
|
4
47
|
def initialize(dir_or_options = nil)
|
5
|
-
|
48
|
+
@options =
|
6
49
|
case dir_or_options
|
7
50
|
when nil
|
8
51
|
{}
|
@@ -13,39 +56,14 @@ module Linkage
|
|
13
56
|
else
|
14
57
|
raise ArgumentError, "expected nil, a String, or a Hash, got #{dir_or_options.class}"
|
15
58
|
end
|
16
|
-
|
17
|
-
if opts[:dir]
|
18
|
-
opts[:dir] = File.expand_path(opts[:dir])
|
19
|
-
FileUtils.mkdir_p(opts[:dir])
|
20
|
-
end
|
21
|
-
|
22
|
-
@score_set_args = extract_args_for(:scores, opts)
|
23
|
-
@match_set_args = extract_args_for(:matches, opts)
|
24
59
|
end
|
25
60
|
|
26
61
|
def score_set
|
27
|
-
@score_set ||= ScoreSet['csv'].new(
|
62
|
+
@score_set ||= ScoreSet['csv'].new(@options)
|
28
63
|
end
|
29
64
|
|
30
65
|
def match_set
|
31
|
-
@match_set ||= MatchSet['csv'].new(
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def extract_args_for(name, opts)
|
37
|
-
dir = opts[:dir] || '.'
|
38
|
-
opts = opts[name]
|
39
|
-
|
40
|
-
filename =
|
41
|
-
case opts
|
42
|
-
when Hash, nil
|
43
|
-
opts = opts ? opts.dup : {}
|
44
|
-
opts.delete(:filename) || "#{name}.csv"
|
45
|
-
when String
|
46
|
-
opts
|
47
|
-
end
|
48
|
-
[File.join(dir, filename), opts]
|
66
|
+
@match_set ||= MatchSet['csv'].new(@options)
|
49
67
|
end
|
50
68
|
end
|
51
69
|
|
@@ -1,39 +1,57 @@
|
|
1
1
|
module Linkage
|
2
2
|
module ResultSets
|
3
|
+
# {Database ResultSets::Database} is the {ResultSet ResultSet} for writing
|
4
|
+
# to database tables. You can use it by either referencing it directly like
|
5
|
+
# so:
|
6
|
+
#
|
7
|
+
# ```ruby
|
8
|
+
# result_set = Linkage::ResultSets::Database.new(connection_options, options)
|
9
|
+
# ```
|
10
|
+
#
|
11
|
+
# Or by using {ResultSet.[] ResultSet.[]}:
|
12
|
+
#
|
13
|
+
# ```ruby
|
14
|
+
# result_set = Linkage::ResultSet['database'].new(connection_options, options)
|
15
|
+
# ```
|
16
|
+
#
|
17
|
+
# You can set up a database connection in a few different ways. By default, a
|
18
|
+
# SQLite database with the filename of `results.db` will be created in the
|
19
|
+
# current working directory. If you want something different, you can either
|
20
|
+
# specify a Sequel-style URI, provide connection options for
|
21
|
+
# `Sequel.connect`, or you can just specify a
|
22
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html Sequel::Database}
|
23
|
+
# object to use.
|
24
|
+
#
|
25
|
+
# There are a couple of non-Sequel connection options:
|
26
|
+
# * `:filename` - specify filename to use for a SQLite database
|
27
|
+
# * `:dir` - specify the parent directory for a SQLite database
|
28
|
+
#
|
29
|
+
# This result set creates a {ScoreSets::Database database-backed score set}
|
30
|
+
# and a {MatchSets::Database database-backed match set} with their default
|
31
|
+
# table names (`scores` and `matches` respectively). If either table already
|
32
|
+
# exists, an {ExistsError} will be raised unless you set the `:overwrite`
|
33
|
+
# option to a truthy value in the second options hash.
|
34
|
+
#
|
35
|
+
# @see ScoreSets::Database
|
36
|
+
# @see MatchSets::Database
|
3
37
|
class Database < ResultSet
|
4
|
-
|
5
|
-
@database = nil
|
6
|
-
@options = {}
|
38
|
+
include Helpers::Database
|
7
39
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
when Hash
|
16
|
-
database_opts = {}
|
17
|
-
database_or_options.each_pair do |key, value|
|
18
|
-
if key == :scores || key == :matches
|
19
|
-
@options[key] = value
|
20
|
-
else
|
21
|
-
database_opts[key] = value
|
22
|
-
end
|
23
|
-
end
|
24
|
-
else
|
25
|
-
raise ArgumentError, "expected Sequel::Database, a String, or a Hash, got #{database_or_options.class}"
|
26
|
-
end
|
27
|
-
@database = Sequel.connect(database_opts)
|
28
|
-
end
|
40
|
+
DEFAULT_OPTIONS = {
|
41
|
+
:filename => 'results.db'
|
42
|
+
}
|
43
|
+
|
44
|
+
def initialize(connection_options = {}, options = {})
|
45
|
+
@database = database_connection(connection_options, DEFAULT_OPTIONS)
|
46
|
+
@options = options
|
29
47
|
end
|
30
48
|
|
31
49
|
def score_set
|
32
|
-
@score_set ||= ScoreSet['database'].new(@database, @options
|
50
|
+
@score_set ||= ScoreSet['database'].new(@database, @options)
|
33
51
|
end
|
34
52
|
|
35
53
|
def match_set
|
36
|
-
@match_set ||= MatchSet['database'].new(@database, @options
|
54
|
+
@match_set ||= MatchSet['database'].new(@database, @options)
|
37
55
|
end
|
38
56
|
end
|
39
57
|
|
data/lib/linkage/runner.rb
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
module Linkage
|
2
2
|
# Use this class to run a configuration created by {Dataset#link_with}.
|
3
|
+
#
|
4
|
+
# During a record linkage, one or more {Comparator}s generate scores. Each
|
5
|
+
# score is recorded by a {ScoreRecorder}, which uses a {ScoreSet} to actually
|
6
|
+
# save the score. After the scoring is complete, a {Matcher} combines the
|
7
|
+
# scores to create matches. Each match is recorded by a {MatchRecorder}, which
|
8
|
+
# uses a {MatchSet} to actually save the match information.
|
9
|
+
#
|
10
|
+
# So to save scores and matches, we need both a {ScoreSet} and a {MatchSet}.
|
11
|
+
# To make this easier, a {ResultSet} can be used to configure both {ScoreSet}s
|
12
|
+
# and {MatchSet}s.
|
3
13
|
class Runner
|
4
14
|
attr_reader :config
|
5
15
|
|
@@ -1,5 +1,10 @@
|
|
1
1
|
module Linkage
|
2
|
+
# {ScoreRecorder} is responsible for observing a set of {Comparator}s for
|
3
|
+
# changes and saving scores to a {ScoreSet} via {ScoreSet#add_score}.
|
2
4
|
class ScoreRecorder
|
5
|
+
# @param comparators [Array<Comparator>]
|
6
|
+
# @param score_set [ScoreSet]
|
7
|
+
# @param primary_keys [Array<Symbol>]
|
3
8
|
def initialize(comparators, score_set, primary_keys)
|
4
9
|
@comparators = comparators
|
5
10
|
@score_set = score_set
|
data/lib/linkage/score_set.rb
CHANGED
@@ -1,45 +1,103 @@
|
|
1
1
|
module Linkage
|
2
|
+
# A {ScoreSet} is responsible for keeping track of scores. During the record
|
3
|
+
# linkage process, one or more {Comparator}s generate scores. These scores are
|
4
|
+
# handled by a {ScoreRecorder}, which uses a {ScoreSet} to actually save the
|
5
|
+
# scores. {ScoreSet} is also used to fetch the linkage scores so that a
|
6
|
+
# {Matcher} can create matches.
|
7
|
+
#
|
8
|
+
# {ScoreSet} is the superclass of implementations for different formats.
|
9
|
+
# Currently there are two formats for storing scores:
|
10
|
+
#
|
11
|
+
# * CSV ({ScoreSets::CSV})
|
12
|
+
# * Database ({ScoreSets::Database})
|
13
|
+
#
|
14
|
+
# See the documentation for the score set you're interested in for more
|
15
|
+
# information.
|
16
|
+
#
|
17
|
+
# If you want to implement a custom {ScoreSet}, create a class that inherits
|
18
|
+
# {ScoreSet} and defines at least {#add_score} and {#each_pair}. You can then
|
19
|
+
# register that class via {.register}.
|
20
|
+
#
|
21
|
+
# @abstract
|
2
22
|
class ScoreSet
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
23
|
+
class << self
|
24
|
+
# Register a new score set. Subclasses must define at least {#add_score}
|
25
|
+
# and {#each_pair}. Otherwise, an `ArgumentError` will be raised when you
|
26
|
+
# try to call {.register}.
|
27
|
+
#
|
28
|
+
# @param [String] name Score set name used in {.klass_for}
|
29
|
+
# @param [Class] klass ScoreSet subclass
|
30
|
+
def register(name, klass)
|
31
|
+
methods = klass.instance_methods(false)
|
32
|
+
missing = []
|
33
|
+
unless methods.include?(:add_score)
|
34
|
+
missing.push("#add_score")
|
35
|
+
end
|
36
|
+
unless methods.include?(:each_pair)
|
37
|
+
missing.push("#each_pair")
|
38
|
+
end
|
39
|
+
unless missing.empty?
|
40
|
+
raise ArgumentError, "class must define #{missing.join(" and ")}"
|
41
|
+
end
|
18
42
|
|
19
|
-
|
20
|
-
|
21
|
-
|
43
|
+
@score_sets ||= {}
|
44
|
+
@score_sets[name] = klass
|
45
|
+
end
|
22
46
|
|
23
|
-
|
24
|
-
|
47
|
+
# Return a registered ScoreSet subclass or `nil` if it doesn't exist.
|
48
|
+
#
|
49
|
+
# @param [String] name of registered score set
|
50
|
+
# @return [Class, nil]
|
51
|
+
def klass_for(name)
|
52
|
+
@score_sets ? @score_sets[name] : nil
|
53
|
+
end
|
54
|
+
alias :[] :klass_for
|
25
55
|
end
|
26
56
|
|
57
|
+
# This is called by {Matcher#run}, before any scores are read via
|
58
|
+
# {#each_pair}. Subclasses can redefine this to perform any setup needed
|
59
|
+
# for reading scores.
|
27
60
|
def open_for_reading
|
28
61
|
end
|
29
62
|
|
63
|
+
# This is called by {ScoreRecorder#start}, before any scores are added via
|
64
|
+
# {#add_score}. Subclasses can redefine this to perform any setup needed
|
65
|
+
# for saving scores.
|
30
66
|
def open_for_writing
|
31
67
|
end
|
32
68
|
|
69
|
+
# Add a score to the ScoreSet. Subclasses must redefine this.
|
70
|
+
#
|
71
|
+
# @param comparator_id [Fixnum] 1-indexed comparator index
|
72
|
+
# @param id_1 [Object] record id from first dataset
|
73
|
+
# @param id_2 [Object] record id from second dataset
|
74
|
+
# @param value [Fixnum, Float] score value
|
33
75
|
# @abstract
|
34
76
|
def add_score(comparator_id, id_1, id_2, value)
|
35
77
|
raise NotImplementedError
|
36
78
|
end
|
37
79
|
|
80
|
+
# Yield scores for each pair of records. Subclasses must redefine this.
|
81
|
+
# This method is called by {Matcher#run} with a block with three
|
82
|
+
# parameters:
|
83
|
+
#
|
84
|
+
# ```ruby
|
85
|
+
# score_set.each_pair do |id_1, id_2, scores|
|
86
|
+
# end
|
87
|
+
# ```
|
88
|
+
#
|
89
|
+
# `scores` should be a Hash where comparator ids are keys and scores are
|
90
|
+
# values. For example: `{ 1 => 0.5, 2 => 0.75, 3 => 1 }`. Note that not all
|
91
|
+
# comparators (including {Comparators::Compare}) create scores for each
|
92
|
+
# pair. A missing score means that pair was given a score of 0.
|
93
|
+
#
|
38
94
|
# @abstract
|
39
95
|
def each_pair(&block)
|
40
96
|
raise NotImplementedError
|
41
97
|
end
|
42
98
|
|
99
|
+
# This is called after all scores have been added ({ScoreRecorder#stop}) or read ({Matcher#run}).
|
100
|
+
# Subclasses can redefine this to perform any teardown needed.
|
43
101
|
def close
|
44
102
|
end
|
45
103
|
end
|