linkage 0.1.0.pre → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/Guardfile +0 -1
- data/TODO +2 -0
- data/lib/linkage.rb +1 -0
- data/lib/linkage/comparator.rb +12 -2
- data/lib/linkage/comparators/strcompare.rb +68 -16
- data/lib/linkage/configuration.rb +112 -8
- data/lib/linkage/dataset.rb +124 -9
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +55 -18
- data/lib/linkage/field_set.rb +20 -0
- data/lib/linkage/helpers.rb +7 -0
- data/lib/linkage/helpers/csv.rb +28 -0
- data/lib/linkage/helpers/database.rb +47 -0
- data/lib/linkage/import_buffer.rb +3 -3
- data/lib/linkage/match_recorder.rb +4 -0
- data/lib/linkage/match_set.rb +51 -13
- data/lib/linkage/match_sets/csv.rb +36 -9
- data/lib/linkage/match_sets/database.rb +43 -2
- data/lib/linkage/matcher.rb +49 -3
- data/lib/linkage/result_set.rb +60 -22
- data/lib/linkage/result_sets/csv.rb +46 -28
- data/lib/linkage/result_sets/database.rb +44 -26
- data/lib/linkage/runner.rb +10 -0
- data/lib/linkage/score_recorder.rb +5 -0
- data/lib/linkage/score_set.rb +78 -20
- data/lib/linkage/score_sets/csv.rb +41 -15
- data/lib/linkage/score_sets/database.rb +43 -5
- data/lib/linkage/version.rb +1 -1
- data/linkage.gemspec +2 -0
- data/misc/uml/linkage.dia +0 -0
- data/misc/uml/linkage.png +0 -0
- data/misc/uml/linkage.svg +197 -0
- data/test/helper.rb +2 -11
- data/test/integration/test_database_result_set.rb +4 -2
- data/test/unit/comparators/test_strcompare.rb +29 -0
- data/test/unit/match_sets/test_csv.rb +44 -13
- data/test/unit/match_sets/test_database.rb +42 -1
- data/test/unit/result_sets/test_csv.rb +9 -69
- data/test/unit/result_sets/test_database.rb +20 -11
- data/test/unit/score_sets/test_csv.rb +68 -25
- data/test/unit/score_sets/test_database.rb +57 -1
- data/test/unit/test_comparator.rb +8 -0
- data/test/unit/test_configuration.rb +33 -6
- data/test/unit/test_dataset.rb +0 -7
- data/test/unit/test_matcher.rb +52 -3
- data/test/unit/test_result_set.rb +8 -14
- metadata +66 -32
data/lib/linkage/matcher.rb
CHANGED
@@ -1,9 +1,29 @@
|
|
1
1
|
module Linkage
|
2
|
+
# {Matcher} is responsible for combining scores from a {ScoreSet} and deciding
|
3
|
+
# which pairs of records match. There are two parameters you can use to
|
4
|
+
# determine how {Matcher} does this: `algorithm` and `threshold`.
|
5
|
+
#
|
6
|
+
# There are currently two algorithm options: `:mean` and `:sum`. The mean
|
7
|
+
# algorithm will create a mean score for each pair of records. The sum
|
8
|
+
# algorithm will create a total score for each pair of records.
|
9
|
+
#
|
10
|
+
# The `threshold` parameter determines what is considered a match. If the
|
11
|
+
# result score for a pair of records (depending on the algorithm used) is
|
12
|
+
# greater than or equal to the threshold, then the pair is considered to be a
|
13
|
+
# match.
|
14
|
+
#
|
15
|
+
# Whenever {Matcher} finds a match, it uses the observer pattern to notify
|
16
|
+
# other objects that a match has been found. Usually the only observer is a
|
17
|
+
# {MatchRecorder}.
|
2
18
|
class Matcher
|
3
19
|
include Observable
|
4
20
|
|
5
21
|
attr_reader :comparators, :score_set, :algorithm, :threshold
|
6
22
|
|
23
|
+
# @param comparators [Array<Comparator>]
|
24
|
+
# @param score_set [ScoreSet]
|
25
|
+
# @param algorithm [Symbol] `:mean` or `:sum`
|
26
|
+
# @param threshold [Numeric]
|
7
27
|
def initialize(comparators, score_set, algorithm, threshold)
|
8
28
|
@comparators = comparators
|
9
29
|
@score_set = score_set
|
@@ -11,20 +31,46 @@ module Linkage
|
|
11
31
|
@threshold = threshold
|
12
32
|
end
|
13
33
|
|
34
|
+
# Find matches.
|
14
35
|
def run
|
15
36
|
send(@algorithm)
|
16
37
|
end
|
17
38
|
|
18
|
-
|
19
|
-
|
39
|
+
# Combine scores for each pair of records via mean, then compare the
|
40
|
+
# combined score to the threshold. Notify observers if there's a match.
|
20
41
|
def mean
|
42
|
+
w = @comparators.collect { |comparator| comparator.weight || 1 }
|
43
|
+
@score_set.open_for_reading
|
21
44
|
@score_set.each_pair do |id_1, id_2, scores|
|
22
|
-
|
45
|
+
sum = 0
|
46
|
+
scores.each do |key, value|
|
47
|
+
sum += value * w[key-1]
|
48
|
+
end
|
49
|
+
mean = sum / @comparators.length.to_f
|
23
50
|
if mean >= @threshold
|
24
51
|
changed
|
25
52
|
notify_observers(id_1, id_2, mean)
|
26
53
|
end
|
27
54
|
end
|
55
|
+
@score_set.close
|
56
|
+
end
|
57
|
+
|
58
|
+
# Combine scores for each pair of records via sum, then compare the
|
59
|
+
# combined score to the threshold. Notify observers if there's a match.
|
60
|
+
def sum
|
61
|
+
w = @comparators.collect { |comparator| comparator.weight || 1 }
|
62
|
+
@score_set.open_for_reading
|
63
|
+
@score_set.each_pair do |id_1, id_2, scores|
|
64
|
+
sum = 0
|
65
|
+
scores.each do |key, value|
|
66
|
+
sum += value * w[key-1]
|
67
|
+
end
|
68
|
+
if sum >= @threshold
|
69
|
+
changed
|
70
|
+
notify_observers(id_1, id_2, sum)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
@score_set.close
|
28
74
|
end
|
29
75
|
end
|
30
76
|
end
|
data/lib/linkage/result_set.rb
CHANGED
@@ -1,37 +1,75 @@
|
|
1
1
|
module Linkage
|
2
|
+
# A {ResultSet} is a convenience class for wrapping a {ScoreSet} and a
|
3
|
+
# {MatchSet}. Most of the time, you'll want to use the same storage format for
|
4
|
+
# both scores and matches. {ResultSet} provides a way to group both sets
|
5
|
+
# together.
|
6
|
+
#
|
7
|
+
# The default implementation of {ResultSet} merely returns whatever {ScoreSet}
|
8
|
+
# and {MatchSet} you pass to it during creation (see {#initialize}). However,
|
9
|
+
# {ResultSet} can be subclassed to provide easy initialization of sets of the
|
10
|
+
# same format. Currently there are two subclasses:
|
11
|
+
#
|
12
|
+
# * CSV ({ResultSets::CSV})
|
13
|
+
# * Database ({ResultSets::Database})
|
14
|
+
#
|
15
|
+
# If you want to implement a custom {ResultSet}, create a class that inherits
|
16
|
+
# {ResultSet} and defines both {#score_set} and {#match_set} to return a
|
17
|
+
# {ScoreSet} and {MatchSet} respectively. You can then register that class via
|
18
|
+
# {.register} to make it easier to use.
|
2
19
|
class ResultSet
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
20
|
+
class << self
|
21
|
+
# Register a new result set. Subclasses must define {#score_set} and
|
22
|
+
# {#match_set}. Otherwise, an `ArgumentError` will be raised when you try
|
23
|
+
# to call {.register}.
|
24
|
+
#
|
25
|
+
# @param [String] name Result set name used in {.klass_for}
|
26
|
+
# @param [Class] klass ResultSet subclass
|
27
|
+
def register(name, klass)
|
28
|
+
methods = klass.instance_methods
|
29
|
+
missing = []
|
30
|
+
unless methods.include?(:score_set)
|
31
|
+
missing.push("#score_set")
|
32
|
+
end
|
33
|
+
unless methods.include?(:match_set)
|
34
|
+
missing.push("#match_set")
|
35
|
+
end
|
36
|
+
unless missing.empty?
|
37
|
+
raise ArgumentError, "class must define #{missing.join(" and ")}"
|
38
|
+
end
|
39
|
+
|
40
|
+
@result_set ||= {}
|
41
|
+
@result_set[name] = klass
|
17
42
|
end
|
18
43
|
|
19
|
-
|
20
|
-
|
44
|
+
# Return a registered ResultSet subclass or `nil` if it doesn't exist.
|
45
|
+
#
|
46
|
+
# @param [String] name of registered result set
|
47
|
+
# @return [Class, nil]
|
48
|
+
def klass_for(name)
|
49
|
+
@result_set ? @result_set[name] : nil
|
50
|
+
end
|
51
|
+
alias :[] :klass_for
|
21
52
|
end
|
22
53
|
|
23
|
-
|
24
|
-
|
54
|
+
# @param [ScoreSet] score_set
|
55
|
+
# @param [MatchSet] match_set
|
56
|
+
def initialize(score_set, match_set)
|
57
|
+
@score_set = score_set
|
58
|
+
@match_set = match_set
|
25
59
|
end
|
26
60
|
|
27
|
-
#
|
61
|
+
# Returns a {ScoreSet}.
|
62
|
+
#
|
63
|
+
# @return [ScoreSet]
|
28
64
|
def score_set
|
29
|
-
|
65
|
+
@score_set
|
30
66
|
end
|
31
67
|
|
32
|
-
#
|
68
|
+
# Returns a {MatchSet}.
|
69
|
+
#
|
70
|
+
# @return [MatchSet]
|
33
71
|
def match_set
|
34
|
-
|
72
|
+
@match_set
|
35
73
|
end
|
36
74
|
end
|
37
75
|
end
|
@@ -1,8 +1,51 @@
|
|
1
1
|
module Linkage
|
2
2
|
module ResultSets
|
3
|
+
# {CSV ResultSets::CSV} is a subclass of {ResultSet ResultSet} that makes it
|
4
|
+
# convenient to set up a {ScoreSets::CSV} and {MatchSets::CSV} at the same
|
5
|
+
# time. For example:
|
6
|
+
#
|
7
|
+
# ```ruby
|
8
|
+
# result_set = Linkage::ResultSets::CSV.new('/some/path')
|
9
|
+
# ```
|
10
|
+
#
|
11
|
+
# Or by using {ResultSet.[] ResultSet.[]}:
|
12
|
+
#
|
13
|
+
# ```ruby
|
14
|
+
# result_set = Linkage::ResultSet['csv'].new('/some/path')
|
15
|
+
# ```
|
16
|
+
#
|
17
|
+
# {#initialize ResultSets::CSV.new} takes either a directory name as its
|
18
|
+
# argument or a Hash of options. Passing in a directory name is equivalent
|
19
|
+
# to passing in a Hash with the `:dir` key. For example:
|
20
|
+
#
|
21
|
+
# ```ruby
|
22
|
+
# result_set = Linkage::ResultSet['csv'].new('/some/path')
|
23
|
+
# ```
|
24
|
+
#
|
25
|
+
# is the same as:
|
26
|
+
#
|
27
|
+
# ```ruby
|
28
|
+
# result_set = Linkage::ResultSet['csv'].new({:dir => '/some/path'})
|
29
|
+
# ```
|
30
|
+
#
|
31
|
+
# The `:dir` option lets you specify the parent directory for the score set
|
32
|
+
# and match set files (which are `scores.csv` and `results.csv` by default).
|
33
|
+
#
|
34
|
+
# The only other relevant option is `:overwrite`, which controls whether or
|
35
|
+
# not overwriting existing files is permitted.
|
36
|
+
#
|
37
|
+
# @see ScoreSets::CSV
|
38
|
+
# @see MatchSets::CSV
|
3
39
|
class CSV < ResultSet
|
40
|
+
# @overload initialize(dir)
|
41
|
+
# @param [String] dir parent directory of CSV files
|
42
|
+
# @overload initialize(options)
|
43
|
+
# @param [Hash] options
|
44
|
+
# @option options [String] :dir parent directory of CSV files
|
45
|
+
# @option options [Boolean] :overwrite (false) whether or not to allow
|
46
|
+
# overwriting existing files
|
4
47
|
def initialize(dir_or_options = nil)
|
5
|
-
|
48
|
+
@options =
|
6
49
|
case dir_or_options
|
7
50
|
when nil
|
8
51
|
{}
|
@@ -13,39 +56,14 @@ module Linkage
|
|
13
56
|
else
|
14
57
|
raise ArgumentError, "expected nil, a String, or a Hash, got #{dir_or_options.class}"
|
15
58
|
end
|
16
|
-
|
17
|
-
if opts[:dir]
|
18
|
-
opts[:dir] = File.expand_path(opts[:dir])
|
19
|
-
FileUtils.mkdir_p(opts[:dir])
|
20
|
-
end
|
21
|
-
|
22
|
-
@score_set_args = extract_args_for(:scores, opts)
|
23
|
-
@match_set_args = extract_args_for(:matches, opts)
|
24
59
|
end
|
25
60
|
|
26
61
|
def score_set
|
27
|
-
@score_set ||= ScoreSet['csv'].new(
|
62
|
+
@score_set ||= ScoreSet['csv'].new(@options)
|
28
63
|
end
|
29
64
|
|
30
65
|
def match_set
|
31
|
-
@match_set ||= MatchSet['csv'].new(
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def extract_args_for(name, opts)
|
37
|
-
dir = opts[:dir] || '.'
|
38
|
-
opts = opts[name]
|
39
|
-
|
40
|
-
filename =
|
41
|
-
case opts
|
42
|
-
when Hash, nil
|
43
|
-
opts = opts ? opts.dup : {}
|
44
|
-
opts.delete(:filename) || "#{name}.csv"
|
45
|
-
when String
|
46
|
-
opts
|
47
|
-
end
|
48
|
-
[File.join(dir, filename), opts]
|
66
|
+
@match_set ||= MatchSet['csv'].new(@options)
|
49
67
|
end
|
50
68
|
end
|
51
69
|
|
@@ -1,39 +1,57 @@
|
|
1
1
|
module Linkage
|
2
2
|
module ResultSets
|
3
|
+
# {Database ResultSets::Database} is the {ResultSet ResultSet} for writing
|
4
|
+
# to database tables. You can use it by either referencing it directly like
|
5
|
+
# so:
|
6
|
+
#
|
7
|
+
# ```ruby
|
8
|
+
# result_set = Linkage::ResultSets::Database.new(connection_options, options)
|
9
|
+
# ```
|
10
|
+
#
|
11
|
+
# Or by using {ResultSet.[] ResultSet.[]}:
|
12
|
+
#
|
13
|
+
# ```ruby
|
14
|
+
# result_set = Linkage::ResultSet['database'].new(connection_options, options)
|
15
|
+
# ```
|
16
|
+
#
|
17
|
+
# You can set up a database connection in a few different ways. By default, a
|
18
|
+
# SQLite database with the filename of `results.db` will be created in the
|
19
|
+
# current working directory. If you want something different, you can either
|
20
|
+
# specify a Sequel-style URI, provide connection options for
|
21
|
+
# `Sequel.connect`, or you can just specify a
|
22
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html Sequel::Database}
|
23
|
+
# object to use.
|
24
|
+
#
|
25
|
+
# There are a couple of non-Sequel connection options:
|
26
|
+
# * `:filename` - specify filename to use for a SQLite database
|
27
|
+
# * `:dir` - specify the parent directory for a SQLite database
|
28
|
+
#
|
29
|
+
# This result set creates a {ScoreSets::Database database-backed score set}
|
30
|
+
# and a {MatchSets::Database database-backed match set} with their default
|
31
|
+
# table names (`scores` and `matches` respectively). If either table already
|
32
|
+
# exists, an {ExistsError} will be raised unless you set the `:overwrite`
|
33
|
+
# option to a truthy value in the second options hash.
|
34
|
+
#
|
35
|
+
# @see ScoreSets::Database
|
36
|
+
# @see MatchSets::Database
|
3
37
|
class Database < ResultSet
|
4
|
-
|
5
|
-
@database = nil
|
6
|
-
@options = {}
|
38
|
+
include Helpers::Database
|
7
39
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
when Hash
|
16
|
-
database_opts = {}
|
17
|
-
database_or_options.each_pair do |key, value|
|
18
|
-
if key == :scores || key == :matches
|
19
|
-
@options[key] = value
|
20
|
-
else
|
21
|
-
database_opts[key] = value
|
22
|
-
end
|
23
|
-
end
|
24
|
-
else
|
25
|
-
raise ArgumentError, "expected Sequel::Database, a String, or a Hash, got #{database_or_options.class}"
|
26
|
-
end
|
27
|
-
@database = Sequel.connect(database_opts)
|
28
|
-
end
|
40
|
+
DEFAULT_OPTIONS = {
|
41
|
+
:filename => 'results.db'
|
42
|
+
}
|
43
|
+
|
44
|
+
def initialize(connection_options = {}, options = {})
|
45
|
+
@database = database_connection(connection_options, DEFAULT_OPTIONS)
|
46
|
+
@options = options
|
29
47
|
end
|
30
48
|
|
31
49
|
def score_set
|
32
|
-
@score_set ||= ScoreSet['database'].new(@database, @options
|
50
|
+
@score_set ||= ScoreSet['database'].new(@database, @options)
|
33
51
|
end
|
34
52
|
|
35
53
|
def match_set
|
36
|
-
@match_set ||= MatchSet['database'].new(@database, @options
|
54
|
+
@match_set ||= MatchSet['database'].new(@database, @options)
|
37
55
|
end
|
38
56
|
end
|
39
57
|
|
data/lib/linkage/runner.rb
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
module Linkage
|
2
2
|
# Use this class to run a configuration created by {Dataset#link_with}.
|
3
|
+
#
|
4
|
+
# During a record linkage, one or more {Comparator}s generate scores. Each
|
5
|
+
# score is recorded by a {ScoreRecorder}, which uses a {ScoreSet} to actually
|
6
|
+
# save the score. After the scoring is complete, a {Matcher} combines the
|
7
|
+
# scores to create matches. Each match is recorded by a {MatchRecorder}, which
|
8
|
+
# uses a {MatchSet} to actually save the match information.
|
9
|
+
#
|
10
|
+
# So to save scores and matches, we need both a {ScoreSet} and a {MatchSet}.
|
11
|
+
# To make this easier, a {ResultSet} can be used to configure both {ScoreSet}s
|
12
|
+
# and {MatchSet}s.
|
3
13
|
class Runner
|
4
14
|
attr_reader :config
|
5
15
|
|
@@ -1,5 +1,10 @@
|
|
1
1
|
module Linkage
|
2
|
+
# {ScoreRecorder} is responsible for observing a set of {Comparator}s for
|
3
|
+
# changes and saving scores to a {ScoreSet} via {ScoreSet#add_score}.
|
2
4
|
class ScoreRecorder
|
5
|
+
# @param comparators [Array<Comparator>]
|
6
|
+
# @param score_set [ScoreSet]
|
7
|
+
# @param primary_keys [Array<Symbol>]
|
3
8
|
def initialize(comparators, score_set, primary_keys)
|
4
9
|
@comparators = comparators
|
5
10
|
@score_set = score_set
|
data/lib/linkage/score_set.rb
CHANGED
@@ -1,45 +1,103 @@
|
|
1
1
|
module Linkage
|
2
|
+
# A {ScoreSet} is responsible for keeping track of scores. During the record
|
3
|
+
# linkage process, one or more {Comparator}s generate scores. These scores are
|
4
|
+
# handled by a {ScoreRecorder}, which uses a {ScoreSet} to actually save the
|
5
|
+
# scores. {ScoreSet} is also used to fetch the linkage scores so that a
|
6
|
+
# {Matcher} can create matches.
|
7
|
+
#
|
8
|
+
# {ScoreSet} is the superclass of implementations for different formats.
|
9
|
+
# Currently there are two formats for storing scores:
|
10
|
+
#
|
11
|
+
# * CSV ({ScoreSets::CSV})
|
12
|
+
# * Database ({ScoreSets::Database})
|
13
|
+
#
|
14
|
+
# See the documentation for the score set you're interested in for more
|
15
|
+
# information.
|
16
|
+
#
|
17
|
+
# If you want to implement a custom {ScoreSet}, create a class that inherits
|
18
|
+
# {ScoreSet} and defines at least {#add_score} and {#each_pair}. You can then
|
19
|
+
# register that class via {.register}.
|
20
|
+
#
|
21
|
+
# @abstract
|
2
22
|
class ScoreSet
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
23
|
+
class << self
|
24
|
+
# Register a new score set. Subclasses must define at least {#add_score}
|
25
|
+
# and {#each_pair}. Otherwise, an `ArgumentError` will be raised when you
|
26
|
+
# try to call {.register}.
|
27
|
+
#
|
28
|
+
# @param [String] name Score set name used in {.klass_for}
|
29
|
+
# @param [Class] klass ScoreSet subclass
|
30
|
+
def register(name, klass)
|
31
|
+
methods = klass.instance_methods(false)
|
32
|
+
missing = []
|
33
|
+
unless methods.include?(:add_score)
|
34
|
+
missing.push("#add_score")
|
35
|
+
end
|
36
|
+
unless methods.include?(:each_pair)
|
37
|
+
missing.push("#each_pair")
|
38
|
+
end
|
39
|
+
unless missing.empty?
|
40
|
+
raise ArgumentError, "class must define #{missing.join(" and ")}"
|
41
|
+
end
|
18
42
|
|
19
|
-
|
20
|
-
|
21
|
-
|
43
|
+
@score_sets ||= {}
|
44
|
+
@score_sets[name] = klass
|
45
|
+
end
|
22
46
|
|
23
|
-
|
24
|
-
|
47
|
+
# Return a registered ScoreSet subclass or `nil` if it doesn't exist.
|
48
|
+
#
|
49
|
+
# @param [String] name of registered score set
|
50
|
+
# @return [Class, nil]
|
51
|
+
def klass_for(name)
|
52
|
+
@score_sets ? @score_sets[name] : nil
|
53
|
+
end
|
54
|
+
alias :[] :klass_for
|
25
55
|
end
|
26
56
|
|
57
|
+
# This is called by {Matcher#run}, before any scores are read via
|
58
|
+
# {#each_pair}. Subclasses can redefine this to perform any setup needed
|
59
|
+
# for reading scores.
|
27
60
|
def open_for_reading
|
28
61
|
end
|
29
62
|
|
63
|
+
# This is called by {ScoreRecorder#start}, before any scores are added via
|
64
|
+
# {#add_score}. Subclasses can redefine this to perform any setup needed
|
65
|
+
# for saving scores.
|
30
66
|
def open_for_writing
|
31
67
|
end
|
32
68
|
|
69
|
+
# Add a score to the ScoreSet. Subclasses must redefine this.
|
70
|
+
#
|
71
|
+
# @param comparator_id [Fixnum] 1-indexed comparator index
|
72
|
+
# @param id_1 [Object] record id from first dataset
|
73
|
+
# @param id_2 [Object] record id from second dataset
|
74
|
+
# @param value [Fixnum, Float] score value
|
33
75
|
# @abstract
|
34
76
|
def add_score(comparator_id, id_1, id_2, value)
|
35
77
|
raise NotImplementedError
|
36
78
|
end
|
37
79
|
|
80
|
+
# Yield scores for each pair of records. Subclasses must redefine this.
|
81
|
+
# This method is called by {Matcher#run} with a block with three
|
82
|
+
# parameters:
|
83
|
+
#
|
84
|
+
# ```ruby
|
85
|
+
# score_set.each_pair do |id_1, id_2, scores|
|
86
|
+
# end
|
87
|
+
# ```
|
88
|
+
#
|
89
|
+
# `scores` should be a Hash where comparator ids are keys and scores are
|
90
|
+
# values. For example: `{ 1 => 0.5, 2 => 0.75, 3 => 1 }`. Note that not all
|
91
|
+
# comparators (including {Comparators::Compare}) create scores for each
|
92
|
+
# pair. A missing score means that pair was given a score of 0.
|
93
|
+
#
|
38
94
|
# @abstract
|
39
95
|
def each_pair(&block)
|
40
96
|
raise NotImplementedError
|
41
97
|
end
|
42
98
|
|
99
|
+
# This is called by {ScoreRecorder#stop}, after all scores have been added, and by {Matcher} after scores have been read.
|
100
|
+
# Subclasses can redefine this to perform any teardown needed.
|
43
101
|
def close
|
44
102
|
end
|
45
103
|
end
|