linkage 0.1.0.pre → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -0
  3. data/Guardfile +0 -1
  4. data/TODO +2 -0
  5. data/lib/linkage.rb +1 -0
  6. data/lib/linkage/comparator.rb +12 -2
  7. data/lib/linkage/comparators/strcompare.rb +68 -16
  8. data/lib/linkage/configuration.rb +112 -8
  9. data/lib/linkage/dataset.rb +124 -9
  10. data/lib/linkage/exceptions.rb +5 -0
  11. data/lib/linkage/field.rb +55 -18
  12. data/lib/linkage/field_set.rb +20 -0
  13. data/lib/linkage/helpers.rb +7 -0
  14. data/lib/linkage/helpers/csv.rb +28 -0
  15. data/lib/linkage/helpers/database.rb +47 -0
  16. data/lib/linkage/import_buffer.rb +3 -3
  17. data/lib/linkage/match_recorder.rb +4 -0
  18. data/lib/linkage/match_set.rb +51 -13
  19. data/lib/linkage/match_sets/csv.rb +36 -9
  20. data/lib/linkage/match_sets/database.rb +43 -2
  21. data/lib/linkage/matcher.rb +49 -3
  22. data/lib/linkage/result_set.rb +60 -22
  23. data/lib/linkage/result_sets/csv.rb +46 -28
  24. data/lib/linkage/result_sets/database.rb +44 -26
  25. data/lib/linkage/runner.rb +10 -0
  26. data/lib/linkage/score_recorder.rb +5 -0
  27. data/lib/linkage/score_set.rb +78 -20
  28. data/lib/linkage/score_sets/csv.rb +41 -15
  29. data/lib/linkage/score_sets/database.rb +43 -5
  30. data/lib/linkage/version.rb +1 -1
  31. data/linkage.gemspec +2 -0
  32. data/misc/uml/linkage.dia +0 -0
  33. data/misc/uml/linkage.png +0 -0
  34. data/misc/uml/linkage.svg +197 -0
  35. data/test/helper.rb +2 -11
  36. data/test/integration/test_database_result_set.rb +4 -2
  37. data/test/unit/comparators/test_strcompare.rb +29 -0
  38. data/test/unit/match_sets/test_csv.rb +44 -13
  39. data/test/unit/match_sets/test_database.rb +42 -1
  40. data/test/unit/result_sets/test_csv.rb +9 -69
  41. data/test/unit/result_sets/test_database.rb +20 -11
  42. data/test/unit/score_sets/test_csv.rb +68 -25
  43. data/test/unit/score_sets/test_database.rb +57 -1
  44. data/test/unit/test_comparator.rb +8 -0
  45. data/test/unit/test_configuration.rb +33 -6
  46. data/test/unit/test_dataset.rb +0 -7
  47. data/test/unit/test_matcher.rb +52 -3
  48. data/test/unit/test_result_set.rb +8 -14
  49. metadata +66 -32
@@ -2,20 +2,52 @@ require 'csv'
2
2
 
3
3
  module Linkage
4
4
  module ScoreSets
5
+ # {CSV ScoreSets::CSV} is an implementation of {ScoreSet} for saving scores
6
+ # in a CSV file.
7
+ #
8
+ # There are three options available:
9
+ #
10
+ # * `:filename` - which file to store scores in; can be an absolute path
11
+ # or relative path
12
+ # * `:dir` - which directory to put the file in; used if `:filename` is a
13
+ # relative path
14
+ # * `:overwrite` - indicate whether or not to overwrite an existing file
15
+ #
16
+ # By default, `:filename` is `'scores.csv'`, and the other options are
17
+ # blank. This means that it will write scores to the `'scores.csv'` file in
18
+ # the current working directory and will raise an error if the file already
19
+ # exists.
20
+ #
21
+ # If you specify `:dir`, that path will be created if it doesn't exist yet.
22
+ #
23
+ # The resulting file looks like this:
24
+ #
25
+ # comparator_id,id_1,id_2,score
26
+ # 1,123,456,1
27
+ # 1,124,457,0.5
28
+ # 2,123,456,0
29
+ #
30
+ # @see Helpers::CSV
5
31
  class CSV < ScoreSet
6
- def initialize(filename, options = {})
7
- @filename = filename
8
- @overwrite = options[:overwrite]
32
+ include Linkage::Helpers::CSV
33
+
34
+ DEFAULT_OPTIONS = {
35
+ :filename => 'scores.csv'
36
+ }
37
+
38
+ # @param [Hash] options
39
+ # @option options [String] :filename
40
+ # @option options [String] :dir
41
+ # @option options [Boolean] :overwrite
42
+ def initialize(options = {})
43
+ @options = DEFAULT_OPTIONS.merge(options.reject { |k, v| v.nil? })
9
44
  end
10
45
 
11
46
  def open_for_reading
12
47
  raise "already open for writing, try closing first" if @mode == :write
13
48
  return if @mode == :read
14
49
 
15
- if !File.exist?(@filename)
16
- raise MissingError, "#{@filename} does not exist"
17
- end
18
- @csv = ::CSV.open(@filename, 'rb', :headers => true)
50
+ @csv = open_csv_for_reading(@options)
19
51
  @mode = :read
20
52
  end
21
53
 
@@ -23,11 +55,7 @@ module Linkage
23
55
  raise "already open for reading, try closing first" if @mode == :read
24
56
  return if @mode == :write
25
57
 
26
- if !@overwrite && File.exist?(@filename)
27
- raise ExistsError, "#{@filename} exists and not in overwrite mode"
28
- end
29
-
30
- @csv = ::CSV.open(@filename, 'wb')
58
+ @csv = open_csv_for_writing(@options)
31
59
  @csv << %w{comparator_id id_1 id_2 score}
32
60
  @mode = :write
33
61
  end
@@ -38,7 +66,7 @@ module Linkage
38
66
  end
39
67
 
40
68
  def each_pair
41
- open_for_reading
69
+ raise "not in read mode" if @mode != :read
42
70
 
43
71
  pairs = Hash.new { |h, k| h[k] = {} }
44
72
  @csv.each do |row|
@@ -49,8 +77,6 @@ module Linkage
49
77
  pairs.each_pair do |pair, scores|
50
78
  yield pair[0], pair[1], scores
51
79
  end
52
-
53
- close
54
80
  end
55
81
 
56
82
  def close
@@ -1,8 +1,48 @@
1
1
  module Linkage
2
2
  module ScoreSets
3
+ # {Database ScoreSets::Database} is an implementation of {ScoreSet} for saving
4
+ # scores in a relational database.
5
+ #
6
+ # Scores are saved in a database table with the following columns:
7
+ # - comparator_id (integer)
8
+ # - id_1 (string)
9
+ # - id_2 (string)
10
+ # - score (float)
11
+ #
12
+ # You can set up a database connection in a few different ways. By default, a
13
+ # SQLite database with the filename of `scores.db` will be created in the
14
+ # current working directory. If you want something different, you can either
15
+ # specify a Sequel-style URI, provide connection options for
16
+ # `Sequel.connect`, or you can just specify a `Sequel::Database` object to
17
+ # use.
18
+ #
19
+ # There are a couple of non-Sequel connection options:
20
+ # * `:filename` - specify filename to use for a SQLite database
21
+ # * `:dir` - specify the parent directory for a SQLite database
22
+ #
23
+ # In addition to connection options, there are behavioral options you can
24
+ # set. By default, the table name used is called `scores`, but you can change
25
+ # that by setting the `:table_name` option in the second options hash. If
26
+ # the table already exists, an {ExistsError} will be raised unless you set
27
+ # the `:overwrite` option to a truthy value in the second options hash.
3
28
  class Database < ScoreSet
4
- def initialize(database, options = {})
5
- @database = database
29
+ include Helpers::Database
30
+
31
+ DEFAULT_OPTIONS = {
32
+ :filename => 'scores.db'
33
+ }
34
+
35
+ # @overload initialize(connection_options = {}, options = {})
36
+ # @param connection_options [Hash]
37
+ # @param options [Hash]
38
+ # @overload initialize(uri, options = {})
39
+ # @param uri [String]
40
+ # @param options [Hash]
41
+ # @overload initialize(database, options = {})
42
+ # @param database [Sequel::Database]
43
+ # @param options [Hash]
44
+ def initialize(connection_options = {}, options = {})
45
+ @database = database_connection(connection_options, DEFAULT_OPTIONS)
6
46
  @table_name = options[:table_name] || :scores
7
47
  @overwrite = options[:overwrite]
8
48
  end
@@ -51,7 +91,7 @@ module Linkage
51
91
  end
52
92
 
53
93
  def each_pair
54
- open_for_reading
94
+ raise "not in read mode" if @mode != :read
55
95
 
56
96
  current_pair = nil
57
97
  @dataset.order(:id_1, :id_2, :comparator_id).each do |row|
@@ -63,8 +103,6 @@ module Linkage
63
103
  scores[row[:comparator_id]] = row[:score]
64
104
  end
65
105
  yield(*current_pair) unless current_pair.nil?
66
-
67
- close
68
106
  end
69
107
 
70
108
  def close
@@ -1,3 +1,3 @@
1
1
  module Linkage
2
- VERSION = "0.1.0.pre"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -28,6 +28,8 @@ Gem::Specification.new do |gem|
28
28
  gem.add_development_dependency "mysql2"
29
29
  gem.add_development_dependency "guard-test"
30
30
  gem.add_development_dependency "guard-yard"
31
+ gem.add_development_dependency "redcarpet"
32
+ gem.add_development_dependency "yard-redcarpet-ext"
31
33
 
32
34
  gem.required_ruby_version = '>= 1.9'
33
35
  end
Binary file
Binary file
@@ -0,0 +1,197 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd">
3
+ <svg width="45cm" height="28cm" viewBox="10 8 891 557" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
4
+ <g>
5
+ <rect style="fill: #ffffff" x="386" y="413" width="102.4" height="28"/>
6
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="413" width="102.4" height="28"/>
7
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="437.2" y="432">Dataset</text>
8
+ <rect style="fill: #ffffff" x="386" y="441" width="102.4" height="20"/>
9
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="441" width="102.4" height="20"/>
10
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="389" y="455">+table_name</text>
11
+ <rect style="fill: #ffffff" x="386" y="461" width="102.4" height="20"/>
12
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="461" width="102.4" height="20"/>
13
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="389" y="475">+link_with()</text>
14
+ </g>
15
+ <g>
16
+ <rect style="fill: #ffffff" x="75" y="433" width="154.9" height="28"/>
17
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="75" y="433" width="154.9" height="28"/>
18
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="152.45" y="452">Sequel::Dataset</text>
19
+ </g>
20
+ <g>
21
+ <rect style="fill: #ffffff" x="574" y="500" width="84.25" height="28"/>
22
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="574" y="500" width="84.25" height="28"/>
23
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="616.125" y="519">FieldSet</text>
24
+ </g>
25
+ <g>
26
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="514.577,447 542.201,447 542.201,514 573.008,514 "/>
27
+ <polygon style="fill: #000000" points="489.405,447 503.405,442.2 517.405,447 503.405,451.8 "/>
28
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="489.405,447 503.405,442.2 517.405,447 503.405,451.8 "/>
29
+ <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="521.405" y="443"/>
30
+ <text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="569.008" y="510"/>
31
+ </g>
32
+ <g>
33
+ <rect style="fill: #ffffff" x="774" y="464" width="125.5" height="28"/>
34
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="464" width="125.5" height="28"/>
35
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="836.75" y="483">Field</text>
36
+ <rect style="fill: #ffffff" x="774" y="492" width="125.5" height="36"/>
37
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="492" width="125.5" height="36"/>
38
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="506">+name</text>
39
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="522">+schema</text>
40
+ <rect style="fill: #ffffff" x="774" y="528" width="125.5" height="36"/>
41
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="528" width="125.5" height="36"/>
42
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="542">+ruby_type()</text>
43
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="558">+primary_key?()</text>
44
+ </g>
45
+ <g>
46
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="684.427,514 674.255,514 771.992,514 772.992,514 "/>
47
+ <polygon style="fill: #ffffff" points="659.255,514 673.255,509.2 687.255,514 673.255,518.8 "/>
48
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="659.255,514 673.255,509.2 687.255,514 673.255,518.8 "/>
49
+ <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="691.255" y="510"/>
50
+ <text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="768.992" y="510">1..*</text>
51
+ </g>
52
+ <g>
53
+ <rect style="fill: #ffffff" x="11" y="237" width="97.2" height="28"/>
54
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="11" y="237" width="97.2" height="28"/>
55
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="59.6" y="256">ResultSet</text>
56
+ </g>
57
+ <g>
58
+ <rect style="fill: #ffffff" x="332" y="9" width="102.4" height="28"/>
59
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="332" y="9" width="102.4" height="28"/>
60
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="383.2" y="28">ScoreSet</text>
61
+ <rect style="fill: #ffffff" x="332" y="37" width="102.4" height="36"/>
62
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="332" y="37" width="102.4" height="36"/>
63
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="335" y="51">+add_score()</text>
64
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="335" y="67">+each_pair()</text>
65
+ </g>
66
+ <g>
67
+ <rect style="fill: #ffffff" x="168.6" y="226.3" width="102.4" height="28"/>
68
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="168.6" y="226.3" width="102.4" height="28"/>
69
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="219.8" y="245.3">MatchSet</text>
70
+ <rect style="fill: #ffffff" x="168.6" y="254.3" width="102.4" height="20"/>
71
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="168.6" y="254.3" width="102.4" height="20"/>
72
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="171.6" y="268.3">+add_match()</text>
73
+ </g>
74
+ <g>
75
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="59.6,211.828 59.6,48.75 332,48.75 332,47 "/>
76
+ <polygon style="fill: #ffffff" points="59.6,237 54.8,223 59.6,209 64.4,223 "/>
77
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="59.6,237 54.8,223 59.6,209 64.4,223 "/>
78
+ </g>
79
+ <g>
80
+ <rect style="fill: #ffffff" x="434.002" y="234" width="133.1" height="28"/>
81
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="434.002" y="234" width="133.1" height="28"/>
82
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="500.552" y="253">Configuration</text>
83
+ </g>
84
+ <g>
85
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,288.157 500.552,315.75 59.6,315.75 59.6,266.002 "/>
86
+ <polygon style="fill: #ffffff" points="500.552,262.986 505.352,276.986 500.552,290.986 495.752,276.986 "/>
87
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,262.986 505.352,276.986 500.552,290.986 495.752,276.986 "/>
88
+ </g>
89
+ <g>
90
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,288.179 500.552,344.499 437.2,344.499 437.2,411.991 "/>
91
+ <polygon style="fill: #ffffff" points="500.552,263.007 505.352,277.007 500.552,291.007 495.752,277.007 "/>
92
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,263.007 505.352,277.007 500.552,291.007 495.752,277.007 "/>
93
+ <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="441.2" y="407.991">1..2</text>
94
+ </g>
95
+ <g>
96
+ <rect style="fill: #ffffff" x="699.002" y="189" width="116.9" height="28"/>
97
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="189" width="116.9" height="28"/>
98
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="757.452" y="208">Comparator</text>
99
+ <rect style="fill: #ffffff" x="699.002" y="217" width="116.9" height="36"/>
100
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="217" width="116.9" height="36"/>
101
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="231">+weight</text>
102
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="247">+type</text>
103
+ <rect style="fill: #ffffff" x="699.002" y="253" width="116.9" height="20"/>
104
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="253" width="116.9" height="20"/>
105
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="267">+score()</text>
106
+ </g>
107
+ <g>
108
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="593.282,248 640.052,248 640.052,231 697.995,231 "/>
109
+ <polygon style="fill: #000000" points="568.11,248 582.11,243.2 596.11,248 582.11,252.8 "/>
110
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="568.11,248 582.11,243.2 596.11,248 582.11,252.8 "/>
111
+ <text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="693.995" y="227">1..*</text>
112
+ </g>
113
+ <g>
114
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="757.452,299.177 757.452,375.5 836.75,375.5 836.75,462.994 "/>
115
+ <polygon style="fill: #ffffff" points="757.452,274.005 762.252,288.005 757.452,302.005 752.652,288.005 "/>
116
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="757.452,274.005 762.252,288.005 757.452,302.005 752.652,288.005 "/>
117
+ <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="840.75" y="458.994">2</text>
118
+ </g>
119
+ <g>
120
+ <rect style="fill: #ffffff" x="498.002" y="102" width="142.5" height="28"/>
121
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="498.002" y="102" width="142.5" height="28"/>
122
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="569.252" y="121">ScoreRecorder</text>
123
+ </g>
124
+ <g>
125
+ <line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="608.305" y1="139.864" x2="698.042" y2="194.698"/>
126
+ <polygon style="fill: #000000" points="610.913,135.597 594.653,131.521 605.698,144.13 "/>
127
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="610.913,135.597 594.653,131.521 605.698,144.13 "/>
128
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="645.921" y="172.849">update</text>
129
+ </g>
130
+ <g>
131
+ <line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="451.169" y1="68.3993" x2="532.096" y2="101.022"/>
132
+ <polygon style="fill: #000000" points="453.039,63.7619 436.33,62.4173 449.3,73.0367 "/>
133
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="453.039,63.7619 436.33,62.4173 449.3,73.0367 "/>
134
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="483.749" y="91.5327">add_score</text>
135
+ </g>
136
+ <g>
137
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,207.821 500.552,175 569.252,175 569.252,131.007 "/>
138
+ <polygon style="fill: #000000" points="500.552,232.993 495.752,218.993 500.552,204.993 505.352,218.993 "/>
139
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,232.993 495.752,218.993 500.552,204.993 505.352,218.993 "/>
140
+ </g>
141
+ <g>
142
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="134.378,251 145.4,251 145.4,250.3 167.594,250.3 "/>
143
+ <polygon style="fill: #ffffff" points="109.206,251 123.206,246.2 137.206,251 123.206,255.8 "/>
144
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="109.206,251 123.206,246.2 137.206,251 123.206,255.8 "/>
145
+ </g>
146
+ <g>
147
+ <rect style="fill: #ffffff" x="316.002" y="209.75" width="84" height="28"/>
148
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="316.002" y="209.75" width="84" height="28"/>
149
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="358.002" y="228.75">Matcher</text>
150
+ </g>
151
+ <g>
152
+ <rect style="fill: #ffffff" x="240.002" y="124.75" width="147" height="28"/>
153
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="240.002" y="124.75" width="147" height="28"/>
154
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="313.502" y="143.75">MatchRecorder</text>
155
+ </g>
156
+ <g>
157
+ <line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="329.236" y1="168.804" x2="350.153" y2="208.757"/>
158
+ <polygon style="fill: #000000" points="333.666,166.485 321.815,154.629 324.807,171.123 "/>
159
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="333.666,166.485 321.815,154.629 324.807,171.123 "/>
160
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="335.752" y="191.25">update</text>
161
+ </g>
162
+ <g>
163
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="408.779,248 377.002,248 377.002,237.75 358.002,237.75 "/>
164
+ <polygon style="fill: #000000" points="433.951,248 419.951,252.8 405.951,248 419.951,243.2 "/>
165
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="433.951,248 419.951,252.8 405.951,248 419.951,243.2 "/>
166
+ </g>
167
+ <g>
168
+ <line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="251.712" y1="212.31" x2="300.909" y2="153.742"/>
169
+ <polygon style="fill: #000000" points="247.883,209.094 241.421,224.561 255.541,215.526 "/>
170
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="247.883,209.094 241.421,224.561 255.541,215.526 "/>
171
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="270.843" y="199.534">add_match</text>
172
+ </g>
173
+ <g>
174
+ <rect style="fill: #ffffff" x="595.002" y="347.75" width="87" height="28"/>
175
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="595.002" y="347.75" width="87" height="28"/>
176
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="638.502" y="366.75">Runner</text>
177
+ <rect style="fill: #ffffff" x="595.002" y="375.75" width="87" height="20"/>
178
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="595.002" y="375.75" width="87" height="20"/>
179
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="598.002" y="389.75">+execute()</text>
180
+ </g>
181
+ <g>
182
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="638.502,321.595 638.502,294.75 567.102,294.75 567.102,262 "/>
183
+ <polygon style="fill: #ffffff" points="638.502,346.766 633.702,332.766 638.502,318.766 643.302,332.766 "/>
184
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="638.502,346.766 633.702,332.766 638.502,318.766 643.302,332.766 "/>
185
+ </g>
186
+ <g>
187
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="434.002,208.828 434.002,186.375 387.002,186.375 387.002,152.75 "/>
188
+ <polygon style="fill: #000000" points="434.002,234 429.202,220 434.002,206 438.802,220 "/>
189
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="434.002,234 429.202,220 434.002,206 438.802,220 "/>
190
+ </g>
191
+ <g>
192
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="359.855,447 294.447,447 294.447,447 229.9,447 "/>
193
+ <polygon style="fill: #ffffff" points="385.027,447 371.027,451.8 357.027,447 371.027,442.2 "/>
194
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="385.027,447 371.027,451.8 357.027,447 371.027,442.2 "/>
195
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:monospace;font-style:normal;font-weight:normal" x="294.447" y="443">&lt;&lt;delegate&gt;&gt;</text>
196
+ </g>
197
+ </svg>
@@ -38,13 +38,8 @@ class Test::Unit::TestCase
38
38
  @@database_config
39
39
  end
40
40
 
41
- def stub_field(name, options = {}, &block)
42
- f = Linkage::Field.allocate
43
- f.stubs({:static? => false}.merge(options))
44
- if block
45
- f.send(:instance_eval, &block)
46
- end
47
- f
41
+ def stub_dataset(options = {}, &block)
42
+ stub_instance(Linkage::Dataset, options, &block)
48
43
  end
49
44
 
50
45
  def stub_instance(klass, options = {}, &block)
@@ -89,10 +84,6 @@ class Test::Unit::TestCase
89
84
 
90
85
  def new_result_set(&block)
91
86
  klass = Class.new(Linkage::ResultSet)
92
- klass.send(:define_method, :score_set) do
93
- end
94
- klass.send(:define_method, :match_set) do
95
- end
96
87
  if block_given?
97
88
  klass.class_eval(&block)
98
89
  end
@@ -41,14 +41,16 @@ class IntegrationTests::TestDatabaseResultSet < Test::Unit::TestCase
41
41
  db[:scores].order(:id_1, :id_2).each do |row|
42
42
  assert_equal row[:id_1], row[:id_2]
43
43
  assert_equal 1, row[:comparator_id]
44
- assert_same 1.0, row[:score]
44
+ assert_kind_of Float, row[:score]
45
+ assert_equal 1.0, row[:score]
45
46
  end
46
47
 
47
48
  assert db.table_exists?(:matches)
48
49
  assert_equal 10, db[:matches].count
49
50
  db[:matches].order(:id_1, :id_2).each do |row|
50
51
  assert_equal row[:id_1], row[:id_2]
51
- assert_same 1.0, row[:score]
52
+ assert_kind_of Float, row[:score]
53
+ assert_equal 1.0, row[:score]
52
54
  end
53
55
  end
54
56
  end
@@ -36,6 +36,35 @@ class UnitTests::TestComparators::TestStrcompare < Test::Unit::TestCase
36
36
  assert_equal 0.961, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
37
37
  assert_equal 0.840, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
38
38
  assert_equal 0.813, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
39
+ assert_equal 0.783, comp.score({:foo => 'erin'}, {:bar => 'afrin'})
40
+ assert_equal 0.939, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'REICHENBERG'})
41
+ assert_equal 0.823, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'EEICHENBERG'})
42
+ assert_equal 0.775, comp.score({:foo => 'airplane'}, {:bar => 'plane'})
43
+ assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
44
+ end
45
+
46
+ test "score for reverse-jarowinkler" do
47
+ field_1 = stub('field 1', :name => :foo, :ruby_type => { :type => String })
48
+ field_2 = stub('field 2', :name => :bar, :ruby_type => { :type => String })
49
+ comp = Strcompare.new(field_1, field_2, :reverse_jarowinkler)
50
+ assert_equal 0.950, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
51
+ assert_equal 0.858, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
52
+ assert_equal 0.775, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
53
+ assert_equal 0.848, comp.score({:foo => 'erin'}, {:bar => 'afrin'})
54
+ assert_equal 0.964, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'REICHENBERG'})
55
+ assert_equal 0.964, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'EEICHENBERG'})
56
+ assert_equal 0.925, comp.score({:foo => 'airplane'}, {:bar => 'plane'})
57
+ assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
58
+ end
59
+
60
+ test "score for damerau-levenshtein" do
61
+ field_1 = stub('field 1', :name => :foo, :ruby_type => { :type => String })
62
+ field_2 = stub('field 2', :name => :bar, :ruby_type => { :type => String })
63
+ comp = Strcompare.new(field_1, field_2, :damerau_levenshtein)
64
+ assert_equal 0.833, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
65
+ assert_equal 0.750, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
66
+ assert_equal 0.688, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
67
+ assert_equal 0.889, comp.score({:foo => 'perfect'}, {:bar => 'perfect10'})
39
68
  assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
40
69
  end
41
70