linkage 0.1.0.pre → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -0
  3. data/Guardfile +0 -1
  4. data/TODO +2 -0
  5. data/lib/linkage.rb +1 -0
  6. data/lib/linkage/comparator.rb +12 -2
  7. data/lib/linkage/comparators/strcompare.rb +68 -16
  8. data/lib/linkage/configuration.rb +112 -8
  9. data/lib/linkage/dataset.rb +124 -9
  10. data/lib/linkage/exceptions.rb +5 -0
  11. data/lib/linkage/field.rb +55 -18
  12. data/lib/linkage/field_set.rb +20 -0
  13. data/lib/linkage/helpers.rb +7 -0
  14. data/lib/linkage/helpers/csv.rb +28 -0
  15. data/lib/linkage/helpers/database.rb +47 -0
  16. data/lib/linkage/import_buffer.rb +3 -3
  17. data/lib/linkage/match_recorder.rb +4 -0
  18. data/lib/linkage/match_set.rb +51 -13
  19. data/lib/linkage/match_sets/csv.rb +36 -9
  20. data/lib/linkage/match_sets/database.rb +43 -2
  21. data/lib/linkage/matcher.rb +49 -3
  22. data/lib/linkage/result_set.rb +60 -22
  23. data/lib/linkage/result_sets/csv.rb +46 -28
  24. data/lib/linkage/result_sets/database.rb +44 -26
  25. data/lib/linkage/runner.rb +10 -0
  26. data/lib/linkage/score_recorder.rb +5 -0
  27. data/lib/linkage/score_set.rb +78 -20
  28. data/lib/linkage/score_sets/csv.rb +41 -15
  29. data/lib/linkage/score_sets/database.rb +43 -5
  30. data/lib/linkage/version.rb +1 -1
  31. data/linkage.gemspec +2 -0
  32. data/misc/uml/linkage.dia +0 -0
  33. data/misc/uml/linkage.png +0 -0
  34. data/misc/uml/linkage.svg +197 -0
  35. data/test/helper.rb +2 -11
  36. data/test/integration/test_database_result_set.rb +4 -2
  37. data/test/unit/comparators/test_strcompare.rb +29 -0
  38. data/test/unit/match_sets/test_csv.rb +44 -13
  39. data/test/unit/match_sets/test_database.rb +42 -1
  40. data/test/unit/result_sets/test_csv.rb +9 -69
  41. data/test/unit/result_sets/test_database.rb +20 -11
  42. data/test/unit/score_sets/test_csv.rb +68 -25
  43. data/test/unit/score_sets/test_database.rb +57 -1
  44. data/test/unit/test_comparator.rb +8 -0
  45. data/test/unit/test_configuration.rb +33 -6
  46. data/test/unit/test_dataset.rb +0 -7
  47. data/test/unit/test_matcher.rb +52 -3
  48. data/test/unit/test_result_set.rb +8 -14
  49. metadata +66 -32
@@ -2,20 +2,52 @@ require 'csv'
2
2
 
3
3
  module Linkage
4
4
  module ScoreSets
5
+ # {CSV ScoreSets::CSV} is an implementation of {ScoreSet} for saving scores
6
+ # in a CSV file.
7
+ #
8
+ # There are three options available:
9
+ #
10
+ # * `:filename` - which file to store scores in; can be an absolute path
11
+ # or relative path
12
+ # * `:dir` - which directory to put the file in; used if `:filename` is a
13
+ # relative path
14
+ # * `:overwrite` - indicate whether or not to overwrite an existing file
15
+ #
16
+ # By default, `:filename` is `'scores.csv'`, and the other options are
17
+ # blank. This means that it will write scores to the `'scores.csv'` file in
18
+ # the current working directory and will raise an error if the file already
19
+ # exists.
20
+ #
21
+ # If you specify `:dir`, that path will be created if it doesn't exist yet.
22
+ #
23
+ # The resulting file looks like this:
24
+ #
25
+ # comparator_id,id_1,id_2,score
26
+ # 1,123,456,1
27
+ # 1,124,457,0.5
28
+ # 2,123,456,0
29
+ #
30
+ # @see Helpers::CSV
5
31
  class CSV < ScoreSet
6
- def initialize(filename, options = {})
7
- @filename = filename
8
- @overwrite = options[:overwrite]
32
+ include Linkage::Helpers::CSV
33
+
34
+ DEFAULT_OPTIONS = {
35
+ :filename => 'scores.csv'
36
+ }
37
+
38
+ # @param [Hash] options
39
+ # @option options [String] :filename
40
+ # @option options [String] :dir
41
+ # @option options [Boolean] :overwrite
42
+ def initialize(options = {})
43
+ @options = DEFAULT_OPTIONS.merge(options.reject { |k, v| v.nil? })
9
44
  end
10
45
 
11
46
  def open_for_reading
12
47
  raise "already open for writing, try closing first" if @mode == :write
13
48
  return if @mode == :read
14
49
 
15
- if !File.exist?(@filename)
16
- raise MissingError, "#{@filename} does not exist"
17
- end
18
- @csv = ::CSV.open(@filename, 'rb', :headers => true)
50
+ @csv = open_csv_for_reading(@options)
19
51
  @mode = :read
20
52
  end
21
53
 
@@ -23,11 +55,7 @@ module Linkage
23
55
  raise "already open for reading, try closing first" if @mode == :read
24
56
  return if @mode == :write
25
57
 
26
- if !@overwrite && File.exist?(@filename)
27
- raise ExistsError, "#{@filename} exists and not in overwrite mode"
28
- end
29
-
30
- @csv = ::CSV.open(@filename, 'wb')
58
+ @csv = open_csv_for_writing(@options)
31
59
  @csv << %w{comparator_id id_1 id_2 score}
32
60
  @mode = :write
33
61
  end
@@ -38,7 +66,7 @@ module Linkage
38
66
  end
39
67
 
40
68
  def each_pair
41
- open_for_reading
69
+ raise "not in read mode" if @mode != :read
42
70
 
43
71
  pairs = Hash.new { |h, k| h[k] = {} }
44
72
  @csv.each do |row|
@@ -49,8 +77,6 @@ module Linkage
49
77
  pairs.each_pair do |pair, scores|
50
78
  yield pair[0], pair[1], scores
51
79
  end
52
-
53
- close
54
80
  end
55
81
 
56
82
  def close
@@ -1,8 +1,48 @@
1
1
  module Linkage
2
2
  module ScoreSets
3
+ # {Database ScoreSets::Database} is an implementation of {ScoreSet} for saving
4
+ # scores in a relational database.
5
+ #
6
+ # Scores are saved in a database table with the following columns:
7
+ # - comparator_id (integer)
8
+ # - id_1 (string)
9
+ # - id_2 (string)
10
+ # - score (float)
11
+ #
12
+ # You can set up a database connection in a few different ways. By default, a
13
+ # SQLite database with the filename of `scores.db` will be created in the
14
+ # current working directory. If you want something different, you can either
15
+ # specify a Sequel-style URI, provide connection options for
16
+ # `Sequel.connect`, or you can just specify a `Sequel::Database` object to
17
+ # use.
18
+ #
19
+ # There are a couple of non-Sequel connection options:
20
+ # * `:filename` - specify filename to use for a SQLite database
21
+ # * `:dir` - specify the parent directory for a SQLite database
22
+ #
23
+ # In addition to connection options, there are behavioral options you can
24
+ # set. By default, the table name used is called `scores`, but you can change
25
+ # that by setting the `:table_name` option in the second options hash. If
26
+ # the table already exists, an {ExistsError} will be raised unless you set
27
+ # the `:overwrite` option to a truthy value in the second options hash.
3
28
  class Database < ScoreSet
4
- def initialize(database, options = {})
5
- @database = database
29
+ include Helpers::Database
30
+
31
+ DEFAULT_OPTIONS = {
32
+ :filename => 'scores.db'
33
+ }
34
+
35
+ # @overload initialize(connection_options = {}, options = {})
36
+ # @param connection_options [Hash]
37
+ # @param options [Hash]
38
+ # @overload initialize(uri, options = {})
39
+ # @param uri [String]
40
+ # @param options [Hash]
41
+ # @overload initialize(database, options = {})
42
+ # @param database [Sequel::Database]
43
+ # @param options [Hash]
44
+ def initialize(connection_options = {}, options = {})
45
+ @database = database_connection(connection_options, DEFAULT_OPTIONS)
6
46
  @table_name = options[:table_name] || :scores
7
47
  @overwrite = options[:overwrite]
8
48
  end
@@ -51,7 +91,7 @@ module Linkage
51
91
  end
52
92
 
53
93
  def each_pair
54
- open_for_reading
94
+ raise "not in read mode" if @mode != :read
55
95
 
56
96
  current_pair = nil
57
97
  @dataset.order(:id_1, :id_2, :comparator_id).each do |row|
@@ -63,8 +103,6 @@ module Linkage
63
103
  scores[row[:comparator_id]] = row[:score]
64
104
  end
65
105
  yield(*current_pair) unless current_pair.nil?
66
-
67
- close
68
106
  end
69
107
 
70
108
  def close
@@ -1,3 +1,3 @@
1
1
  module Linkage
2
- VERSION = "0.1.0.pre"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -28,6 +28,8 @@ Gem::Specification.new do |gem|
28
28
  gem.add_development_dependency "mysql2"
29
29
  gem.add_development_dependency "guard-test"
30
30
  gem.add_development_dependency "guard-yard"
31
+ gem.add_development_dependency "redcarpet"
32
+ gem.add_development_dependency "yard-redcarpet-ext"
31
33
 
32
34
  gem.required_ruby_version = '>= 1.9'
33
35
  end
Binary file
Binary file
@@ -0,0 +1,197 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd">
3
+ <svg width="45cm" height="28cm" viewBox="10 8 891 557" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
4
+ <g>
5
+ <rect style="fill: #ffffff" x="386" y="413" width="102.4" height="28"/>
6
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="413" width="102.4" height="28"/>
7
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="437.2" y="432">Dataset</text>
8
+ <rect style="fill: #ffffff" x="386" y="441" width="102.4" height="20"/>
9
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="441" width="102.4" height="20"/>
10
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="389" y="455">+table_name</text>
11
+ <rect style="fill: #ffffff" x="386" y="461" width="102.4" height="20"/>
12
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="386" y="461" width="102.4" height="20"/>
13
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="389" y="475">+link_with()</text>
14
+ </g>
15
+ <g>
16
+ <rect style="fill: #ffffff" x="75" y="433" width="154.9" height="28"/>
17
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="75" y="433" width="154.9" height="28"/>
18
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="152.45" y="452">Sequel::Dataset</text>
19
+ </g>
20
+ <g>
21
+ <rect style="fill: #ffffff" x="574" y="500" width="84.25" height="28"/>
22
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="574" y="500" width="84.25" height="28"/>
23
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="616.125" y="519">FieldSet</text>
24
+ </g>
25
+ <g>
26
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="514.577,447 542.201,447 542.201,514 573.008,514 "/>
27
+ <polygon style="fill: #000000" points="489.405,447 503.405,442.2 517.405,447 503.405,451.8 "/>
28
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="489.405,447 503.405,442.2 517.405,447 503.405,451.8 "/>
29
+ <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="521.405" y="443"/>
30
+ <text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="569.008" y="510"/>
31
+ </g>
32
+ <g>
33
+ <rect style="fill: #ffffff" x="774" y="464" width="125.5" height="28"/>
34
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="464" width="125.5" height="28"/>
35
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="836.75" y="483">Field</text>
36
+ <rect style="fill: #ffffff" x="774" y="492" width="125.5" height="36"/>
37
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="492" width="125.5" height="36"/>
38
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="506">+name</text>
39
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="522">+schema</text>
40
+ <rect style="fill: #ffffff" x="774" y="528" width="125.5" height="36"/>
41
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="774" y="528" width="125.5" height="36"/>
42
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="542">+ruby_type()</text>
43
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="777" y="558">+primary_key?()</text>
44
+ </g>
45
+ <g>
46
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="684.427,514 674.255,514 771.992,514 772.992,514 "/>
47
+ <polygon style="fill: #ffffff" points="659.255,514 673.255,509.2 687.255,514 673.255,518.8 "/>
48
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="659.255,514 673.255,509.2 687.255,514 673.255,518.8 "/>
49
+ <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="691.255" y="510"/>
50
+ <text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="768.992" y="510">1..*</text>
51
+ </g>
52
+ <g>
53
+ <rect style="fill: #ffffff" x="11" y="237" width="97.2" height="28"/>
54
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="11" y="237" width="97.2" height="28"/>
55
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="59.6" y="256">ResultSet</text>
56
+ </g>
57
+ <g>
58
+ <rect style="fill: #ffffff" x="332" y="9" width="102.4" height="28"/>
59
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="332" y="9" width="102.4" height="28"/>
60
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="383.2" y="28">ScoreSet</text>
61
+ <rect style="fill: #ffffff" x="332" y="37" width="102.4" height="36"/>
62
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="332" y="37" width="102.4" height="36"/>
63
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="335" y="51">+add_score()</text>
64
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="335" y="67">+each_pair()</text>
65
+ </g>
66
+ <g>
67
+ <rect style="fill: #ffffff" x="168.6" y="226.3" width="102.4" height="28"/>
68
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="168.6" y="226.3" width="102.4" height="28"/>
69
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="219.8" y="245.3">MatchSet</text>
70
+ <rect style="fill: #ffffff" x="168.6" y="254.3" width="102.4" height="20"/>
71
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="168.6" y="254.3" width="102.4" height="20"/>
72
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="171.6" y="268.3">+add_match()</text>
73
+ </g>
74
+ <g>
75
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="59.6,211.828 59.6,48.75 332,48.75 332,47 "/>
76
+ <polygon style="fill: #ffffff" points="59.6,237 54.8,223 59.6,209 64.4,223 "/>
77
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="59.6,237 54.8,223 59.6,209 64.4,223 "/>
78
+ </g>
79
+ <g>
80
+ <rect style="fill: #ffffff" x="434.002" y="234" width="133.1" height="28"/>
81
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="434.002" y="234" width="133.1" height="28"/>
82
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="500.552" y="253">Configuration</text>
83
+ </g>
84
+ <g>
85
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,288.157 500.552,315.75 59.6,315.75 59.6,266.002 "/>
86
+ <polygon style="fill: #ffffff" points="500.552,262.986 505.352,276.986 500.552,290.986 495.752,276.986 "/>
87
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,262.986 505.352,276.986 500.552,290.986 495.752,276.986 "/>
88
+ </g>
89
+ <g>
90
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,288.179 500.552,344.499 437.2,344.499 437.2,411.991 "/>
91
+ <polygon style="fill: #ffffff" points="500.552,263.007 505.352,277.007 500.552,291.007 495.752,277.007 "/>
92
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,263.007 505.352,277.007 500.552,291.007 495.752,277.007 "/>
93
+ <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="441.2" y="407.991">1..2</text>
94
+ </g>
95
+ <g>
96
+ <rect style="fill: #ffffff" x="699.002" y="189" width="116.9" height="28"/>
97
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="189" width="116.9" height="28"/>
98
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="757.452" y="208">Comparator</text>
99
+ <rect style="fill: #ffffff" x="699.002" y="217" width="116.9" height="36"/>
100
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="217" width="116.9" height="36"/>
101
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="231">+weight</text>
102
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="247">+type</text>
103
+ <rect style="fill: #ffffff" x="699.002" y="253" width="116.9" height="20"/>
104
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="699.002" y="253" width="116.9" height="20"/>
105
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="702.002" y="267">+score()</text>
106
+ </g>
107
+ <g>
108
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="593.282,248 640.052,248 640.052,231 697.995,231 "/>
109
+ <polygon style="fill: #000000" points="568.11,248 582.11,243.2 596.11,248 582.11,252.8 "/>
110
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="568.11,248 582.11,243.2 596.11,248 582.11,252.8 "/>
111
+ <text font-size="12.7998" style="fill: #000000;text-anchor:end;font-family:monospace;font-style:normal;font-weight:normal" x="693.995" y="227">1..*</text>
112
+ </g>
113
+ <g>
114
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="757.452,299.177 757.452,375.5 836.75,375.5 836.75,462.994 "/>
115
+ <polygon style="fill: #ffffff" points="757.452,274.005 762.252,288.005 757.452,302.005 752.652,288.005 "/>
116
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="757.452,274.005 762.252,288.005 757.452,302.005 752.652,288.005 "/>
117
+ <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="840.75" y="458.994">2</text>
118
+ </g>
119
+ <g>
120
+ <rect style="fill: #ffffff" x="498.002" y="102" width="142.5" height="28"/>
121
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="498.002" y="102" width="142.5" height="28"/>
122
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="569.252" y="121">ScoreRecorder</text>
123
+ </g>
124
+ <g>
125
+ <line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="608.305" y1="139.864" x2="698.042" y2="194.698"/>
126
+ <polygon style="fill: #000000" points="610.913,135.597 594.653,131.521 605.698,144.13 "/>
127
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="610.913,135.597 594.653,131.521 605.698,144.13 "/>
128
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="645.921" y="172.849">update</text>
129
+ </g>
130
+ <g>
131
+ <line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="451.169" y1="68.3993" x2="532.096" y2="101.022"/>
132
+ <polygon style="fill: #000000" points="453.039,63.7619 436.33,62.4173 449.3,73.0367 "/>
133
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="453.039,63.7619 436.33,62.4173 449.3,73.0367 "/>
134
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="483.749" y="91.5327">add_score</text>
135
+ </g>
136
+ <g>
137
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,207.821 500.552,175 569.252,175 569.252,131.007 "/>
138
+ <polygon style="fill: #000000" points="500.552,232.993 495.752,218.993 500.552,204.993 505.352,218.993 "/>
139
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="500.552,232.993 495.752,218.993 500.552,204.993 505.352,218.993 "/>
140
+ </g>
141
+ <g>
142
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="134.378,251 145.4,251 145.4,250.3 167.594,250.3 "/>
143
+ <polygon style="fill: #ffffff" points="109.206,251 123.206,246.2 137.206,251 123.206,255.8 "/>
144
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="109.206,251 123.206,246.2 137.206,251 123.206,255.8 "/>
145
+ </g>
146
+ <g>
147
+ <rect style="fill: #ffffff" x="316.002" y="209.75" width="84" height="28"/>
148
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="316.002" y="209.75" width="84" height="28"/>
149
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="358.002" y="228.75">Matcher</text>
150
+ </g>
151
+ <g>
152
+ <rect style="fill: #ffffff" x="240.002" y="124.75" width="147" height="28"/>
153
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="240.002" y="124.75" width="147" height="28"/>
154
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="313.502" y="143.75">MatchRecorder</text>
155
+ </g>
156
+ <g>
157
+ <line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="329.236" y1="168.804" x2="350.153" y2="208.757"/>
158
+ <polygon style="fill: #000000" points="333.666,166.485 321.815,154.629 324.807,171.123 "/>
159
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="333.666,166.485 321.815,154.629 324.807,171.123 "/>
160
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="335.752" y="191.25">update</text>
161
+ </g>
162
+ <g>
163
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="408.779,248 377.002,248 377.002,237.75 358.002,237.75 "/>
164
+ <polygon style="fill: #000000" points="433.951,248 419.951,252.8 405.951,248 419.951,243.2 "/>
165
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="433.951,248 419.951,252.8 405.951,248 419.951,243.2 "/>
166
+ </g>
167
+ <g>
168
+ <line style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x1="251.712" y1="212.31" x2="300.909" y2="153.742"/>
169
+ <polygon style="fill: #000000" points="247.883,209.094 241.421,224.561 255.541,215.526 "/>
170
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="247.883,209.094 241.421,224.561 255.541,215.526 "/>
171
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="270.843" y="199.534">add_match</text>
172
+ </g>
173
+ <g>
174
+ <rect style="fill: #ffffff" x="595.002" y="347.75" width="87" height="28"/>
175
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="595.002" y="347.75" width="87" height="28"/>
176
+ <text font-size="16" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:700" x="638.502" y="366.75">Runner</text>
177
+ <rect style="fill: #ffffff" x="595.002" y="375.75" width="87" height="20"/>
178
+ <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="595.002" y="375.75" width="87" height="20"/>
179
+ <text font-size="12.8" style="fill: #000000;text-anchor:start;font-family:monospace;font-style:normal;font-weight:normal" x="598.002" y="389.75">+execute()</text>
180
+ </g>
181
+ <g>
182
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="638.502,321.595 638.502,294.75 567.102,294.75 567.102,262 "/>
183
+ <polygon style="fill: #ffffff" points="638.502,346.766 633.702,332.766 638.502,318.766 643.302,332.766 "/>
184
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="638.502,346.766 633.702,332.766 638.502,318.766 643.302,332.766 "/>
185
+ </g>
186
+ <g>
187
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="434.002,208.828 434.002,186.375 387.002,186.375 387.002,152.75 "/>
188
+ <polygon style="fill: #000000" points="434.002,234 429.202,220 434.002,206 438.802,220 "/>
189
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="434.002,234 429.202,220 434.002,206 438.802,220 "/>
190
+ </g>
191
+ <g>
192
+ <polyline style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="359.855,447 294.447,447 294.447,447 229.9,447 "/>
193
+ <polygon style="fill: #ffffff" points="385.027,447 371.027,451.8 357.027,447 371.027,442.2 "/>
194
+ <polygon style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" points="385.027,447 371.027,451.8 357.027,447 371.027,442.2 "/>
195
+ <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:monospace;font-style:normal;font-weight:normal" x="294.447" y="443">&lt;&lt;delegate&gt;&gt;</text>
196
+ </g>
197
+ </svg>
@@ -38,13 +38,8 @@ class Test::Unit::TestCase
38
38
  @@database_config
39
39
  end
40
40
 
41
- def stub_field(name, options = {}, &block)
42
- f = Linkage::Field.allocate
43
- f.stubs({:static? => false}.merge(options))
44
- if block
45
- f.send(:instance_eval, &block)
46
- end
47
- f
41
+ def stub_dataset(options = {}, &block)
42
+ stub_instance(Linkage::Dataset, options, &block)
48
43
  end
49
44
 
50
45
  def stub_instance(klass, options = {}, &block)
@@ -89,10 +84,6 @@ class Test::Unit::TestCase
89
84
 
90
85
  def new_result_set(&block)
91
86
  klass = Class.new(Linkage::ResultSet)
92
- klass.send(:define_method, :score_set) do
93
- end
94
- klass.send(:define_method, :match_set) do
95
- end
96
87
  if block_given?
97
88
  klass.class_eval(&block)
98
89
  end
@@ -41,14 +41,16 @@ class IntegrationTests::TestDatabaseResultSet < Test::Unit::TestCase
41
41
  db[:scores].order(:id_1, :id_2).each do |row|
42
42
  assert_equal row[:id_1], row[:id_2]
43
43
  assert_equal 1, row[:comparator_id]
44
- assert_same 1.0, row[:score]
44
+ assert_kind_of Float, row[:score]
45
+ assert_equal 1.0, row[:score]
45
46
  end
46
47
 
47
48
  assert db.table_exists?(:matches)
48
49
  assert_equal 10, db[:matches].count
49
50
  db[:matches].order(:id_1, :id_2).each do |row|
50
51
  assert_equal row[:id_1], row[:id_2]
51
- assert_same 1.0, row[:score]
52
+ assert_kind_of Float, row[:score]
53
+ assert_equal 1.0, row[:score]
52
54
  end
53
55
  end
54
56
  end
@@ -36,6 +36,35 @@ class UnitTests::TestComparators::TestStrcompare < Test::Unit::TestCase
36
36
  assert_equal 0.961, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
37
37
  assert_equal 0.840, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
38
38
  assert_equal 0.813, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
39
+ assert_equal 0.783, comp.score({:foo => 'erin'}, {:bar => 'afrin'})
40
+ assert_equal 0.939, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'REICHENBERG'})
41
+ assert_equal 0.823, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'EEICHENBERG'})
42
+ assert_equal 0.775, comp.score({:foo => 'airplane'}, {:bar => 'plane'})
43
+ assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
44
+ end
45
+
46
+ test "score for reverse-jarowinkler" do
47
+ field_1 = stub('field 1', :name => :foo, :ruby_type => { :type => String })
48
+ field_2 = stub('field 2', :name => :bar, :ruby_type => { :type => String })
49
+ comp = Strcompare.new(field_1, field_2, :reverse_jarowinkler)
50
+ assert_equal 0.950, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
51
+ assert_equal 0.858, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
52
+ assert_equal 0.775, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
53
+ assert_equal 0.848, comp.score({:foo => 'erin'}, {:bar => 'afrin'})
54
+ assert_equal 0.964, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'REICHENBERG'})
55
+ assert_equal 0.964, comp.score({:foo => 'KEICHENBERG'}, {:bar => 'EEICHENBERG'})
56
+ assert_equal 0.925, comp.score({:foo => 'airplane'}, {:bar => 'plane'})
57
+ assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
58
+ end
59
+
60
+ test "score for damerau-levenshtein" do
61
+ field_1 = stub('field 1', :name => :foo, :ruby_type => { :type => String })
62
+ field_2 = stub('field 2', :name => :bar, :ruby_type => { :type => String })
63
+ comp = Strcompare.new(field_1, field_2, :damerau_levenshtein)
64
+ assert_equal 0.833, comp.score({:foo => 'martha'}, {:bar => 'marhta'})
65
+ assert_equal 0.750, comp.score({:foo => 'dwayne'}, {:bar => 'duane'})
66
+ assert_equal 0.688, comp.score({:foo => 'dixon'}, {:bar => 'dicksonx'})
67
+ assert_equal 0.889, comp.score({:foo => 'perfect'}, {:bar => 'perfect10'})
39
68
  assert_equal 0, comp.score({:foo => 'cat'}, {:bar => 'dog'})
40
69
  end
41
70