linkage 0.1.0.pre → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -0
  3. data/Guardfile +0 -1
  4. data/TODO +2 -0
  5. data/lib/linkage.rb +1 -0
  6. data/lib/linkage/comparator.rb +12 -2
  7. data/lib/linkage/comparators/strcompare.rb +68 -16
  8. data/lib/linkage/configuration.rb +112 -8
  9. data/lib/linkage/dataset.rb +124 -9
  10. data/lib/linkage/exceptions.rb +5 -0
  11. data/lib/linkage/field.rb +55 -18
  12. data/lib/linkage/field_set.rb +20 -0
  13. data/lib/linkage/helpers.rb +7 -0
  14. data/lib/linkage/helpers/csv.rb +28 -0
  15. data/lib/linkage/helpers/database.rb +47 -0
  16. data/lib/linkage/import_buffer.rb +3 -3
  17. data/lib/linkage/match_recorder.rb +4 -0
  18. data/lib/linkage/match_set.rb +51 -13
  19. data/lib/linkage/match_sets/csv.rb +36 -9
  20. data/lib/linkage/match_sets/database.rb +43 -2
  21. data/lib/linkage/matcher.rb +49 -3
  22. data/lib/linkage/result_set.rb +60 -22
  23. data/lib/linkage/result_sets/csv.rb +46 -28
  24. data/lib/linkage/result_sets/database.rb +44 -26
  25. data/lib/linkage/runner.rb +10 -0
  26. data/lib/linkage/score_recorder.rb +5 -0
  27. data/lib/linkage/score_set.rb +78 -20
  28. data/lib/linkage/score_sets/csv.rb +41 -15
  29. data/lib/linkage/score_sets/database.rb +43 -5
  30. data/lib/linkage/version.rb +1 -1
  31. data/linkage.gemspec +2 -0
  32. data/misc/uml/linkage.dia +0 -0
  33. data/misc/uml/linkage.png +0 -0
  34. data/misc/uml/linkage.svg +197 -0
  35. data/test/helper.rb +2 -11
  36. data/test/integration/test_database_result_set.rb +4 -2
  37. data/test/unit/comparators/test_strcompare.rb +29 -0
  38. data/test/unit/match_sets/test_csv.rb +44 -13
  39. data/test/unit/match_sets/test_database.rb +42 -1
  40. data/test/unit/result_sets/test_csv.rb +9 -69
  41. data/test/unit/result_sets/test_database.rb +20 -11
  42. data/test/unit/score_sets/test_csv.rb +68 -25
  43. data/test/unit/score_sets/test_database.rb +57 -1
  44. data/test/unit/test_comparator.rb +8 -0
  45. data/test/unit/test_configuration.rb +33 -6
  46. data/test/unit/test_dataset.rb +0 -7
  47. data/test/unit/test_matcher.rb +52 -3
  48. data/test/unit/test_result_set.rb +8 -14
  49. metadata +66 -32
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad6a9ee6a6add94a342e3d02d49d8a4bfeb9122b
4
- data.tar.gz: a989e8e810602dfcd4da596fcc1154a922dedccd
3
+ metadata.gz: 57f4bad92110063c64ed24a43b2a805f4fe6d051
4
+ data.tar.gz: 9d9ff5fda254dae02bde47dac69c94af56300d51
5
5
  SHA512:
6
- metadata.gz: 0d78da4904100679826cf76ae07f7daf984c12e2a762aceb0ad2d1387c5a5778403a782ff104ce48c1fd3ec5588c728fe2b8ea7a79a896da153a8d91c7e4cb14
7
- data.tar.gz: 6797e5598f47413022d18c6732cbf0613efaccba886cda56eb390e3344d131ccfd977b5992c5a2c4557cfbc4f93bf747ddffe849770be5860e793f27fe3e2286
6
+ metadata.gz: 63552888a854815988985d54a628e7594d072765027767ea95b159ef80408c64cc2d5ec608892be15c356aecd028529a02df6ca2792827c26aaffa605ab28b65
7
+ data.tar.gz: 7531bca7bec718605f940557a1572fd3f74528883d08d7137b4c775f95e8b7b4036fe00a6c270f24b30ebf093bb345f97f425f4813aae620f4e69c28b99abde3
data/.yardopts CHANGED
@@ -1 +1,3 @@
1
1
  -m markdown
2
+ --plugin redcarpet-ext
3
+ --private
data/Guardfile CHANGED
@@ -2,7 +2,6 @@ guard 'test' do
2
2
  watch(%r{^lib/linkage/([^/]+/)*([^/]+)\.rb$}) { |m| "test/unit/#{m[1]}test_#{m[2]}.rb" }
3
3
  watch(%r{^test/unit/([^/]+/)*test_.+\.rb$})
4
4
  watch(%r{^test/integration/test_.+\.rb$})
5
- watch('lib/linkage/configuration.rb') { "test/unit/test_dataset.rb" }
6
5
  watch('test/helper.rb') { "test" }
7
6
  end
8
7
 
data/TODO CHANGED
@@ -2,3 +2,5 @@ Features
2
2
  - add matcher algorithms
3
3
  - change comparator_id to more than just an index; need to get comparator ids
4
4
  from result set instead of configuration
5
+ - serialize configuration into different formats
6
+ - disallow bad options in result sets
data/lib/linkage.rb CHANGED
@@ -9,6 +9,7 @@ module Linkage
9
9
  end
10
10
 
11
11
  path = Pathname.new(File.expand_path(File.dirname(__FILE__))) + 'linkage'
12
+ require path + 'helpers'
12
13
  require path + 'comparator'
13
14
  require path + 'configuration'
14
15
  require path + 'dataset'
data/lib/linkage/comparator.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Linkage
2
2
  # {Comparator} is the superclass for comparators in Linkage. Comparators are
3
- # used to compare two records and compute scores based on how closely the two
4
- # records relate.
3
+ # used to compare records and compute scores based on how closely the records
4
+ # relate.
5
5
  #
6
6
  # Each comparator should inherit from {Comparator} and declare itself as
7
7
  # simple or advanced by overriding {#type} (the default is simple). Simple
@@ -22,6 +22,16 @@ module Linkage
22
22
  class Comparator
23
23
  include Observable
24
24
 
25
+ attr_reader :weight
26
+
27
+ def weigh(weight)
28
+ return if weight.nil?
29
+ if not weight.is_a?(Numeric)
30
+ raise "weight must be numeric type"
31
+ end
32
+ @weight = weight
33
+ end
34
+
25
35
  class << self
26
36
  # Register a new comparator. Subclasses must define at least {#score} for
27
37
  # simple comparators, or {#score_dataset} and {#score_datasets} for
data/lib/linkage/comparators/strcompare.rb CHANGED
@@ -7,6 +7,7 @@ module Linkage
7
7
  # the comparison, along with an operator. Valid operators are:
8
8
  #
9
9
  # * `:jarowinkler` ([Jaro-Winkler distance](http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance))
10
+ # * `:damerau_levenshtein` ([Damerau-Levenshtein distance](http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance))
10
11
  #
11
12
  # Consider the following example, using a {Configuration} as part of
12
13
  # {Dataset#link_with}:
@@ -17,8 +18,11 @@ module Linkage
17
18
  #
18
19
  # For each record, the values of the `foo` and `bar` fields are compared
19
20
  # using the Jaro-Winkler distance algorithm.
21
+ #
22
+ # Damerau-Levenshtein is a modified Levenshtein that allows for transpositions.
23
+ # This implementation additionally makes additions and deletions cost only 0.5 (substitutions and transpositions cost 1).
20
24
  class Strcompare < Comparator
21
- VALID_OPERATIONS = [:jarowinkler]
25
+ VALID_OPERATIONS = [:jarowinkler, :reverse_jarowinkler, :damerau_levenshtein]
22
26
 
23
27
  def initialize(field_1, field_2, operation)
24
28
  if field_1.ruby_type[:type] != String || field_2.ruby_type[:type] != String
@@ -38,6 +42,10 @@ module Linkage
38
42
  case @operation
39
43
  when :jarowinkler
40
44
  jarowinkler(record_1[@name_1], record_2[@name_2])
45
+ when :reverse_jarowinkler
46
+ reverse_jarowinkler(record_1[@name_1], record_2[@name_2])
47
+ when :damerau_levenshtein
48
+ damerau_levenshtein(record_1[@name_1], record_2[@name_2])
41
49
  end
42
50
 
43
51
  result
@@ -50,33 +58,77 @@ module Linkage
50
58
  ba = b.split('')
51
59
  al = a.length
52
60
  bl = b.length
61
+ return 0 if al == 0 || bl == 0
53
62
  l = 0
54
63
  for i in Range.new(0, [[al, bl].min, 4].min-1)
55
64
  break if aa[i] != ba[i]
56
65
  l += 1
57
66
  end
58
- aj = aa - (aa - ba)
59
- bj = ba - (ba - aa)
60
- nm = 0
61
- nt = 0
62
- md = [[al, bl].max/2 - 1, 0].max
67
+ md = [[al, bl].max/2 - 1, 1].max
68
+ usea = []
69
+ useb = []
70
+ # simplify to matching characters
63
71
  for i in Range.new(0, al-1)
64
- bi = ba.index(aa[i])
65
- aji = aj.index(aa[i])
66
- bji = bj.index(aa[i])
67
- if !bi.nil? && (bi + nm - i).abs <= md
68
- nm += 1
69
- nt += 1 if !bji.nil? && aji != bji
72
+ fi = [[i - md, 0].max, bl-1].min
73
+ li = [i + md, bl-1].min
74
+ for j in Range.new(fi, li)
75
+ if aa[i] == ba[j] and not useb.include?(j)
76
+ usea << i
77
+ useb << j
78
+ break
79
+ end
70
80
  end
71
- ba.delete_at(bi) if !bi.nil?
72
- aj.delete_at(aji) if !aji.nil?
73
- bj.delete_at(bji) if !bji.nil?
74
81
  end
82
+ bada = Range.new(0, al-1).to_a - usea
83
+ badb = Range.new(0, bl-1).to_a - useb
84
+ bada.reverse.each { |x| aa.delete_at(x) }
85
+ badb.reverse.each { |x| ba.delete_at(x) }
86
+ nm = aa.length
75
87
  return 0 if nm == 0
76
- d = (nm/al.to_f + nm/bl.to_f + (nm-nt)/nm.to_f)/3.0
88
+ # count transpositions
89
+ nt = 0
90
+ for i in Range.new(0, nm-1)
91
+ nt +=1 if aa[i] != ba[i]
92
+ end
93
+ d = (nm/al.to_f + nm/bl.to_f + (nm-nt/2.0)/nm.to_f)/3.0
77
94
  w = (d + l * 0.1 * (1 - d)).round(3)
78
95
  w
79
96
  end
97
+
98
+ def reverse_jarowinkler(w1, w2)
99
+ jarowinkler(w1.reverse, w2.reverse)
100
+ end
101
+
102
+ def damerau_levenshtein(w1, w2)
103
+ a = w1.downcase
104
+ b = w2.downcase
105
+ aa = a.split('')
106
+ ba = b.split('')
107
+ al = a.length
108
+ bl = b.length
109
+ denom = [al, bl].max
110
+ return 0 if denom == 0
111
+ oneago = nil
112
+ thisrow = (1..bl).to_a + [0]
113
+ al.times do |x|
114
+ twoago, oneago, thisrow = oneago, thisrow, [0] * bl + [x + 1]
115
+ bl.times do |y|
116
+ if aa[x] == ba[y]
117
+ thisrow[y] = oneago[y - 1]
118
+ else
119
+ delcost = oneago[y] + 0.5
120
+ addcost = thisrow[y - 1] + 0.5
121
+ subcost = oneago[y - 1] + 1
122
+ thisrow[y] = [delcost, addcost, subcost].min
123
+ # remove this statement for original levenshtein
124
+ if x > 0 and y > 0 and aa[x] == ba[y-1] and aa[x-1] == ba[y]
125
+ thisrow[y] = [thisrow[y], twoago[y-2] + 1].min
126
+ end
127
+ end
128
+ end
129
+ end
130
+ return (1 - thisrow[bl - 1] / denom.to_f).round(3)
131
+ end
80
132
  end
81
133
 
82
134
  Comparator.register('strcompare', Strcompare)
data/lib/linkage/configuration.rb CHANGED
@@ -1,21 +1,124 @@
1
1
  module Linkage
2
+ # {Configuration} keeps track of everything needed to run a record linkage,
3
+ # including which datasets you want to link, how you want to link them, and
4
+ # where you want to store the results. Once created, you can supply the
5
+ # {Configuration} to {Runner#initialize} and run it with {Runner#execute}.
6
+ #
7
+ # To create a configuration, usually you will want to use {Dataset#link_with},
8
+ # but you can create it directly if you like (see {#initialize}), like so:
9
+ #
10
+ # ```ruby
11
+ # dataset_1 = Linkage::Dataset.new('mysql://example.com/database_name', 'foo')
12
+ # dataset_2 = Linkage::Dataset.new('postgres://example.com/other_name', 'bar')
13
+ # result_set = Linkage::ResultSet['csv'].new('/home/foo/linkage')
14
+ # config = Linkage::Configuration.new(dataset_1, dataset_2, result_set)
15
+ # ```
16
+ #
17
+ # To add comparators to {Configuration}, you can call methods with the same
18
+ # name as registered comparators. Here's the list of builtin comparators:
19
+ #
20
+ # | Name | Class |
21
+ # |------------|---------------------------|
22
+ # | compare | {Comparators::Compare} |
23
+ # | strcompare | {Comparators::Strcompare} |
24
+ # | within | {Comparators::Within} |
25
+ #
26
+ # For example, if you want to add a {Comparators::Compare} comparator to
27
+ # your configuration, run this:
28
+ #
29
+ # ```ruby
30
+ # config.compare([:foo], [:bar], :equal_to)
31
+ # ```
32
+ #
33
+ # This works via {Configuration#method_missing}. First, the comparator class
34
+ # is fetched via {Comparator.[]}. Then fields are looked up in the {FieldSet}
35
+ # of the {Dataset}. Those {Field}s along with any other arguments you specify
36
+ # are passed to the constructor of the comparator you chose.
37
+ #
38
+ # {Configuration} also contains information about how records are matched.
39
+ # Once scores are computed, the scores for each pair of records are averaged
40
+ # and compared against a threshold value. Record pairs that have an average
41
+ # score greater than or equal to the threshold value are considered matches.
42
+ #
43
+ # The threshold value is `0.5` by default, but you can change it by setting
44
+ # {#threshold} like so:
45
+ #
46
+ # ```ruby
47
+ # config.threshold = 0.75
48
+ # ```
49
+ #
50
+ # Since scores range between 0 and 1 (inclusive), be sure to set a threshold
51
+ # value within the same range. The actual matching work is done by the
52
+ # {Matcher} class.
53
+ #
54
+ # @see Dataset
55
+ # @see ResultSet
56
+ # @see Comparator
57
+ # @see Matcher
58
+ # @see Runner
2
59
  class Configuration
3
- attr_reader :dataset_1, :dataset_2, :result_set, :comparators
4
- attr_accessor :record_cache_size, :algorithm, :threshold
60
+ attr_reader :dataset_1, :dataset_2, :result_set, :comparators, :threshold
61
+ attr_accessor :algorithm
5
62
 
63
+ def threshold=(threshold)
64
+ if not threshold.is_a?(Numeric)
65
+ raise "threshold must be numeric type"
66
+ end
67
+ @threshold = threshold
68
+ end
69
+ # Create a new instance of {Configuration}.
70
+ #
71
+ # @overload initialize(dataset_1, dataset_2, result_set)
72
+ # Create a linkage configuration for two datasets and a result set.
73
+ # @param [Linkage::Dataset] dataset_1
74
+ # @param [Linkage::Dataset] dataset_2
75
+ # @param [Linkage::ResultSet] result_set
76
+ # @overload initialize(dataset, result_set)
77
+ # Create a linkage configuration for one dataset and a result set.
78
+ # @param [Linkage::Dataset] dataset
79
+ # @param [Linkage::ResultSet] result_set
80
+ # @overload initialize(dataset_1, dataset_2, score_set, match_set)
81
+ # Create a linkage configuration for two datasets, a score set, and a
82
+ # match set.
83
+ # @param [Linkage::Dataset] dataset_1
84
+ # @param [Linkage::Dataset] dataset_2
85
+ # @param [Linkage::ScoreSet] score_set
86
+ # @param [Linkage::MatchSet] match_set
87
+ # @overload initialize(dataset, score_set, match_set)
88
+ # Create a linkage configuration for one dataset, a score set, and a
89
+ # match set.
90
+ # @param [Linkage::Dataset] dataset
91
+ # @param [Linkage::ScoreSet] score_set
92
+ # @param [Linkage::MatchSet] match_set
6
93
  def initialize(*args)
7
- if args.length < 2 || args.length > 3
8
- raise ArgumentError, "wrong number of arguments (#{args.length} for 3..4)"
94
+ if args.length < 2 || args.length > 4
95
+ raise ArgumentError, "wrong number of arguments (#{args.length} for 2..4)"
9
96
  end
10
97
 
11
98
  @dataset_1 = args[0]
12
- if args.length > 2 && args[1]
99
+ case args.length
100
+ when 2
101
+ # dataset and result set
102
+ @result_set = args[1]
103
+ when 3
104
+ # dataset 1, dataset 2, and result set
105
+ # dataset, score set, and match set
106
+ case args[1]
107
+ when Dataset, nil
108
+ @dataset_2 = args[1]
109
+ @result_set = args[2]
110
+ when ScoreSet
111
+ @result_set = ResultSet.new(args[1], args[2])
112
+ end
113
+ when 4
114
+ # dataset 1, dataset 2, score set, and match set
13
115
  @dataset_2 = args[1]
116
+ @result_set = ResultSet.new(args[2], args[3])
14
117
  end
15
- @result_set = args[-1]
16
118
 
17
119
  @comparators = []
18
- @record_cache_size = 10_000
120
+ @algorithm = :mean
121
+ @threshold = 0.5
19
122
  end
20
123
 
21
124
  def score_recorder
@@ -29,7 +132,7 @@ module Linkage
29
132
  end
30
133
 
31
134
  def matcher
32
- Matcher.new(@comparators, @result_set.score_set, @algorithm || :mean, @threshold || 0.5)
135
+ Matcher.new(@comparators, @result_set.score_set, @algorithm, @threshold)
33
136
  end
34
137
 
35
138
  def match_recorder(matcher)
@@ -60,6 +163,7 @@ module Linkage
60
163
 
61
164
  comparator = klass.new(*args, &block)
62
165
  @comparators << comparator
166
+ return comparator
63
167
  end
64
168
 
65
169
  protected
data/lib/linkage/dataset.rb CHANGED
@@ -1,8 +1,111 @@
1
1
  module Linkage
2
- # Delegator around Sequel::Dataset with some extra functionality.
2
+ # {Dataset} is a representation of a database table. It is a thin wrapper
3
+ # around a
4
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
5
+ #
6
+ # There are three ways to create a {Dataset}.
7
+ #
8
+ # Pass in a {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}:
9
+ #
10
+ # ```ruby
11
+ # Linkage::Dataset.new(db[:foo])
12
+ # ```
13
+ #
14
+ # Pass in a {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html `Sequel::Database`}
15
+ # and a table name:
16
+ #
17
+ # ```ruby
18
+ # Linkage::Dataset.new(db, :foo)
19
+ # ```
20
+ #
21
+ # Pass in a
22
+ # {http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html Sequel-style}
23
+ # connection URI, a table name, and any options you want to pass to
24
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel.html#method-c-connect `Sequel.connect`}.
25
+ #
26
+ # ```ruby
27
+ # Linkage::Dataset.new("mysql2://example.com/foo", :bar, :user => 'viking', :password => 'secret')
28
+ # ```
29
+ #
30
+ # Once you've made a {Dataset}, you can use any
31
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}
32
+ # method on it you wish. For example, if you want to limit the dataset to
33
+ # records that refer to people born after 1985 (assuming date of birth is
34
+ # stored as a date type):
35
+ #
36
+ # ```ruby
37
+ # filtered_dataset = dataset.where('dob > :date', :date => Date.new(1985, 1, 1))
38
+ # ```
39
+ #
40
+ # Note that
41
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}
42
+ # methods return a __clone__ of a dataset, so you must assign the return value
43
+ # to a variable.
44
+ #
45
+ # Once you have your {Dataset} how you want it, you can use the {#link_with}
46
+ # method to create a {Configuration} for record linkage. The {#link_with}
47
+ # method takes another {Dataset} object and a {ResultSet} and returns a
48
+ # {Configuration}.
49
+ #
50
+ # ```ruby
51
+ # config = dataset.link_with(other_dataset, result_set)
52
+ # config.compare([:foo], [:bar], :equal_to)
53
+ # ```
54
+ #
55
+ # You can pass in a {ScoreSet} and {MatchSet} instead of a {ResultSet} if you
56
+ # wish:
57
+ #
58
+ # ```ruby
59
+ # config = dataset.link_with(other_dataset, score_set, match_set)
60
+ # ```
61
+ #
62
+ # Note that a dataset can be linked with itself the same way, like so:
63
+ #
64
+ # ```ruby
65
+ # config = dataset.link_with(dataset, result_set)
66
+ # config.compare([:foo], [:bar], :equal_to)
67
+ # ```
68
+ #
69
+ # If you give {#link_with} a block, it will yield the same {Configuration}
70
+ # object to the block that it returns.
71
+ #
72
+ # ```ruby
73
+ # config = dataset.link_with(other_dataset, result_set) do |c|
74
+ # c.compare([:foo], [:bar], :equal_to)
75
+ # end
76
+ # ```
77
+ #
78
+ # Once that's done, use a {Runner} to run the record linkage:
79
+ #
80
+ # ```ruby
81
+ # runner = Linkage::Runner.new(config)
82
+ # runner.execute
83
+ # ```
84
+ #
85
+ # @see http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html Connecting to a database
3
86
  class Dataset
4
- attr_reader :field_set, :table_name
87
+ # @return [Symbol] Returns this dataset's table name.
88
+ attr_reader :table_name
5
89
 
90
+ # @return [FieldSet] Returns this dataset's {FieldSet}.
91
+ attr_reader :field_set
92
+
93
+ # Returns a new instance of {Dataset}.
94
+ #
95
+ # @overload initialize(dataset)
96
+ # Use a specific {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
97
+ # @param dataset [Sequel::Dataset]
98
+ # @overload initialize(database, table_name)
99
+ # Use a specific {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html `Sequel::Database`}.
100
+ # @param database [Sequel::Database]
101
+ # @param table_name [Symbol, String]
102
+ # @overload initialize(uri, table_name, options = {})
103
+ # Use {http://sequel.jeremyevans.net/rdoc/classes/Sequel.html#method-c-connect `Sequel.connect`}
104
+ # to connect to a database.
105
+ # @param uri [String, Hash]
106
+ # @param table_name [Symbol, String]
107
+ # @param options [Hash]
108
+ #
6
109
  def initialize(*args)
7
110
  if args.length == 0 || args.length > 3
8
111
  raise ArgumentError, "wrong number of arguments (#{args.length} for 1..3)"
@@ -31,17 +134,23 @@ module Linkage
31
134
  @field_set = FieldSet.new(self)
32
135
  end
33
136
 
137
+ # Returns the underlying {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
138
+ # @return [Sequel::Dataset]
34
139
  def obj
35
140
  @dataset
36
141
  end
37
142
 
143
+ # Set the underlying {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
38
144
  def obj=(value)
39
145
  @dataset = value
40
146
  end
147
+ private :obj=
41
148
 
42
- # Setup a linkage with another dataset
149
+ # Create a {Configuration} for record linkage.
43
150
  #
44
- # @return [Linkage::Configuration]
151
+ # @param dataset [Dataset]
152
+ # @param result_set [ResultSet]
153
+ # @return [Configuration]
45
154
  def link_with(dataset, result_set)
46
155
  other = dataset.eql?(self) ? nil : dataset
47
156
  conf = Configuration.new(self, other, result_set)
@@ -51,25 +160,31 @@ module Linkage
51
160
  conf
52
161
  end
53
162
 
54
- def database_type
55
- @db.database_type
56
- end
57
-
163
+ # Return the dataset's schema.
164
+ #
165
+ # @return [Array]
166
+ # @see http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html#method-i-schema Sequel::Database#schema
58
167
  def schema
59
168
  @db.schema(@table_name)
60
169
  end
61
170
 
171
+ # Returns {FieldSet#primary_key}.
172
+ #
173
+ # @return [Field]
174
+ # @see FieldSet#primary_key
62
175
  def primary_key
63
176
  @field_set.primary_key
64
177
  end
65
178
 
66
179
  protected
67
180
 
181
+ # Delegate methods to the underlying
182
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
68
183
  def method_missing(name, *args, &block)
69
184
  result = @dataset.send(name, *args, &block)
70
185
  if result.kind_of?(Sequel::Dataset)
71
186
  new_object = clone
72
- new_object.obj = result
187
+ new_object.send(:obj=, result)
73
188
  new_object
74
189
  else
75
190
  result