linkage 0.1.0.pre → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -0
  3. data/Guardfile +0 -1
  4. data/TODO +2 -0
  5. data/lib/linkage.rb +1 -0
  6. data/lib/linkage/comparator.rb +12 -2
  7. data/lib/linkage/comparators/strcompare.rb +68 -16
  8. data/lib/linkage/configuration.rb +112 -8
  9. data/lib/linkage/dataset.rb +124 -9
  10. data/lib/linkage/exceptions.rb +5 -0
  11. data/lib/linkage/field.rb +55 -18
  12. data/lib/linkage/field_set.rb +20 -0
  13. data/lib/linkage/helpers.rb +7 -0
  14. data/lib/linkage/helpers/csv.rb +28 -0
  15. data/lib/linkage/helpers/database.rb +47 -0
  16. data/lib/linkage/import_buffer.rb +3 -3
  17. data/lib/linkage/match_recorder.rb +4 -0
  18. data/lib/linkage/match_set.rb +51 -13
  19. data/lib/linkage/match_sets/csv.rb +36 -9
  20. data/lib/linkage/match_sets/database.rb +43 -2
  21. data/lib/linkage/matcher.rb +49 -3
  22. data/lib/linkage/result_set.rb +60 -22
  23. data/lib/linkage/result_sets/csv.rb +46 -28
  24. data/lib/linkage/result_sets/database.rb +44 -26
  25. data/lib/linkage/runner.rb +10 -0
  26. data/lib/linkage/score_recorder.rb +5 -0
  27. data/lib/linkage/score_set.rb +78 -20
  28. data/lib/linkage/score_sets/csv.rb +41 -15
  29. data/lib/linkage/score_sets/database.rb +43 -5
  30. data/lib/linkage/version.rb +1 -1
  31. data/linkage.gemspec +2 -0
  32. data/misc/uml/linkage.dia +0 -0
  33. data/misc/uml/linkage.png +0 -0
  34. data/misc/uml/linkage.svg +197 -0
  35. data/test/helper.rb +2 -11
  36. data/test/integration/test_database_result_set.rb +4 -2
  37. data/test/unit/comparators/test_strcompare.rb +29 -0
  38. data/test/unit/match_sets/test_csv.rb +44 -13
  39. data/test/unit/match_sets/test_database.rb +42 -1
  40. data/test/unit/result_sets/test_csv.rb +9 -69
  41. data/test/unit/result_sets/test_database.rb +20 -11
  42. data/test/unit/score_sets/test_csv.rb +68 -25
  43. data/test/unit/score_sets/test_database.rb +57 -1
  44. data/test/unit/test_comparator.rb +8 -0
  45. data/test/unit/test_configuration.rb +33 -6
  46. data/test/unit/test_dataset.rb +0 -7
  47. data/test/unit/test_matcher.rb +52 -3
  48. data/test/unit/test_result_set.rb +8 -14
  49. metadata +66 -32
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad6a9ee6a6add94a342e3d02d49d8a4bfeb9122b
4
- data.tar.gz: a989e8e810602dfcd4da596fcc1154a922dedccd
3
+ metadata.gz: 57f4bad92110063c64ed24a43b2a805f4fe6d051
4
+ data.tar.gz: 9d9ff5fda254dae02bde47dac69c94af56300d51
5
5
  SHA512:
6
- metadata.gz: 0d78da4904100679826cf76ae07f7daf984c12e2a762aceb0ad2d1387c5a5778403a782ff104ce48c1fd3ec5588c728fe2b8ea7a79a896da153a8d91c7e4cb14
7
- data.tar.gz: 6797e5598f47413022d18c6732cbf0613efaccba886cda56eb390e3344d131ccfd977b5992c5a2c4557cfbc4f93bf747ddffe849770be5860e793f27fe3e2286
6
+ metadata.gz: 63552888a854815988985d54a628e7594d072765027767ea95b159ef80408c64cc2d5ec608892be15c356aecd028529a02df6ca2792827c26aaffa605ab28b65
7
+ data.tar.gz: 7531bca7bec718605f940557a1572fd3f74528883d08d7137b4c775f95e8b7b4036fe00a6c270f24b30ebf093bb345f97f425f4813aae620f4e69c28b99abde3
data/.yardopts CHANGED
@@ -1 +1,3 @@
1
1
  -m markdown
2
+ --plugin redcarpet-ext
3
+ --private
data/Guardfile CHANGED
@@ -2,7 +2,6 @@ guard 'test' do
2
2
  watch(%r{^lib/linkage/([^/]+/)*([^/]+)\.rb$}) { |m| "test/unit/#{m[1]}test_#{m[2]}.rb" }
3
3
  watch(%r{^test/unit/([^/]+/)*test_.+\.rb$})
4
4
  watch(%r{^test/integration/test_.+\.rb$})
5
- watch('lib/linkage/configuration.rb') { "test/unit/test_dataset.rb" }
6
5
  watch('test/helper.rb') { "test" }
7
6
  end
8
7
 
data/TODO CHANGED
@@ -2,3 +2,5 @@ Features
2
2
  - add matcher algorithms
3
3
  - change comparator_id to more than just an index; need to get comparator ids
4
4
  from result set instead of configuration
5
+ - serialize configuration into different formats
6
+ - disallow bad options in result sets
data/lib/linkage.rb CHANGED
@@ -9,6 +9,7 @@ module Linkage
9
9
  end
10
10
 
11
11
  path = Pathname.new(File.expand_path(File.dirname(__FILE__))) + 'linkage'
12
+ require path + 'helpers'
12
13
  require path + 'comparator'
13
14
  require path + 'configuration'
14
15
  require path + 'dataset'
data/lib/linkage/comparator.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Linkage
2
2
  # {Comparator} is the superclass for comparators in Linkage. Comparators are
3
- # used to compare two records and compute scores based on how closely the two
4
- # records relate.
3
+ # used to compare records and compute scores based on how closely the records
4
+ # relate.
5
5
  #
6
6
  # Each comparator should inherit from {Comparator} and declare itself as
7
7
  # simple or advanced by overriding {#type} (the default is simple). Simple
@@ -22,6 +22,16 @@ module Linkage
22
22
  class Comparator
23
23
  include Observable
24
24
 
25
+ attr_reader :weight
26
+
27
+ def weigh(weight)
28
+ return if weight.nil?
29
+ if not weight.is_a?(Numeric)
30
+ raise "weight must be numeric type"
31
+ end
32
+ @weight = weight
33
+ end
34
+
25
35
  class << self
26
36
  # Register a new comparator. Subclasses must define at least {#score} for
27
37
  # simple comparators, or {#score_dataset} and {#score_datasets} for
data/lib/linkage/comparators/strcompare.rb CHANGED
@@ -7,6 +7,7 @@ module Linkage
7
7
  # the comparison, along with an operator. Valid operators are:
8
8
  #
9
9
  # * `:jarowinkler` ([Jaro-Winkler distance](http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance))
10
+ # * `:damerau_levenshtein` ([Damerau-Levenshtein distance](http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance))
10
11
  #
11
12
  # Consider the following example, using a {Configuration} as part of
12
13
  # {Dataset#link_with}:
@@ -17,8 +18,11 @@ module Linkage
17
18
  #
18
19
  # For each record, the values of the `foo` and `bar` fields are compared
19
20
  # using the Jaro-Winkler distance algorithm.
21
+ #
22
+ # Damerau-Levenshtein is a modified Levenshtein that allows for transpositions.
23
+ # It has additionally been modified so that additions and deletions cost only 0.5.
20
24
  class Strcompare < Comparator
21
- VALID_OPERATIONS = [:jarowinkler]
25
+ VALID_OPERATIONS = [:jarowinkler, :reverse_jarowinkler, :damerau_levenshtein]
22
26
 
23
27
  def initialize(field_1, field_2, operation)
24
28
  if field_1.ruby_type[:type] != String || field_2.ruby_type[:type] != String
@@ -38,6 +42,10 @@ module Linkage
38
42
  case @operation
39
43
  when :jarowinkler
40
44
  jarowinkler(record_1[@name_1], record_2[@name_2])
45
+ when :reverse_jarowinkler
46
+ reverse_jarowinkler(record_1[@name_1], record_2[@name_2])
47
+ when :damerau_levenshtein
48
+ damerau_levenshtein(record_1[@name_1], record_2[@name_2])
41
49
  end
42
50
 
43
51
  result
@@ -50,33 +58,77 @@ module Linkage
50
58
  ba = b.split('')
51
59
  al = a.length
52
60
  bl = b.length
61
+ return 0 if al == 0 || bl == 0
53
62
  l = 0
54
63
  for i in Range.new(0, [[al, bl].min, 4].min-1)
55
64
  break if aa[i] != ba[i]
56
65
  l += 1
57
66
  end
58
- aj = aa - (aa - ba)
59
- bj = ba - (ba - aa)
60
- nm = 0
61
- nt = 0
62
- md = [[al, bl].max/2 - 1, 0].max
67
+ md = [[al, bl].max/2 - 1, 1].max
68
+ usea = []
69
+ useb = []
70
+ # simplify to matching characters
63
71
  for i in Range.new(0, al-1)
64
- bi = ba.index(aa[i])
65
- aji = aj.index(aa[i])
66
- bji = bj.index(aa[i])
67
- if !bi.nil? && (bi + nm - i).abs <= md
68
- nm += 1
69
- nt += 1 if !bji.nil? && aji != bji
72
+ fi = [[i - md, 0].max, bl-1].min
73
+ li = [i + md, bl-1].min
74
+ for j in Range.new(fi, li)
75
+ if aa[i] == ba[j] and not useb.include?(j)
76
+ usea << i
77
+ useb << j
78
+ break
79
+ end
70
80
  end
71
- ba.delete_at(bi) if !bi.nil?
72
- aj.delete_at(aji) if !aji.nil?
73
- bj.delete_at(bji) if !bji.nil?
74
81
  end
82
+ bada = Range.new(0, al-1).to_a - usea
83
+ badb = Range.new(0, bl-1).to_a - useb
84
+ bada.reverse.each { |x| aa.delete_at(x) }
85
+ badb.reverse.each { |x| ba.delete_at(x) }
86
+ nm = aa.length
75
87
  return 0 if nm == 0
76
- d = (nm/al.to_f + nm/bl.to_f + (nm-nt)/nm.to_f)/3.0
88
+ # count transpositions
89
+ nt = 0
90
+ for i in Range.new(0, nm-1)
91
+ nt +=1 if aa[i] != ba[i]
92
+ end
93
+ d = (nm/al.to_f + nm/bl.to_f + (nm-nt/2.0)/nm.to_f)/3.0
77
94
  w = (d + l * 0.1 * (1 - d)).round(3)
78
95
  w
79
96
  end
97
+
98
+ def reverse_jarowinkler(w1, w2)
99
+ jarowinkler(w1.reverse, w2.reverse)
100
+ end
101
+
102
+ def damerau_levenshtein(w1, w2)
103
+ a = w1.downcase
104
+ b = w2.downcase
105
+ aa = a.split('')
106
+ ba = b.split('')
107
+ al = a.length
108
+ bl = b.length
109
+ denom = [al, bl].max
110
+ return 0 if denom == 0
111
+ oneago = nil
112
+ thisrow = (1..bl).to_a + [0]
113
+ al.times do |x|
114
+ twoago, oneago, thisrow = oneago, thisrow, [0] * bl + [x + 1]
115
+ bl.times do |y|
116
+ if aa[x] == ba[y]
117
+ thisrow[y] = oneago[y - 1]
118
+ else
119
+ delcost = oneago[y] + 0.5
120
+ addcost = thisrow[y - 1] + 0.5
121
+ subcost = oneago[y - 1] + 1
122
+ thisrow[y] = [delcost, addcost, subcost].min
123
+ # remove this statement for original levenshtein
124
+ if x > 0 and y > 0 and aa[x] == ba[y-1] and aa[x-1] == ba[y]
125
+ thisrow[y] = [thisrow[y], twoago[y-2] + 1].min
126
+ end
127
+ end
128
+ end
129
+ end
130
+ return (1 - thisrow[bl - 1] / denom.to_f).round(3)
131
+ end
80
132
  end
81
133
 
82
134
  Comparator.register('strcompare', Strcompare)
data/lib/linkage/configuration.rb CHANGED
@@ -1,21 +1,124 @@
1
1
  module Linkage
2
+ # {Configuration} keeps track of everything needed to run a record linkage,
3
+ # including which datasets you want to link, how you want to link them, and
4
+ # where you want to store the results. Once created, you can supply the
5
+ # {Configuration} to {Runner#initialize} and run it with {Runner#execute}.
6
+ #
7
+ # To create a configuration, usually you will want to use {Dataset#link_with},
8
+ # but you can create it directly if you like (see {#initialize}), like so:
9
+ #
10
+ # ```ruby
11
+ # dataset_1 = Linkage::Dataset.new('mysql://example.com/database_name', 'foo')
12
+ # dataset_2 = Linkage::Dataset.new('postgres://example.com/other_name', 'bar')
13
+ # result_set = Linkage::ResultSet['csv'].new('/home/foo/linkage')
14
+ # config = Linkage::Configuration.new(dataset_1, dataset_2, result_set)
15
+ # ```
16
+ #
17
+ # To add comparators to {Configuration}, you can call methods with the same
18
+ # name as registered comparators. Here's the list of builtin comparators:
19
+ #
20
+ # | Name | Class |
21
+ # |------------|---------------------------|
22
+ # | compare | {Comparators::Compare} |
23
+ # | strcompare | {Comparators::Strcompare} |
24
+ # | within | {Comparators::Within} |
25
+ #
26
+ # For example, if you want to add a {Comparators::Compare} comparator to
27
+ # your configuration, run this:
28
+ #
29
+ # ```ruby
30
+ # config.compare([:foo], [:bar], :equal_to)
31
+ # ```
32
+ #
33
+ # This works via {Configuration#method_missing}. First, the comparator class
34
+ # is fetched via {Comparator.[]}. Then fields are looked up in the {FieldSet}
35
+ # of the {Dataset}. Those {Field}s along with any other arguments you specify
36
+ # are passed to the constructor of the comparator you chose.
37
+ #
38
+ # {Configuration} also contains information about how records are matched.
39
+ # Once scores are computed, the scores for each pair of records are averaged
40
+ # and compared against a threshold value. Record pairs that have an average
41
+ # score greater than or equal to the threshold value are considered matches.
42
+ #
43
+ # The threshold value is `0.5` by default, but you can change it by setting
44
+ # {#threshold} like so:
45
+ #
46
+ # ```ruby
47
+ # config.threshold = 0.75
48
+ # ```
49
+ #
50
+ # Since scores range between 0 and 1 (inclusive), be sure to set a threshold
51
+ # value within the same range. The actual matching work is done by the
52
+ # {Matcher} class.
53
+ #
54
+ # @see Dataset
55
+ # @see ResultSet
56
+ # @see Comparator
57
+ # @see Matcher
58
+ # @see Runner
2
59
  class Configuration
3
- attr_reader :dataset_1, :dataset_2, :result_set, :comparators
4
- attr_accessor :record_cache_size, :algorithm, :threshold
60
+ attr_reader :dataset_1, :dataset_2, :result_set, :comparators, :threshold
61
+ attr_accessor :algorithm
5
62
 
63
+ def threshold=(threshold)
64
+ if not threshold.is_a?(Numeric)
65
+ raise "threshold must be numeric type"
66
+ end
67
+ @threshold = threshold
68
+ end
69
+ # Create a new instance of {Configuration}.
70
+ #
71
+ # @overload initialize(dataset_1, dataset_2, result_set)
72
+ # Create a linkage configuration for two datasets and a result set.
73
+ # @param [Linkage::Dataset] dataset_1
74
+ # @param [Linkage::Dataset] dataset_2
75
+ # @param [Linkage::ResultSet] result_set
76
+ # @overload initialize(dataset, result_set)
77
+ # Create a linkage configuration for one dataset and a result set.
78
+ # @param [Linkage::Dataset] dataset
79
+ # @param [Linkage::ResultSet] result_set
80
+ # @overload initialize(dataset_1, dataset_2, score_set, match_set)
81
+ # Create a linkage configuration for two datasets, a score set, and a
82
+ # match set.
83
+ # @param [Linkage::Dataset] dataset_1
84
+ # @param [Linkage::Dataset] dataset_2
85
+ # @param [Linkage::ScoreSet] score_set
86
+ # @param [Linkage::MatchSet] match_set
87
+ # @overload initialize(dataset, score_set, match_set)
88
+ # Create a linkage configuration for one dataset, a score set, and a
89
+ # match set.
90
+ # @param [Linkage::Dataset] dataset
91
+ # @param [Linkage::ScoreSet] score_set
92
+ # @param [Linkage::MatchSet] match_set
6
93
  def initialize(*args)
7
- if args.length < 2 || args.length > 3
8
- raise ArgumentError, "wrong number of arguments (#{args.length} for 3..4)"
94
+ if args.length < 2 || args.length > 4
95
+ raise ArgumentError, "wrong number of arguments (#{args.length} for 2..4)"
9
96
  end
10
97
 
11
98
  @dataset_1 = args[0]
12
- if args.length > 2 && args[1]
99
+ case args.length
100
+ when 2
101
+ # dataset and result set
102
+ @result_set = args[1]
103
+ when 3
104
+ # dataset 1, dataset 2, and result set
105
+ # dataset, score set, and match set
106
+ case args[1]
107
+ when Dataset, nil
108
+ @dataset_2 = args[1]
109
+ @result_set = args[2]
110
+ when ScoreSet
111
+ @result_set = ResultSet.new(args[1], args[2])
112
+ end
113
+ when 4
114
+ # dataset 1, dataset 2, score set, and match set
13
115
  @dataset_2 = args[1]
116
+ @result_set = ResultSet.new(args[2], args[3])
14
117
  end
15
- @result_set = args[-1]
16
118
 
17
119
  @comparators = []
18
- @record_cache_size = 10_000
120
+ @algorithm = :mean
121
+ @threshold = 0.5
19
122
  end
20
123
 
21
124
  def score_recorder
@@ -29,7 +132,7 @@ module Linkage
29
132
  end
30
133
 
31
134
  def matcher
32
- Matcher.new(@comparators, @result_set.score_set, @algorithm || :mean, @threshold || 0.5)
135
+ Matcher.new(@comparators, @result_set.score_set, @algorithm, @threshold)
33
136
  end
34
137
 
35
138
  def match_recorder(matcher)
@@ -60,6 +163,7 @@ module Linkage
60
163
 
61
164
  comparator = klass.new(*args, &block)
62
165
  @comparators << comparator
166
+ return comparator
63
167
  end
64
168
 
65
169
  protected
data/lib/linkage/dataset.rb CHANGED
@@ -1,8 +1,111 @@
1
1
  module Linkage
2
- # Delegator around Sequel::Dataset with some extra functionality.
2
+ # {Dataset} is a representation of a database table. It is a thin wrapper
3
+ # around a
4
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
5
+ #
6
+ # There are three ways to create a {Dataset}.
7
+ #
8
+ # Pass in a {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}:
9
+ #
10
+ # ```ruby
11
+ # Linkage::Dataset.new(db[:foo])
12
+ # ```
13
+ #
14
+ # Pass in a {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html `Sequel::Database`}
15
+ # and a table name:
16
+ #
17
+ # ```ruby
18
+ # Linkage::Dataset.new(db, :foo)
19
+ # ```
20
+ #
21
+ # Pass in a
22
+ # {http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html Sequel-style}
23
+ # connection URI, a table name, and any options you want to pass to
24
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel.html#method-c-connect `Sequel.connect`}.
25
+ #
26
+ # ```ruby
27
+ # Linkage::Dataset.new("mysql2://example.com/foo", :bar, :user => 'viking', :password => 'secret')
28
+ # ```
29
+ #
30
+ # Once you've made a {Dataset}, you can use any
31
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}
32
+ # method on it you wish. For example, if you want to limit the dataset to
33
+ # records that refer to people born after 1985 (assuming date of birth is
34
+ # stored as a date type):
35
+ #
36
+ # ```ruby
37
+ # filtered_dataset = dataset.where('dob > :date', :date => Date.new(1985, 1, 1))
38
+ # ```
39
+ #
40
+ # Note that
41
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}
42
+ # methods return a __clone__ of a dataset, so you must assign the return value
43
+ # to a variable.
44
+ #
45
+ # Once you have your {Dataset} how you want it, you can use the {#link_with}
46
+ # method to create a {Configuration} for record linkage. The {#link_with}
47
+ # method takes another {Dataset} object and a {ResultSet} and returns a
48
+ # {Configuration}.
49
+ #
50
+ # ```ruby
51
+ # config = dataset.link_with(other_dataset, result_set)
52
+ # config.compare([:foo], [:bar], :equal_to)
53
+ # ```
54
+ #
55
+ # You can pass in a {ScoreSet} and {MatchSet} instead of a {ResultSet} if you
56
+ # wish:
57
+ #
58
+ # ```ruby
59
+ # config = dataset.link_with(other_dataset, score_set, match_set)
60
+ # ```
61
+ #
62
+ # Note that a dataset can be linked with itself the same way, like so:
63
+ #
64
+ # ```ruby
65
+ # config = dataset.link_with(dataset, result_set)
66
+ # config.compare([:foo], [:bar], :equal_to)
67
+ # ```
68
+ #
69
+ # If you give {#link_with} a block, it will yield the same {Configuration}
70
+ # object to the block that it returns.
71
+ #
72
+ # ```ruby
73
+ # config = dataset.link_with(other_dataset, result_set) do |c|
74
+ # c.compare([:foo], [:bar], :equal_to)
75
+ # end
76
+ # ```
77
+ #
78
+ # Once that's done, use a {Runner} to run the record linkage:
79
+ #
80
+ # ```ruby
81
+ # runner = Linkage::Runner.new(config)
82
+ # runner.execute
83
+ # ```
84
+ #
85
+ # @see http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html Connecting to a database
3
86
  class Dataset
4
- attr_reader :field_set, :table_name
87
+ # @return [Symbol] Returns this dataset's table name.
88
+ attr_reader :table_name
5
89
 
90
+ # @return [FieldSet] Returns this dataset's {FieldSet}.
91
+ attr_reader :field_set
92
+
93
+ # Returns a new instance of {Dataset}.
94
+ #
95
+ # @overload initialize(dataset)
96
+ # Use a specific {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
97
+ # @param dataset [Sequel::Dataset]
98
+ # @overload initialize(database, table_name)
99
+ # Use a specific {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html `Sequel::Database`}.
100
+ # @param database [Sequel::Database]
101
+ # @param table_name [Symbol, String]
102
+ # @overload initialize(uri, table_name, options = {})
103
+ # Use {http://sequel.jeremyevans.net/rdoc/classes/Sequel.html#method-c-connect `Sequel.connect`}
104
+ # to connect to a database.
105
+ # @param uri [String, Hash]
106
+ # @param table_name [Symbol, String]
107
+ # @param options [Hash]
108
+ #
6
109
  def initialize(*args)
7
110
  if args.length == 0 || args.length > 3
8
111
  raise ArgumentError, "wrong number of arguments (#{args.length} for 1..3)"
@@ -31,17 +134,23 @@ module Linkage
31
134
  @field_set = FieldSet.new(self)
32
135
  end
33
136
 
137
+ # Returns the underlying {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
138
+ # @return [Sequel::Dataset]
34
139
  def obj
35
140
  @dataset
36
141
  end
37
142
 
143
+ # Set the underlying {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
38
144
  def obj=(value)
39
145
  @dataset = value
40
146
  end
147
+ private :obj=
41
148
 
42
- # Setup a linkage with another dataset
149
+ # Create a {Configuration} for record linkage.
43
150
  #
44
- # @return [Linkage::Configuration]
151
+ # @param dataset [Dataset]
152
+ # @param result_set [ResultSet]
153
+ # @return [Configuration]
45
154
  def link_with(dataset, result_set)
46
155
  other = dataset.eql?(self) ? nil : dataset
47
156
  conf = Configuration.new(self, other, result_set)
@@ -51,25 +160,31 @@ module Linkage
51
160
  conf
52
161
  end
53
162
 
54
- def database_type
55
- @db.database_type
56
- end
57
-
163
+ # Return the dataset's schema.
164
+ #
165
+ # @return [Array]
166
+ # @see http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html#method-i-schema Sequel::Database#schema
58
167
  def schema
59
168
  @db.schema(@table_name)
60
169
  end
61
170
 
171
+ # Returns {FieldSet#primary_key}.
172
+ #
173
+ # @return [Field]
174
+ # @see FieldSet#primary_key
62
175
  def primary_key
63
176
  @field_set.primary_key
64
177
  end
65
178
 
66
179
  protected
67
180
 
181
+ # Delegate methods to the underlying
182
+ # {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
68
183
  def method_missing(name, *args, &block)
69
184
  result = @dataset.send(name, *args, &block)
70
185
  if result.kind_of?(Sequel::Dataset)
71
186
  new_object = clone
72
- new_object.obj = result
187
+ new_object.send(:obj=, result)
73
188
  new_object
74
189
  else
75
190
  result