linkage 0.1.0.pre → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/Guardfile +0 -1
- data/TODO +2 -0
- data/lib/linkage.rb +1 -0
- data/lib/linkage/comparator.rb +12 -2
- data/lib/linkage/comparators/strcompare.rb +68 -16
- data/lib/linkage/configuration.rb +112 -8
- data/lib/linkage/dataset.rb +124 -9
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +55 -18
- data/lib/linkage/field_set.rb +20 -0
- data/lib/linkage/helpers.rb +7 -0
- data/lib/linkage/helpers/csv.rb +28 -0
- data/lib/linkage/helpers/database.rb +47 -0
- data/lib/linkage/import_buffer.rb +3 -3
- data/lib/linkage/match_recorder.rb +4 -0
- data/lib/linkage/match_set.rb +51 -13
- data/lib/linkage/match_sets/csv.rb +36 -9
- data/lib/linkage/match_sets/database.rb +43 -2
- data/lib/linkage/matcher.rb +49 -3
- data/lib/linkage/result_set.rb +60 -22
- data/lib/linkage/result_sets/csv.rb +46 -28
- data/lib/linkage/result_sets/database.rb +44 -26
- data/lib/linkage/runner.rb +10 -0
- data/lib/linkage/score_recorder.rb +5 -0
- data/lib/linkage/score_set.rb +78 -20
- data/lib/linkage/score_sets/csv.rb +41 -15
- data/lib/linkage/score_sets/database.rb +43 -5
- data/lib/linkage/version.rb +1 -1
- data/linkage.gemspec +2 -0
- data/misc/uml/linkage.dia +0 -0
- data/misc/uml/linkage.png +0 -0
- data/misc/uml/linkage.svg +197 -0
- data/test/helper.rb +2 -11
- data/test/integration/test_database_result_set.rb +4 -2
- data/test/unit/comparators/test_strcompare.rb +29 -0
- data/test/unit/match_sets/test_csv.rb +44 -13
- data/test/unit/match_sets/test_database.rb +42 -1
- data/test/unit/result_sets/test_csv.rb +9 -69
- data/test/unit/result_sets/test_database.rb +20 -11
- data/test/unit/score_sets/test_csv.rb +68 -25
- data/test/unit/score_sets/test_database.rb +57 -1
- data/test/unit/test_comparator.rb +8 -0
- data/test/unit/test_configuration.rb +33 -6
- data/test/unit/test_dataset.rb +0 -7
- data/test/unit/test_matcher.rb +52 -3
- data/test/unit/test_result_set.rb +8 -14
- metadata +66 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57f4bad92110063c64ed24a43b2a805f4fe6d051
|
4
|
+
data.tar.gz: 9d9ff5fda254dae02bde47dac69c94af56300d51
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63552888a854815988985d54a628e7594d072765027767ea95b159ef80408c64cc2d5ec608892be15c356aecd028529a02df6ca2792827c26aaffa605ab28b65
|
7
|
+
data.tar.gz: 7531bca7bec718605f940557a1572fd3f74528883d08d7137b4c775f95e8b7b4036fe00a6c270f24b30ebf093bb345f97f425f4813aae620f4e69c28b99abde3
|
data/.yardopts
CHANGED
data/Guardfile
CHANGED
@@ -2,7 +2,6 @@ guard 'test' do
|
|
2
2
|
watch(%r{^lib/linkage/([^/]+/)*([^/]+)\.rb$}) { |m| "test/unit/#{m[1]}test_#{m[2]}.rb" }
|
3
3
|
watch(%r{^test/unit/([^/]+/)*test_.+\.rb$})
|
4
4
|
watch(%r{^test/integration/test_.+\.rb$})
|
5
|
-
watch('lib/linkage/configuration.rb') { "test/unit/test_dataset.rb" }
|
6
5
|
watch('test/helper.rb') { "test" }
|
7
6
|
end
|
8
7
|
|
data/TODO
CHANGED
data/lib/linkage.rb
CHANGED
data/lib/linkage/comparator.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Linkage
|
2
2
|
# {Comparator} is the superclass for comparators in Linkage. Comparators are
|
3
|
-
# used to compare
|
4
|
-
#
|
3
|
+
# used to compare records and compute scores based on how closely the records
|
4
|
+
# relate.
|
5
5
|
#
|
6
6
|
# Each comparator should inherit from {Comparator} and declare itself as
|
7
7
|
# simple or advanced by overriding {#type} (the default is simple). Simple
|
@@ -22,6 +22,16 @@ module Linkage
|
|
22
22
|
class Comparator
|
23
23
|
include Observable
|
24
24
|
|
25
|
+
attr_reader :weight
|
26
|
+
|
27
|
+
def weigh(weight)
|
28
|
+
return if weight.nil?
|
29
|
+
if not weight.is_a?(Numeric)
|
30
|
+
raise "weight must be numeric type"
|
31
|
+
end
|
32
|
+
@weight = weight
|
33
|
+
end
|
34
|
+
|
25
35
|
class << self
|
26
36
|
# Register a new comparator. Subclasses must define at least {#score} for
|
27
37
|
# simple comparators, or {#score_dataset} and {#score_datasets} for
|
@@ -7,6 +7,7 @@ module Linkage
|
|
7
7
|
# the comparison, along with an operator. Valid operators are:
|
8
8
|
#
|
9
9
|
# * `:jarowinkler` ([Jaro-Winkler distance](http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance))
|
10
|
+
# * `:damerau_levenshtein` ([Damerau-Levenshtein distance](http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance))
|
10
11
|
#
|
11
12
|
# Consider the following example, using a {Configuration} as part of
|
12
13
|
# {Dataset#link_with}:
|
@@ -17,8 +18,11 @@ module Linkage
|
|
17
18
|
#
|
18
19
|
# For each record, the values of the `foo` and `bar` fields are compared
|
19
20
|
# using the Jaro-Winkler distance algorithm.
|
21
|
+
#
|
22
|
+
# Damerau-Levenshtein is a modified Levenshtein that allows for transpositions.
|
23
|
+
# It has additionally been modified so that additions and deletions cost only 0.5.
|
20
24
|
class Strcompare < Comparator
|
21
|
-
VALID_OPERATIONS = [:jarowinkler]
|
25
|
+
VALID_OPERATIONS = [:jarowinkler, :reverse_jarowinkler, :damerau_levenshtein]
|
22
26
|
|
23
27
|
def initialize(field_1, field_2, operation)
|
24
28
|
if field_1.ruby_type[:type] != String || field_2.ruby_type[:type] != String
|
@@ -38,6 +42,10 @@ module Linkage
|
|
38
42
|
case @operation
|
39
43
|
when :jarowinkler
|
40
44
|
jarowinkler(record_1[@name_1], record_2[@name_2])
|
45
|
+
when :reverse_jarowinkler
|
46
|
+
reverse_jarowinkler(record_1[@name_1], record_2[@name_2])
|
47
|
+
when :damerau_levenshtein
|
48
|
+
damerau_levenshtein(record_1[@name_1], record_2[@name_2])
|
41
49
|
end
|
42
50
|
|
43
51
|
result
|
@@ -50,33 +58,77 @@ module Linkage
|
|
50
58
|
ba = b.split('')
|
51
59
|
al = a.length
|
52
60
|
bl = b.length
|
61
|
+
return 0 if al == 0 || bl == 0
|
53
62
|
l = 0
|
54
63
|
for i in Range.new(0, [[al, bl].min, 4].min-1)
|
55
64
|
break if aa[i] != ba[i]
|
56
65
|
l += 1
|
57
66
|
end
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
md = [[al, bl].max/2 - 1, 0].max
|
67
|
+
md = [[al, bl].max/2 - 1, 1].max
|
68
|
+
usea = []
|
69
|
+
useb = []
|
70
|
+
# simplify to matching characters
|
63
71
|
for i in Range.new(0, al-1)
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
72
|
+
fi = [[i - md, 0].max, bl-1].min
|
73
|
+
li = [i + md, bl-1].min
|
74
|
+
for j in Range.new(fi, li)
|
75
|
+
if aa[i] == ba[j] and not useb.include?(j)
|
76
|
+
usea << i
|
77
|
+
useb << j
|
78
|
+
break
|
79
|
+
end
|
70
80
|
end
|
71
|
-
ba.delete_at(bi) if !bi.nil?
|
72
|
-
aj.delete_at(aji) if !aji.nil?
|
73
|
-
bj.delete_at(bji) if !bji.nil?
|
74
81
|
end
|
82
|
+
bada = Range.new(0, al-1).to_a - usea
|
83
|
+
badb = Range.new(0, bl-1).to_a - useb
|
84
|
+
bada.reverse.each { |x| aa.delete_at(x) }
|
85
|
+
badb.reverse.each { |x| ba.delete_at(x) }
|
86
|
+
nm = aa.length
|
75
87
|
return 0 if nm == 0
|
76
|
-
|
88
|
+
# count transpositions
|
89
|
+
nt = 0
|
90
|
+
for i in Range.new(0, nm-1)
|
91
|
+
nt +=1 if aa[i] != ba[i]
|
92
|
+
end
|
93
|
+
d = (nm/al.to_f + nm/bl.to_f + (nm-nt/2.0)/nm.to_f)/3.0
|
77
94
|
w = (d + l * 0.1 * (1 - d)).round(3)
|
78
95
|
w
|
79
96
|
end
|
97
|
+
|
98
|
+
def reverse_jarowinkler(w1, w2)
|
99
|
+
jarowinkler(w1.reverse, w2.reverse)
|
100
|
+
end
|
101
|
+
|
102
|
+
def damerau_levenshtein(w1, w2)
|
103
|
+
a = w1.downcase
|
104
|
+
b = w2.downcase
|
105
|
+
aa = a.split('')
|
106
|
+
ba = b.split('')
|
107
|
+
al = a.length
|
108
|
+
bl = b.length
|
109
|
+
denom = [al, bl].max
|
110
|
+
return 0 if denom == 0
|
111
|
+
oneago = nil
|
112
|
+
thisrow = (1..bl).to_a + [0]
|
113
|
+
al.times do |x|
|
114
|
+
twoago, oneago, thisrow = oneago, thisrow, [0] * bl + [x + 1]
|
115
|
+
bl.times do |y|
|
116
|
+
if aa[x] == ba[y]
|
117
|
+
thisrow[y] = oneago[y - 1]
|
118
|
+
else
|
119
|
+
delcost = oneago[y] + 0.5
|
120
|
+
addcost = thisrow[y - 1] + 0.5
|
121
|
+
subcost = oneago[y - 1] + 1
|
122
|
+
thisrow[y] = [delcost, addcost, subcost].min
|
123
|
+
# remove this statement for original levenshtein
|
124
|
+
if x > 0 and y > 0 and aa[x] == ba[y-1] and aa[x-1] == ba[y]
|
125
|
+
thisrow[y] = [thisrow[y], twoago[y-2] + 1].min
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
return (1 - thisrow[bl - 1] / denom.to_f).round(3)
|
131
|
+
end
|
80
132
|
end
|
81
133
|
|
82
134
|
Comparator.register('strcompare', Strcompare)
|
@@ -1,21 +1,124 @@
|
|
1
1
|
module Linkage
|
2
|
+
# {Configuration} keeps track of everything needed to run a record linkage,
|
3
|
+
# including which datasets you want to link, how you want to link them, and
|
4
|
+
# where you want to store the results. Once created, you can supply the
|
5
|
+
# {Configuration} to {Runner#initialize} and run it with {Runner#execute}.
|
6
|
+
#
|
7
|
+
# To create a configuration, usually you will want to use {Dataset#link_with},
|
8
|
+
# but you can create it directly if you like (see {#initialize}), like so:
|
9
|
+
#
|
10
|
+
# ```ruby
|
11
|
+
# dataset_1 = Linkage::Dataset.new('mysql://example.com/database_name', 'foo')
|
12
|
+
# dataset_2 = Linkage::Dataset.new('postgres://example.com/other_name', 'bar')
|
13
|
+
# result_set = Linkage::ResultSet['csv'].new('/home/foo/linkage')
|
14
|
+
# config = Linkage::Configuration.new(dataset_1, dataset_2, result_set)
|
15
|
+
# ```
|
16
|
+
#
|
17
|
+
# To add comparators to {Configuration}, you can call methods with the same
|
18
|
+
# name as registered comparators. Here's the list of builtin comparators:
|
19
|
+
#
|
20
|
+
# | Name | Class |
|
21
|
+
# |------------|---------------------------|
|
22
|
+
# | compare | {Comparators::Compare} |
|
23
|
+
# | strcompare | {Comparators::Strcompare} |
|
24
|
+
# | within | {Comparators::Within} |
|
25
|
+
#
|
26
|
+
# For example, if you want to add a {Comparators::Compare} comparator to
|
27
|
+
# your configuration, run this:
|
28
|
+
#
|
29
|
+
# ```ruby
|
30
|
+
# config.compare([:foo], [:bar], :equal_to)
|
31
|
+
# ```
|
32
|
+
#
|
33
|
+
# This works via {Configuration#method_missing}. First, the comparator class
|
34
|
+
# is fetched via {Comparator.[]}. Then fields are looked up in the {FieldSet}
|
35
|
+
# of the {Dataset}. Those {Field}s along with any other arguments you specify
|
36
|
+
# are passed to the constructor of the comparator you chose.
|
37
|
+
#
|
38
|
+
# {Configuration} also contains information about how records are matched.
|
39
|
+
# Once scores are computed, the scores for each pair of records are averaged
|
40
|
+
# and compared against a threshold value. Record pairs that have an average
|
41
|
+
# score greater than or equal to the threshold value are considered matches.
|
42
|
+
#
|
43
|
+
# The threshold value is `0.5` by default, but you can change it by setting
|
44
|
+
# {#threshold} like so:
|
45
|
+
#
|
46
|
+
# ```ruby
|
47
|
+
# config.threshold = 0.75
|
48
|
+
# ```
|
49
|
+
#
|
50
|
+
# Since scores range between 0 and 1 (inclusive), be sure to set a threshold
|
51
|
+
# value within the same range. The actual matching work is done by the
|
52
|
+
# {Matcher} class.
|
53
|
+
#
|
54
|
+
# @see Dataset
|
55
|
+
# @see ResultSet
|
56
|
+
# @see Comparator
|
57
|
+
# @see Matcher
|
58
|
+
# @see Runner
|
2
59
|
class Configuration
|
3
|
-
attr_reader :dataset_1, :dataset_2, :result_set, :comparators
|
4
|
-
attr_accessor :
|
60
|
+
attr_reader :dataset_1, :dataset_2, :result_set, :comparators, :threshold
|
61
|
+
attr_accessor :algorithm
|
5
62
|
|
63
|
+
def threshold=(threshold)
|
64
|
+
if not threshold.is_a?(Numeric)
|
65
|
+
raise "threshold must be numeric type"
|
66
|
+
end
|
67
|
+
@threshold = threshold
|
68
|
+
end
|
69
|
+
# Create a new instance of {Configuration}.
|
70
|
+
#
|
71
|
+
# @overload initialize(dataset_1, dataset_2, result_set)
|
72
|
+
# Create a linkage configuration for two datasets and a result set.
|
73
|
+
# @param [Linkage::Dataset] dataset_1
|
74
|
+
# @param [Linkage::Dataset] dataset_2
|
75
|
+
# @param [Linkage::ResultSet] result_set
|
76
|
+
# @overload initialize(dataset, result_set)
|
77
|
+
# Create a linkage configuration for one dataset and a result set.
|
78
|
+
# @param [Linkage::Dataset] dataset
|
79
|
+
# @param [Linkage::ResultSet] result_set
|
80
|
+
# @overload initialize(dataset_1, dataset_2, score_set, match_set)
|
81
|
+
# Create a linkage configuration for two datasets, a score set, and a
|
82
|
+
# match set.
|
83
|
+
# @param [Linkage::Dataset] dataset_1
|
84
|
+
# @param [Linkage::Dataset] dataset_2
|
85
|
+
# @param [Linkage::ScoreSet] score_set
|
86
|
+
# @param [Linkage::MatchSet] match_set
|
87
|
+
# @overload initialize(dataset, score_set, match_set)
|
88
|
+
# Create a linkage configuration for one dataset, a score set, and a
|
89
|
+
# match set.
|
90
|
+
# @param [Linkage::Dataset] dataset
|
91
|
+
# @param [Linkage::ScoreSet] score_set
|
92
|
+
# @param [Linkage::MatchSet] match_set
|
6
93
|
def initialize(*args)
|
7
|
-
if args.length < 2 || args.length >
|
8
|
-
raise ArgumentError, "wrong number of arguments (#{args.length} for
|
94
|
+
if args.length < 2 || args.length > 4
|
95
|
+
raise ArgumentError, "wrong number of arguments (#{args.length} for 2..4)"
|
9
96
|
end
|
10
97
|
|
11
98
|
@dataset_1 = args[0]
|
12
|
-
|
99
|
+
case args.length
|
100
|
+
when 2
|
101
|
+
# dataset and result set
|
102
|
+
@result_set = args[1]
|
103
|
+
when 3
|
104
|
+
# dataset 1, dataset 2, and result set
|
105
|
+
# dataset, score set, and match set
|
106
|
+
case args[1]
|
107
|
+
when Dataset, nil
|
108
|
+
@dataset_2 = args[1]
|
109
|
+
@result_set = args[2]
|
110
|
+
when ScoreSet
|
111
|
+
@result_set = ResultSet.new(args[1], args[2])
|
112
|
+
end
|
113
|
+
when 4
|
114
|
+
# dataset 1, dataset 2, score set, and match set
|
13
115
|
@dataset_2 = args[1]
|
116
|
+
@result_set = ResultSet.new(args[2], args[3])
|
14
117
|
end
|
15
|
-
@result_set = args[-1]
|
16
118
|
|
17
119
|
@comparators = []
|
18
|
-
@
|
120
|
+
@algorithm = :mean
|
121
|
+
@threshold = 0.5
|
19
122
|
end
|
20
123
|
|
21
124
|
def score_recorder
|
@@ -29,7 +132,7 @@ module Linkage
|
|
29
132
|
end
|
30
133
|
|
31
134
|
def matcher
|
32
|
-
Matcher.new(@comparators, @result_set.score_set, @algorithm
|
135
|
+
Matcher.new(@comparators, @result_set.score_set, @algorithm, @threshold)
|
33
136
|
end
|
34
137
|
|
35
138
|
def match_recorder(matcher)
|
@@ -60,6 +163,7 @@ module Linkage
|
|
60
163
|
|
61
164
|
comparator = klass.new(*args, &block)
|
62
165
|
@comparators << comparator
|
166
|
+
return comparator
|
63
167
|
end
|
64
168
|
|
65
169
|
protected
|
data/lib/linkage/dataset.rb
CHANGED
@@ -1,8 +1,111 @@
|
|
1
1
|
module Linkage
|
2
|
-
#
|
2
|
+
# {Dataset} is a representation of a database table. It is a thin wrapper
|
3
|
+
# around a
|
4
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
5
|
+
#
|
6
|
+
# There are three ways to create a {Dataset}.
|
7
|
+
#
|
8
|
+
# Pass in a {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}:
|
9
|
+
#
|
10
|
+
# ```ruby
|
11
|
+
# Linkage::Dataset.new(db[:foo])
|
12
|
+
# ```
|
13
|
+
#
|
14
|
+
# Pass in a {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html `Sequel::Database`}
|
15
|
+
# and a table name:
|
16
|
+
#
|
17
|
+
# ```ruby
|
18
|
+
# Linkage::Dataset.new(db, :foo)
|
19
|
+
# ```
|
20
|
+
#
|
21
|
+
# Pass in a
|
22
|
+
# {http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html Sequel-style}
|
23
|
+
# connection URI, a table name, and any options you want to pass to
|
24
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel.html#method-c-connect `Sequel.connect`}.
|
25
|
+
#
|
26
|
+
# ```ruby
|
27
|
+
# Linkage::Dataset.new("mysql2://example.com/foo", :bar, :user => 'viking', :password => 'secret')
|
28
|
+
# ```
|
29
|
+
#
|
30
|
+
# Once you've made a {Dataset}, you can use any
|
31
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}
|
32
|
+
# method on it you wish. For example, if you want to limit the dataset to
|
33
|
+
# records that refer to people born after 1985 (assuming date of birth is
|
34
|
+
# stored as a date type):
|
35
|
+
#
|
36
|
+
# ```ruby
|
37
|
+
# filtered_dataset = dataset.where('dob > :date', :date => Date.new(1985, 1, 1))
|
38
|
+
# ```
|
39
|
+
#
|
40
|
+
# Note that
|
41
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}
|
42
|
+
# methods return a __clone__ of a dataset, so you must assign the return value
|
43
|
+
# to a variable.
|
44
|
+
#
|
45
|
+
# Once you have your {Dataset} how you want it, you can use the {#link_with}
|
46
|
+
# method to create a {Configuration} for record linkage. The {#link_with}
|
47
|
+
# method takes another {Dataset} object and a {ResultSet} and returns a
|
48
|
+
# {Configuration}.
|
49
|
+
#
|
50
|
+
# ```ruby
|
51
|
+
# config = dataset.link_with(other_dataset, result_set)
|
52
|
+
# config.compare([:foo], [:bar], :equal_to)
|
53
|
+
# ```
|
54
|
+
#
|
55
|
+
# You can pass in a {ScoreSet} and {MatchSet} instead of a {ResultSet} if you
|
56
|
+
# wish:
|
57
|
+
#
|
58
|
+
# ```ruby
|
59
|
+
# config = dataset.link_with(other_dataset, score_set, match_set)
|
60
|
+
# ```
|
61
|
+
#
|
62
|
+
# Note that a dataset can be linked with itself the same way, like so:
|
63
|
+
#
|
64
|
+
# ```ruby
|
65
|
+
# config = dataset.link_with(dataset, result_set)
|
66
|
+
# config.compare([:foo], [:bar], :equal_to)
|
67
|
+
# ```
|
68
|
+
#
|
69
|
+
# If you give {#link_with} a block, it will yield the same {Configuration}
|
70
|
+
# object to the block that it returns.
|
71
|
+
#
|
72
|
+
# ```ruby
|
73
|
+
# config = dataset.link_with(other_dataset, result_set) do |c|
|
74
|
+
# c.compare([:foo], [:bar], :equal_to)
|
75
|
+
# end
|
76
|
+
# ```
|
77
|
+
#
|
78
|
+
# Once that's done, use a {Runner} to run the record linkage:
|
79
|
+
#
|
80
|
+
# ```ruby
|
81
|
+
# runner = Linkage::Runner.new(config)
|
82
|
+
# runner.execute
|
83
|
+
# ```
|
84
|
+
#
|
85
|
+
# @see http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html Connecting to a database
|
3
86
|
class Dataset
|
4
|
-
|
87
|
+
# @return [Symbol] Returns this dataset's table name.
|
88
|
+
attr_reader :table_name
|
5
89
|
|
90
|
+
# @return [FieldSet] Returns this dataset's {FieldSet}.
|
91
|
+
attr_reader :field_set
|
92
|
+
|
93
|
+
# Returns a new instance of {Dataset}.
|
94
|
+
#
|
95
|
+
# @overload initialize(dataset)
|
96
|
+
# Use a specific {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
97
|
+
# @param dataset [Sequel::Dataset]
|
98
|
+
# @overload initialize(database, table_name)
|
99
|
+
# Use a specific {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html `Sequel::Database`}.
|
100
|
+
# @param database [Sequel::Database]
|
101
|
+
# @param table_name [Symbol, String]
|
102
|
+
# @overload initialize(uri, table_name, options = {})
|
103
|
+
# Use {http://sequel.jeremyevans.net/rdoc/classes/Sequel.html#method-c-connect `Sequel.connect`}
|
104
|
+
# to connect to a database.
|
105
|
+
# @param uri [String, Hash]
|
106
|
+
# @param table_name [Symbol, String]
|
107
|
+
# @param options [Hash]
|
108
|
+
#
|
6
109
|
def initialize(*args)
|
7
110
|
if args.length == 0 || args.length > 3
|
8
111
|
raise ArgumentError, "wrong number of arguments (#{args.length} for 1..3)"
|
@@ -31,17 +134,23 @@ module Linkage
|
|
31
134
|
@field_set = FieldSet.new(self)
|
32
135
|
end
|
33
136
|
|
137
|
+
# Returns the underlying {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
138
|
+
# @return [Sequel::Dataset]
|
34
139
|
def obj
|
35
140
|
@dataset
|
36
141
|
end
|
37
142
|
|
143
|
+
# Set the underlying {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
38
144
|
def obj=(value)
|
39
145
|
@dataset = value
|
40
146
|
end
|
147
|
+
private :obj=
|
41
148
|
|
42
|
-
#
|
149
|
+
# Create a {Configuration} for record linkage.
|
43
150
|
#
|
44
|
-
# @
|
151
|
+
# @param dataset [Dataset]
|
152
|
+
# @param result_set [ResultSet]
|
153
|
+
# @return [Configuration]
|
45
154
|
def link_with(dataset, result_set)
|
46
155
|
other = dataset.eql?(self) ? nil : dataset
|
47
156
|
conf = Configuration.new(self, other, result_set)
|
@@ -51,25 +160,31 @@ module Linkage
|
|
51
160
|
conf
|
52
161
|
end
|
53
162
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
163
|
+
# Return the dataset's schema.
|
164
|
+
#
|
165
|
+
# @return [Array]
|
166
|
+
# @see http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html#method-i-schema Sequel::Database#schema
|
58
167
|
def schema
|
59
168
|
@db.schema(@table_name)
|
60
169
|
end
|
61
170
|
|
171
|
+
# Returns {FieldSet#primary_key}.
|
172
|
+
#
|
173
|
+
# @return [Field]
|
174
|
+
# @see FieldSet#primary_key
|
62
175
|
def primary_key
|
63
176
|
@field_set.primary_key
|
64
177
|
end
|
65
178
|
|
66
179
|
protected
|
67
180
|
|
181
|
+
# Delegate methods to the underlying
|
182
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
68
183
|
def method_missing(name, *args, &block)
|
69
184
|
result = @dataset.send(name, *args, &block)
|
70
185
|
if result.kind_of?(Sequel::Dataset)
|
71
186
|
new_object = clone
|
72
|
-
new_object.obj
|
187
|
+
new_object.send(:obj=, result)
|
73
188
|
new_object
|
74
189
|
else
|
75
190
|
result
|