linkage 0.1.0.pre → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/Guardfile +0 -1
- data/TODO +2 -0
- data/lib/linkage.rb +1 -0
- data/lib/linkage/comparator.rb +12 -2
- data/lib/linkage/comparators/strcompare.rb +68 -16
- data/lib/linkage/configuration.rb +112 -8
- data/lib/linkage/dataset.rb +124 -9
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +55 -18
- data/lib/linkage/field_set.rb +20 -0
- data/lib/linkage/helpers.rb +7 -0
- data/lib/linkage/helpers/csv.rb +28 -0
- data/lib/linkage/helpers/database.rb +47 -0
- data/lib/linkage/import_buffer.rb +3 -3
- data/lib/linkage/match_recorder.rb +4 -0
- data/lib/linkage/match_set.rb +51 -13
- data/lib/linkage/match_sets/csv.rb +36 -9
- data/lib/linkage/match_sets/database.rb +43 -2
- data/lib/linkage/matcher.rb +49 -3
- data/lib/linkage/result_set.rb +60 -22
- data/lib/linkage/result_sets/csv.rb +46 -28
- data/lib/linkage/result_sets/database.rb +44 -26
- data/lib/linkage/runner.rb +10 -0
- data/lib/linkage/score_recorder.rb +5 -0
- data/lib/linkage/score_set.rb +78 -20
- data/lib/linkage/score_sets/csv.rb +41 -15
- data/lib/linkage/score_sets/database.rb +43 -5
- data/lib/linkage/version.rb +1 -1
- data/linkage.gemspec +2 -0
- data/misc/uml/linkage.dia +0 -0
- data/misc/uml/linkage.png +0 -0
- data/misc/uml/linkage.svg +197 -0
- data/test/helper.rb +2 -11
- data/test/integration/test_database_result_set.rb +4 -2
- data/test/unit/comparators/test_strcompare.rb +29 -0
- data/test/unit/match_sets/test_csv.rb +44 -13
- data/test/unit/match_sets/test_database.rb +42 -1
- data/test/unit/result_sets/test_csv.rb +9 -69
- data/test/unit/result_sets/test_database.rb +20 -11
- data/test/unit/score_sets/test_csv.rb +68 -25
- data/test/unit/score_sets/test_database.rb +57 -1
- data/test/unit/test_comparator.rb +8 -0
- data/test/unit/test_configuration.rb +33 -6
- data/test/unit/test_dataset.rb +0 -7
- data/test/unit/test_matcher.rb +52 -3
- data/test/unit/test_result_set.rb +8 -14
- metadata +66 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57f4bad92110063c64ed24a43b2a805f4fe6d051
|
4
|
+
data.tar.gz: 9d9ff5fda254dae02bde47dac69c94af56300d51
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63552888a854815988985d54a628e7594d072765027767ea95b159ef80408c64cc2d5ec608892be15c356aecd028529a02df6ca2792827c26aaffa605ab28b65
|
7
|
+
data.tar.gz: 7531bca7bec718605f940557a1572fd3f74528883d08d7137b4c775f95e8b7b4036fe00a6c270f24b30ebf093bb345f97f425f4813aae620f4e69c28b99abde3
|
data/.yardopts
CHANGED
data/Guardfile
CHANGED
@@ -2,7 +2,6 @@ guard 'test' do
|
|
2
2
|
watch(%r{^lib/linkage/([^/]+/)*([^/]+)\.rb$}) { |m| "test/unit/#{m[1]}test_#{m[2]}.rb" }
|
3
3
|
watch(%r{^test/unit/([^/]+/)*test_.+\.rb$})
|
4
4
|
watch(%r{^test/integration/test_.+\.rb$})
|
5
|
-
watch('lib/linkage/configuration.rb') { "test/unit/test_dataset.rb" }
|
6
5
|
watch('test/helper.rb') { "test" }
|
7
6
|
end
|
8
7
|
|
data/TODO
CHANGED
data/lib/linkage.rb
CHANGED
data/lib/linkage/comparator.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Linkage
|
2
2
|
# {Comparator} is the superclass for comparators in Linkage. Comparators are
|
3
|
-
# used to compare
|
4
|
-
#
|
3
|
+
# used to compare records and compute scores based on how closely the records
|
4
|
+
# relate.
|
5
5
|
#
|
6
6
|
# Each comparator should inherit from {Comparator} and declare itself as
|
7
7
|
# simple or advanced by overriding {#type} (the default is simple). Simple
|
@@ -22,6 +22,16 @@ module Linkage
|
|
22
22
|
class Comparator
|
23
23
|
include Observable
|
24
24
|
|
25
|
+
attr_reader :weight
|
26
|
+
|
27
|
+
def weigh(weight)
|
28
|
+
return if weight.nil?
|
29
|
+
if not weight.is_a?(Numeric)
|
30
|
+
raise "weight must be numeric type"
|
31
|
+
end
|
32
|
+
@weight = weight
|
33
|
+
end
|
34
|
+
|
25
35
|
class << self
|
26
36
|
# Register a new comparator. Subclasses must define at least {#score} for
|
27
37
|
# simple comparators, or {#score_dataset} and {#score_datasets} for
|
@@ -7,6 +7,7 @@ module Linkage
|
|
7
7
|
# the comparison, along with an operator. Valid operators are:
|
8
8
|
#
|
9
9
|
# * `:jarowinkler` ([Jaro-Winkler distance](http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance))
|
10
|
+
# * `:damerau_levenshtein` ([Damerau-Levenshtein distance](http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance))
|
10
11
|
#
|
11
12
|
# Consider the following example, using a {Configuration} as part of
|
12
13
|
# {Dataset#link_with}:
|
@@ -17,8 +18,11 @@ module Linkage
|
|
17
18
|
#
|
18
19
|
# For each record, the values of the `foo` and `bar` fields are compared
|
19
20
|
# using the Jaro-Winkler distance algorithm.
|
21
|
+
#
|
22
|
+
# Damerau-Levenshtein is a modified Levenshtein distance that also allows for transpositions.
|
23
|
+
# It has additionally been modified so that each addition or deletion costs only 0.5.
|
20
24
|
class Strcompare < Comparator
|
21
|
-
VALID_OPERATIONS = [:jarowinkler]
|
25
|
+
VALID_OPERATIONS = [:jarowinkler, :reverse_jarowinkler, :damerau_levenshtein]
|
22
26
|
|
23
27
|
def initialize(field_1, field_2, operation)
|
24
28
|
if field_1.ruby_type[:type] != String || field_2.ruby_type[:type] != String
|
@@ -38,6 +42,10 @@ module Linkage
|
|
38
42
|
case @operation
|
39
43
|
when :jarowinkler
|
40
44
|
jarowinkler(record_1[@name_1], record_2[@name_2])
|
45
|
+
when :reverse_jarowinkler
|
46
|
+
reverse_jarowinkler(record_1[@name_1], record_2[@name_2])
|
47
|
+
when :damerau_levenshtein
|
48
|
+
damerau_levenshtein(record_1[@name_1], record_2[@name_2])
|
41
49
|
end
|
42
50
|
|
43
51
|
result
|
@@ -50,33 +58,77 @@ module Linkage
|
|
50
58
|
ba = b.split('')
|
51
59
|
al = a.length
|
52
60
|
bl = b.length
|
61
|
+
return 0 if al == 0 || bl == 0
|
53
62
|
l = 0
|
54
63
|
for i in Range.new(0, [[al, bl].min, 4].min-1)
|
55
64
|
break if aa[i] != ba[i]
|
56
65
|
l += 1
|
57
66
|
end
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
md = [[al, bl].max/2 - 1, 0].max
|
67
|
+
md = [[al, bl].max/2 - 1, 1].max
|
68
|
+
usea = []
|
69
|
+
useb = []
|
70
|
+
# simplify to matching characters
|
63
71
|
for i in Range.new(0, al-1)
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
72
|
+
fi = [[i - md, 0].max, bl-1].min
|
73
|
+
li = [i + md, bl-1].min
|
74
|
+
for j in Range.new(fi, li)
|
75
|
+
if aa[i] == ba[j] and not useb.include?(j)
|
76
|
+
usea << i
|
77
|
+
useb << j
|
78
|
+
break
|
79
|
+
end
|
70
80
|
end
|
71
|
-
ba.delete_at(bi) if !bi.nil?
|
72
|
-
aj.delete_at(aji) if !aji.nil?
|
73
|
-
bj.delete_at(bji) if !bji.nil?
|
74
81
|
end
|
82
|
+
bada = Range.new(0, al-1).to_a - usea
|
83
|
+
badb = Range.new(0, bl-1).to_a - useb
|
84
|
+
bada.reverse.each { |x| aa.delete_at(x) }
|
85
|
+
badb.reverse.each { |x| ba.delete_at(x) }
|
86
|
+
nm = aa.length
|
75
87
|
return 0 if nm == 0
|
76
|
-
|
88
|
+
# count transpositions
|
89
|
+
nt = 0
|
90
|
+
for i in Range.new(0, nm-1)
|
91
|
+
nt +=1 if aa[i] != ba[i]
|
92
|
+
end
|
93
|
+
d = (nm/al.to_f + nm/bl.to_f + (nm-nt/2.0)/nm.to_f)/3.0
|
77
94
|
w = (d + l * 0.1 * (1 - d)).round(3)
|
78
95
|
w
|
79
96
|
end
|
97
|
+
|
98
|
+
def reverse_jarowinkler(w1, w2)
|
99
|
+
jarowinkler(w1.reverse, w2.reverse)
|
100
|
+
end
|
101
|
+
|
102
|
+
def damerau_levenshtein(w1, w2)
|
103
|
+
a = w1.downcase
|
104
|
+
b = w2.downcase
|
105
|
+
aa = a.split('')
|
106
|
+
ba = b.split('')
|
107
|
+
al = a.length
|
108
|
+
bl = b.length
|
109
|
+
denom = [al, bl].max
|
110
|
+
return 0 if denom == 0
|
111
|
+
oneago = nil
|
112
|
+
thisrow = (1..bl).to_a + [0]
|
113
|
+
al.times do |x|
|
114
|
+
twoago, oneago, thisrow = oneago, thisrow, [0] * bl + [x + 1]
|
115
|
+
bl.times do |y|
|
116
|
+
if aa[x] == ba[y]
|
117
|
+
thisrow[y] = oneago[y - 1]
|
118
|
+
else
|
119
|
+
delcost = oneago[y] + 0.5
|
120
|
+
addcost = thisrow[y - 1] + 0.5
|
121
|
+
subcost = oneago[y - 1] + 1
|
122
|
+
thisrow[y] = [delcost, addcost, subcost].min
|
123
|
+
# adjacent-transposition check; remove this statement for the original Levenshtein distance
|
124
|
+
if x > 0 and y > 0 and aa[x] == ba[y-1] and aa[x-1] == ba[y]
|
125
|
+
thisrow[y] = [thisrow[y], twoago[y-2] + 1].min
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
return (1 - thisrow[bl - 1] / denom.to_f).round(3)
|
131
|
+
end
|
80
132
|
end
|
81
133
|
|
82
134
|
Comparator.register('strcompare', Strcompare)
|
@@ -1,21 +1,124 @@
|
|
1
1
|
module Linkage
|
2
|
+
# {Configuration} keeps track of everything needed to run a record linkage,
|
3
|
+
# including which datasets you want to link, how you want to link them, and
|
4
|
+
# where you want to store the results. Once created, you can supply the
|
5
|
+
# {Configuration} to {Runner#initialize} and run it with {Runner#execute}.
|
6
|
+
#
|
7
|
+
# To create a configuration, usually you will want to use {Dataset#link_with},
|
8
|
+
# but you can create it directly if you like (see {#initialize}), like so:
|
9
|
+
#
|
10
|
+
# ```ruby
|
11
|
+
# dataset_1 = Linkage::Dataset.new('mysql://example.com/database_name', 'foo')
|
12
|
+
# dataset_2 = Linkage::Dataset.new('postgres://example.com/other_name', 'bar')
|
13
|
+
# result_set = Linkage::ResultSet['csv'].new('/home/foo/linkage')
|
14
|
+
# config = Linkage::Configuration.new(dataset_1, dataset_2, result_set)
|
15
|
+
# ```
|
16
|
+
#
|
17
|
+
# To add comparators to {Configuration}, you can call methods with the same
|
18
|
+
# name as registered comparators. Here's the list of builtin comparators:
|
19
|
+
#
|
20
|
+
# | Name | Class |
|
21
|
+
# |------------|---------------------------|
|
22
|
+
# | compare | {Comparators::Compare} |
|
23
|
+
# | strcompare | {Comparators::Strcompare} |
|
24
|
+
# | within | {Comparators::Within} |
|
25
|
+
#
|
26
|
+
# For example, if you want to add a {Comparators::Compare} comparator to
|
27
|
+
# your configuration, run this:
|
28
|
+
#
|
29
|
+
# ```ruby
|
30
|
+
# config.compare([:foo], [:bar], :equal_to)
|
31
|
+
# ```
|
32
|
+
#
|
33
|
+
# This works via {Configuration#method_missing}. First, the comparator class
|
34
|
+
# is fetched via {Comparator.[]}. Then fields are looked up in the {FieldSet}
|
35
|
+
# of the {Dataset}. Those {Field}s along with any other arguments you specify
|
36
|
+
# are passed to the constructor of the comparator you chose.
|
37
|
+
#
|
38
|
+
# {Configuration} also contains information about how records are matched.
|
39
|
+
# Once scores are computed, the scores for each pair of records are averaged
|
40
|
+
# and compared against a threshold value. Record pairs that have an average
|
41
|
+
# score greater than or equal to the threshold value are considered matches.
|
42
|
+
#
|
43
|
+
# The threshold value is `0.5` by default, but you can change it by setting
|
44
|
+
# {#threshold} like so:
|
45
|
+
#
|
46
|
+
# ```ruby
|
47
|
+
# config.threshold = 0.75
|
48
|
+
# ```
|
49
|
+
#
|
50
|
+
# Since scores range between 0 and 1 (inclusive), be sure to set a threshold
|
51
|
+
# value within the same range. The actual matching work is done by the
|
52
|
+
# {Matcher} class.
|
53
|
+
#
|
54
|
+
# @see Dataset
|
55
|
+
# @see ResultSet
|
56
|
+
# @see Comparator
|
57
|
+
# @see Matcher
|
58
|
+
# @see Runner
|
2
59
|
class Configuration
|
3
|
-
attr_reader :dataset_1, :dataset_2, :result_set, :comparators
|
4
|
-
attr_accessor :
|
60
|
+
attr_reader :dataset_1, :dataset_2, :result_set, :comparators, :threshold
|
61
|
+
attr_accessor :algorithm
|
5
62
|
|
63
|
+
def threshold=(threshold)
|
64
|
+
if not threshold.is_a?(Numeric)
|
65
|
+
raise "threshold must be numeric type"
|
66
|
+
end
|
67
|
+
@threshold = threshold
|
68
|
+
end
|
69
|
+
# Create a new instance of {Configuration}.
|
70
|
+
#
|
71
|
+
# @overload initialize(dataset_1, dataset_2, result_set)
|
72
|
+
# Create a linkage configuration for two datasets and a result set.
|
73
|
+
# @param [Linkage::Dataset] dataset_1
|
74
|
+
# @param [Linkage::Dataset] dataset_2
|
75
|
+
# @param [Linkage::ResultSet] result_set
|
76
|
+
# @overload initialize(dataset, result_set)
|
77
|
+
# Create a linkage configuration for one dataset and a result set.
|
78
|
+
# @param [Linkage::Dataset] dataset
|
79
|
+
# @param [Linkage::ResultSet] result_set
|
80
|
+
# @overload initialize(dataset_1, dataset_2, score_set, match_set)
|
81
|
+
# Create a linkage configuration for two datasets, a score set, and a
|
82
|
+
# match set.
|
83
|
+
# @param [Linkage::Dataset] dataset_1
|
84
|
+
# @param [Linkage::Dataset] dataset_2
|
85
|
+
# @param [Linkage::ScoreSet] score_set
|
86
|
+
# @param [Linkage::MatchSet] match_set
|
87
|
+
# @overload initialize(dataset, score_set, match_set)
|
88
|
+
# Create a linkage configuration for one dataset, a score set, and a
|
89
|
+
# match set.
|
90
|
+
# @param [Linkage::Dataset] dataset
|
91
|
+
# @param [Linkage::ScoreSet] score_set
|
92
|
+
# @param [Linkage::MatchSet] match_set
|
6
93
|
def initialize(*args)
|
7
|
-
if args.length < 2 || args.length >
|
8
|
-
raise ArgumentError, "wrong number of arguments (#{args.length} for
|
94
|
+
if args.length < 2 || args.length > 4
|
95
|
+
raise ArgumentError, "wrong number of arguments (#{args.length} for 2..4)"
|
9
96
|
end
|
10
97
|
|
11
98
|
@dataset_1 = args[0]
|
12
|
-
|
99
|
+
case args.length
|
100
|
+
when 2
|
101
|
+
# dataset and result set
|
102
|
+
@result_set = args[1]
|
103
|
+
when 3
|
104
|
+
# dataset 1, dataset 2, and result set
|
105
|
+
# or: dataset, score set, and match set
|
106
|
+
case args[1]
|
107
|
+
when Dataset, nil
|
108
|
+
@dataset_2 = args[1]
|
109
|
+
@result_set = args[2]
|
110
|
+
when ScoreSet
|
111
|
+
@result_set = ResultSet.new(args[1], args[2])
|
112
|
+
end
|
113
|
+
when 4
|
114
|
+
# dataset 1, dataset 2, score set, and match set
|
13
115
|
@dataset_2 = args[1]
|
116
|
+
@result_set = ResultSet.new(args[2], args[3])
|
14
117
|
end
|
15
|
-
@result_set = args[-1]
|
16
118
|
|
17
119
|
@comparators = []
|
18
|
-
@
|
120
|
+
@algorithm = :mean
|
121
|
+
@threshold = 0.5
|
19
122
|
end
|
20
123
|
|
21
124
|
def score_recorder
|
@@ -29,7 +132,7 @@ module Linkage
|
|
29
132
|
end
|
30
133
|
|
31
134
|
def matcher
|
32
|
-
Matcher.new(@comparators, @result_set.score_set, @algorithm
|
135
|
+
Matcher.new(@comparators, @result_set.score_set, @algorithm, @threshold)
|
33
136
|
end
|
34
137
|
|
35
138
|
def match_recorder(matcher)
|
@@ -60,6 +163,7 @@ module Linkage
|
|
60
163
|
|
61
164
|
comparator = klass.new(*args, &block)
|
62
165
|
@comparators << comparator
|
166
|
+
return comparator
|
63
167
|
end
|
64
168
|
|
65
169
|
protected
|
data/lib/linkage/dataset.rb
CHANGED
@@ -1,8 +1,111 @@
|
|
1
1
|
module Linkage
|
2
|
-
#
|
2
|
+
# {Dataset} is a representation of a database table. It is a thin wrapper
|
3
|
+
# around a
|
4
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
5
|
+
#
|
6
|
+
# There are three ways to create a {Dataset}.
|
7
|
+
#
|
8
|
+
# Pass in a {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}:
|
9
|
+
#
|
10
|
+
# ```ruby
|
11
|
+
# Linkage::Dataset.new(db[:foo])
|
12
|
+
# ```
|
13
|
+
#
|
14
|
+
# Pass in a {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html `Sequel::Database`}
|
15
|
+
# and a table name:
|
16
|
+
#
|
17
|
+
# ```ruby
|
18
|
+
# Linkage::Dataset.new(db, :foo)
|
19
|
+
# ```
|
20
|
+
#
|
21
|
+
# Pass in a
|
22
|
+
# {http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html Sequel-style}
|
23
|
+
# connection URI, a table name, and any options you want to pass to
|
24
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel.html#method-c-connect `Sequel.connect`}.
|
25
|
+
#
|
26
|
+
# ```ruby
|
27
|
+
# Linkage::Dataset.new("mysql2://example.com/foo", :bar, :user => 'viking', :password => 'secret')
|
28
|
+
# ```
|
29
|
+
#
|
30
|
+
# Once you've made a {Dataset}, you can use any
|
31
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}
|
32
|
+
# method on it you wish. For example, if you want to limit the dataset to
|
33
|
+
# records that refer to people born after 1985 (assuming date of birth is
|
34
|
+
# stored as a date type):
|
35
|
+
#
|
36
|
+
# ```ruby
|
37
|
+
# filtered_dataset = dataset.where('dob > :date', :date => Date.new(1985, 1, 1))
|
38
|
+
# ```
|
39
|
+
#
|
40
|
+
# Note that
|
41
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}
|
42
|
+
# methods return a __clone__ of a dataset, so you must assign the return value
|
43
|
+
# to a variable.
|
44
|
+
#
|
45
|
+
# Once you have your {Dataset} how you want it, you can use the {#link_with}
|
46
|
+
# method to create a {Configuration} for record linkage. The {#link_with}
|
47
|
+
# method takes another {Dataset} object and a {ResultSet} and returns a
|
48
|
+
# {Configuration}.
|
49
|
+
#
|
50
|
+
# ```ruby
|
51
|
+
# config = dataset.link_with(other_dataset, result_set)
|
52
|
+
# config.compare([:foo], [:bar], :equal_to)
|
53
|
+
# ```
|
54
|
+
#
|
55
|
+
# You can pass in a {ScoreSet} and {MatchSet} instead of a {ResultSet} if you
|
56
|
+
# wish:
|
57
|
+
#
|
58
|
+
# ```ruby
|
59
|
+
# config = dataset.link_with(other_dataset, score_set, match_set)
|
60
|
+
# ```
|
61
|
+
#
|
62
|
+
# Note that a dataset can be linked with itself the same way, like so:
|
63
|
+
#
|
64
|
+
# ```ruby
|
65
|
+
# config = dataset.link_with(dataset, result_set)
|
66
|
+
# config.compare([:foo], [:bar], :equal_to)
|
67
|
+
# ```
|
68
|
+
#
|
69
|
+
# If you give {#link_with} a block, it will yield the same {Configuration}
|
70
|
+
# object to the block that it returns.
|
71
|
+
#
|
72
|
+
# ```ruby
|
73
|
+
# config = dataset.link_with(other_dataset, result_set) do |c|
|
74
|
+
# c.compare([:foo], [:bar], :equal_to)
|
75
|
+
# end
|
76
|
+
# ```
|
77
|
+
#
|
78
|
+
# Once that's done, use a {Runner} to run the record linkage:
|
79
|
+
#
|
80
|
+
# ```ruby
|
81
|
+
# runner = Linkage::Runner.new(config)
|
82
|
+
# runner.execute
|
83
|
+
# ```
|
84
|
+
#
|
85
|
+
# @see http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html Connecting to a database
|
3
86
|
class Dataset
|
4
|
-
|
87
|
+
# @return [Symbol] Returns this dataset's table name.
|
88
|
+
attr_reader :table_name
|
5
89
|
|
90
|
+
# @return [FieldSet] Returns this dataset's {FieldSet}.
|
91
|
+
attr_reader :field_set
|
92
|
+
|
93
|
+
# Returns a new instance of {Dataset}.
|
94
|
+
#
|
95
|
+
# @overload initialize(dataset)
|
96
|
+
# Use a specific {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
97
|
+
# @param dataset [Sequel::Dataset]
|
98
|
+
# @overload initialize(database, table_name)
|
99
|
+
# Use a specific {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html `Sequel::Database`}.
|
100
|
+
# @param database [Sequel::Database]
|
101
|
+
# @param table_name [Symbol, String]
|
102
|
+
# @overload initialize(uri, table_name, options = {})
|
103
|
+
# Use {http://sequel.jeremyevans.net/rdoc/classes/Sequel.html#method-c-connect `Sequel.connect`}
|
104
|
+
# to connect to a database.
|
105
|
+
# @param uri [String, Hash]
|
106
|
+
# @param table_name [Symbol, String]
|
107
|
+
# @param options [Hash]
|
108
|
+
#
|
6
109
|
def initialize(*args)
|
7
110
|
if args.length == 0 || args.length > 3
|
8
111
|
raise ArgumentError, "wrong number of arguments (#{args.length} for 1..3)"
|
@@ -31,17 +134,23 @@ module Linkage
|
|
31
134
|
@field_set = FieldSet.new(self)
|
32
135
|
end
|
33
136
|
|
137
|
+
# Returns the underlying {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
138
|
+
# @return [Sequel::Dataset]
|
34
139
|
def obj
|
35
140
|
@dataset
|
36
141
|
end
|
37
142
|
|
143
|
+
# Set the underlying {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
38
144
|
def obj=(value)
|
39
145
|
@dataset = value
|
40
146
|
end
|
147
|
+
private :obj=
|
41
148
|
|
42
|
-
#
|
149
|
+
# Create a {Configuration} for record linkage.
|
43
150
|
#
|
44
|
-
# @
|
151
|
+
# @param dataset [Dataset]
|
152
|
+
# @param result_set [ResultSet]
|
153
|
+
# @return [Configuration]
|
45
154
|
def link_with(dataset, result_set)
|
46
155
|
other = dataset.eql?(self) ? nil : dataset
|
47
156
|
conf = Configuration.new(self, other, result_set)
|
@@ -51,25 +160,31 @@ module Linkage
|
|
51
160
|
conf
|
52
161
|
end
|
53
162
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
163
|
+
# Return the dataset's schema.
|
164
|
+
#
|
165
|
+
# @return [Array]
|
166
|
+
# @see http://sequel.jeremyevans.net/rdoc/classes/Sequel/Database.html#method-i-schema Sequel::Database#schema
|
58
167
|
def schema
|
59
168
|
@db.schema(@table_name)
|
60
169
|
end
|
61
170
|
|
171
|
+
# Returns {FieldSet#primary_key}.
|
172
|
+
#
|
173
|
+
# @return [Field]
|
174
|
+
# @see FieldSet#primary_key
|
62
175
|
def primary_key
|
63
176
|
@field_set.primary_key
|
64
177
|
end
|
65
178
|
|
66
179
|
protected
|
67
180
|
|
181
|
+
# Delegate methods to the underlying
|
182
|
+
# {http://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html `Sequel::Dataset`}.
|
68
183
|
def method_missing(name, *args, &block)
|
69
184
|
result = @dataset.send(name, *args, &block)
|
70
185
|
if result.kind_of?(Sequel::Dataset)
|
71
186
|
new_object = clone
|
72
|
-
new_object.obj
|
187
|
+
new_object.send(:obj=, result)
|
73
188
|
new_object
|
74
189
|
else
|
75
190
|
result
|