linkage 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -1
- data/Gemfile.lock +19 -8
- data/README.markdown +8 -5
- data/Rakefile +0 -8
- data/VERSION +1 -1
- data/lib/linkage/configuration.rb +245 -157
- data/lib/linkage/data.rb +0 -24
- data/lib/linkage/dataset.rb +26 -183
- data/lib/linkage/field.rb +0 -3
- data/lib/linkage/field_set.rb +16 -0
- data/lib/linkage/function.rb +0 -7
- data/lib/linkage/result_set.rb +68 -0
- data/lib/linkage/runner/single_threaded.rb +29 -39
- data/lib/linkage/runner.rb +8 -36
- data/lib/linkage.rb +3 -1
- data/linkage.gemspec +14 -17
- data/test/helper.rb +1 -1
- data/test/integration/test_cross_linkage.rb +6 -2
- data/test/integration/test_dataset.rb +30 -0
- data/test/integration/test_dual_linkage.rb +9 -4
- data/test/integration/test_self_linkage.rb +23 -8
- data/test/unit/test_configuration.rb +90 -72
- data/test/unit/test_data.rb +0 -61
- data/test/unit/test_dataset.rb +19 -319
- data/test/unit/test_field.rb +0 -6
- data/test/unit/test_field_set.rb +31 -0
- data/test/unit/test_function.rb +6 -30
- data/test/unit/test_result_set.rb +18 -0
- data/test/unit/test_runner.rb +20 -5
- metadata +57 -41
- data/lib/linkage/expectation.rb +0 -138
- data/test/unit/test_expectation.rb +0 -390
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-02-28 00:00:00.000000000 -06:00
|
13
|
+
default_executable:
|
13
14
|
dependencies:
|
14
15
|
- !ruby/object:Gem::Dependency
|
15
16
|
name: sequel
|
16
|
-
requirement: &
|
17
|
+
requirement: &13934860 !ruby/object:Gem::Requirement
|
17
18
|
none: false
|
18
19
|
requirements:
|
19
20
|
- - ! '>='
|
@@ -21,10 +22,10 @@ dependencies:
|
|
21
22
|
version: '0'
|
22
23
|
type: :runtime
|
23
24
|
prerelease: false
|
24
|
-
version_requirements: *
|
25
|
+
version_requirements: *13934860
|
25
26
|
- !ruby/object:Gem::Dependency
|
26
27
|
name: bundler
|
27
|
-
requirement: &
|
28
|
+
requirement: &13932740 !ruby/object:Gem::Requirement
|
28
29
|
none: false
|
29
30
|
requirements:
|
30
31
|
- - ~>
|
@@ -32,10 +33,10 @@ dependencies:
|
|
32
33
|
version: 1.0.0
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements: *
|
36
|
+
version_requirements: *13932740
|
36
37
|
- !ruby/object:Gem::Dependency
|
37
38
|
name: jeweler
|
38
|
-
requirement: &
|
39
|
+
requirement: &13929520 !ruby/object:Gem::Requirement
|
39
40
|
none: false
|
40
41
|
requirements:
|
41
42
|
- - ~>
|
@@ -43,21 +44,10 @@ dependencies:
|
|
43
44
|
version: 1.6.4
|
44
45
|
type: :development
|
45
46
|
prerelease: false
|
46
|
-
version_requirements: *
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: rcov
|
49
|
-
requirement: &7137480 !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
|
-
requirements:
|
52
|
-
- - ! '>='
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
type: :development
|
56
|
-
prerelease: false
|
57
|
-
version_requirements: *7137480
|
47
|
+
version_requirements: *13929520
|
58
48
|
- !ruby/object:Gem::Dependency
|
59
49
|
name: test-unit
|
60
|
-
requirement: &
|
50
|
+
requirement: &13266740 !ruby/object:Gem::Requirement
|
61
51
|
none: false
|
62
52
|
requirements:
|
63
53
|
- - =
|
@@ -65,10 +55,10 @@ dependencies:
|
|
65
55
|
version: 2.3.2
|
66
56
|
type: :development
|
67
57
|
prerelease: false
|
68
|
-
version_requirements: *
|
58
|
+
version_requirements: *13266740
|
69
59
|
- !ruby/object:Gem::Dependency
|
70
60
|
name: mocha
|
71
|
-
requirement: &
|
61
|
+
requirement: &13265420 !ruby/object:Gem::Requirement
|
72
62
|
none: false
|
73
63
|
requirements:
|
74
64
|
- - ! '>='
|
@@ -76,10 +66,10 @@ dependencies:
|
|
76
66
|
version: '0'
|
77
67
|
type: :development
|
78
68
|
prerelease: false
|
79
|
-
version_requirements: *
|
69
|
+
version_requirements: *13265420
|
80
70
|
- !ruby/object:Gem::Dependency
|
81
71
|
name: sqlite3
|
82
|
-
requirement: &
|
72
|
+
requirement: &13264380 !ruby/object:Gem::Requirement
|
83
73
|
none: false
|
84
74
|
requirements:
|
85
75
|
- - ! '>='
|
@@ -87,10 +77,10 @@ dependencies:
|
|
87
77
|
version: '0'
|
88
78
|
type: :development
|
89
79
|
prerelease: false
|
90
|
-
version_requirements: *
|
80
|
+
version_requirements: *13264380
|
91
81
|
- !ruby/object:Gem::Dependency
|
92
82
|
name: yard
|
93
|
-
requirement: &
|
83
|
+
requirement: &13262200 !ruby/object:Gem::Requirement
|
94
84
|
none: false
|
95
85
|
requirements:
|
96
86
|
- - ! '>='
|
@@ -98,10 +88,10 @@ dependencies:
|
|
98
88
|
version: '0'
|
99
89
|
type: :development
|
100
90
|
prerelease: false
|
101
|
-
version_requirements: *
|
91
|
+
version_requirements: *13262200
|
102
92
|
- !ruby/object:Gem::Dependency
|
103
93
|
name: rake
|
104
|
-
requirement: &
|
94
|
+
requirement: &13276680 !ruby/object:Gem::Requirement
|
105
95
|
none: false
|
106
96
|
requirements:
|
107
97
|
- - ! '>='
|
@@ -109,10 +99,10 @@ dependencies:
|
|
109
99
|
version: '0'
|
110
100
|
type: :development
|
111
101
|
prerelease: false
|
112
|
-
version_requirements: *
|
102
|
+
version_requirements: *13276680
|
113
103
|
- !ruby/object:Gem::Dependency
|
114
104
|
name: versionomy
|
115
|
-
requirement: &
|
105
|
+
requirement: &14225280 !ruby/object:Gem::Requirement
|
116
106
|
none: false
|
117
107
|
requirements:
|
118
108
|
- - ! '>='
|
@@ -120,10 +110,10 @@ dependencies:
|
|
120
110
|
version: '0'
|
121
111
|
type: :development
|
122
112
|
prerelease: false
|
123
|
-
version_requirements: *
|
113
|
+
version_requirements: *14225280
|
124
114
|
- !ruby/object:Gem::Dependency
|
125
115
|
name: mysql2
|
126
|
-
requirement: &
|
116
|
+
requirement: &14222420 !ruby/object:Gem::Requirement
|
127
117
|
none: false
|
128
118
|
requirements:
|
129
119
|
- - ! '>='
|
@@ -131,10 +121,10 @@ dependencies:
|
|
131
121
|
version: '0'
|
132
122
|
type: :development
|
133
123
|
prerelease: false
|
134
|
-
version_requirements: *
|
124
|
+
version_requirements: *14222420
|
135
125
|
- !ruby/object:Gem::Dependency
|
136
126
|
name: pry
|
137
|
-
requirement: &
|
127
|
+
requirement: &14220220 !ruby/object:Gem::Requirement
|
138
128
|
none: false
|
139
129
|
requirements:
|
140
130
|
- - ! '>='
|
@@ -142,10 +132,32 @@ dependencies:
|
|
142
132
|
version: '0'
|
143
133
|
type: :development
|
144
134
|
prerelease: false
|
145
|
-
version_requirements: *
|
135
|
+
version_requirements: *14220220
|
146
136
|
- !ruby/object:Gem::Dependency
|
147
137
|
name: rdiscount
|
148
|
-
requirement: &
|
138
|
+
requirement: &14218900 !ruby/object:Gem::Requirement
|
139
|
+
none: false
|
140
|
+
requirements:
|
141
|
+
- - ! '>='
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
type: :development
|
145
|
+
prerelease: false
|
146
|
+
version_requirements: *14218900
|
147
|
+
- !ruby/object:Gem::Dependency
|
148
|
+
name: guard-test
|
149
|
+
requirement: &14217760 !ruby/object:Gem::Requirement
|
150
|
+
none: false
|
151
|
+
requirements:
|
152
|
+
- - ! '>='
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
type: :development
|
156
|
+
prerelease: false
|
157
|
+
version_requirements: *14217760
|
158
|
+
- !ruby/object:Gem::Dependency
|
159
|
+
name: guard-yard
|
160
|
+
requirement: &14230100 !ruby/object:Gem::Requirement
|
149
161
|
none: false
|
150
162
|
requirements:
|
151
163
|
- - ! '>='
|
@@ -153,7 +165,7 @@ dependencies:
|
|
153
165
|
version: '0'
|
154
166
|
type: :development
|
155
167
|
prerelease: false
|
156
|
-
version_requirements: *
|
168
|
+
version_requirements: *14230100
|
157
169
|
description: Wraps Sequel to perform record linkage between one or two datasets
|
158
170
|
email: jeremy.f.stephens@vanderbilt.edu
|
159
171
|
executables: []
|
@@ -175,12 +187,13 @@ files:
|
|
175
187
|
- lib/linkage/configuration.rb
|
176
188
|
- lib/linkage/data.rb
|
177
189
|
- lib/linkage/dataset.rb
|
178
|
-
- lib/linkage/expectation.rb
|
179
190
|
- lib/linkage/field.rb
|
191
|
+
- lib/linkage/field_set.rb
|
180
192
|
- lib/linkage/function.rb
|
181
193
|
- lib/linkage/functions/trim.rb
|
182
194
|
- lib/linkage/group.rb
|
183
195
|
- lib/linkage/import_buffer.rb
|
196
|
+
- lib/linkage/result_set.rb
|
184
197
|
- lib/linkage/runner.rb
|
185
198
|
- lib/linkage/runner/single_threaded.rb
|
186
199
|
- lib/linkage/utils.rb
|
@@ -189,6 +202,7 @@ files:
|
|
189
202
|
- test/config.yml
|
190
203
|
- test/helper.rb
|
191
204
|
- test/integration/test_cross_linkage.rb
|
205
|
+
- test/integration/test_dataset.rb
|
192
206
|
- test/integration/test_dual_linkage.rb
|
193
207
|
- test/integration/test_self_linkage.rb
|
194
208
|
- test/unit/functions/test_trim.rb
|
@@ -196,14 +210,16 @@ files:
|
|
196
210
|
- test/unit/test_configuration.rb
|
197
211
|
- test/unit/test_data.rb
|
198
212
|
- test/unit/test_dataset.rb
|
199
|
-
- test/unit/test_expectation.rb
|
200
213
|
- test/unit/test_field.rb
|
214
|
+
- test/unit/test_field_set.rb
|
201
215
|
- test/unit/test_function.rb
|
202
216
|
- test/unit/test_group.rb
|
203
217
|
- test/unit/test_import_buffer.rb
|
204
218
|
- test/unit/test_linkage.rb
|
219
|
+
- test/unit/test_result_set.rb
|
205
220
|
- test/unit/test_runner.rb
|
206
221
|
- test/unit/test_utils.rb
|
222
|
+
has_rdoc: true
|
207
223
|
homepage: http://github.com/coupler/linkage
|
208
224
|
licenses:
|
209
225
|
- MIT
|
@@ -219,7 +235,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
219
235
|
version: '0'
|
220
236
|
segments:
|
221
237
|
- 0
|
222
|
-
hash:
|
238
|
+
hash: 3554989941562530888
|
223
239
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
224
240
|
none: false
|
225
241
|
requirements:
|
@@ -228,7 +244,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
228
244
|
version: '0'
|
229
245
|
requirements: []
|
230
246
|
rubyforge_project:
|
231
|
-
rubygems_version: 1.
|
247
|
+
rubygems_version: 1.3.9.4
|
232
248
|
signing_key:
|
233
249
|
specification_version: 3
|
234
250
|
summary: Sequel-based record linkage
|
data/lib/linkage/expectation.rb
DELETED
@@ -1,138 +0,0 @@
|
|
1
|
-
module Linkage
|
2
|
-
class Expectation
|
3
|
-
VALID_OPERATORS = [:==, :>, :<, :>=, :<=, :'!=']
|
4
|
-
|
5
|
-
def self.get(type)
|
6
|
-
TYPES[type]
|
7
|
-
end
|
8
|
-
|
9
|
-
attr_reader :operator, :field_1, :field_2
|
10
|
-
|
11
|
-
# @param [Symbol] operator Currently, only :==
|
12
|
-
# @param [Linkage::Field, Linkage::Function, Object] field_1
|
13
|
-
# @param [Linkage::Field, Linkage::Function, Object] field_2
|
14
|
-
# @param [Symbol] force_kind Manually set type of expectation (useful for
|
15
|
-
# a filter between two fields)
|
16
|
-
def initialize(operator, field_1, field_2, force_kind = nil)
|
17
|
-
if !((field_1.kind_of?(Data) && !field_1.static?) || (field_2.kind_of?(Data) && !field_2.static?))
|
18
|
-
raise ArgumentError, "You must have at least one data source (Linkage::Field or Linkage::Function)"
|
19
|
-
end
|
20
|
-
|
21
|
-
if !VALID_OPERATORS.include?(operator)
|
22
|
-
raise ArgumentError, "Invalid operator: #{operator.inspect}"
|
23
|
-
end
|
24
|
-
|
25
|
-
@operator = operator
|
26
|
-
@field_1 = field_1
|
27
|
-
@field_2 = field_2
|
28
|
-
@kind = force_kind
|
29
|
-
|
30
|
-
if kind == :filter
|
31
|
-
if @field_1.is_a?(Field)
|
32
|
-
@filter_field = @field_1
|
33
|
-
@filter_value = @field_2
|
34
|
-
else
|
35
|
-
@filter_field = @field_2
|
36
|
-
@filter_value = @field_1
|
37
|
-
end
|
38
|
-
elsif @operator != :==
|
39
|
-
raise ArgumentError, "Inequality operators are not allowed for non-filter expectations"
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
def ==(other)
|
44
|
-
if other.is_a?(Expectation)
|
45
|
-
@operator == other.operator && @field_1 == other.field_1 &&
|
46
|
-
@field_2 == other.field_2
|
47
|
-
else
|
48
|
-
super
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
# @return [Symbol] :self, :dual, :cross, or :filter
|
53
|
-
def kind
|
54
|
-
@kind ||=
|
55
|
-
if !(@field_1.is_a?(Data) && !@field_1.static? && @field_2.is_a?(Data) && !@field_2.static?)
|
56
|
-
:filter
|
57
|
-
elsif @field_1 == @field_2
|
58
|
-
:self
|
59
|
-
elsif @field_1.dataset == @field_2.dataset
|
60
|
-
:cross
|
61
|
-
else
|
62
|
-
:dual
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
# @return [Symbol] name of the merged field type
|
67
|
-
def name
|
68
|
-
merged_field.name
|
69
|
-
end
|
70
|
-
|
71
|
-
# @return [Linkage::Field] result of Field#merge between the two fields
|
72
|
-
def merged_field
|
73
|
-
@merged_field ||= @field_1.merge(@field_2)
|
74
|
-
end
|
75
|
-
|
76
|
-
# @return [Boolean] Whether or not this expectation involves a field in
|
77
|
-
# the given dataset (Only useful for :filter expressions)
|
78
|
-
def applies_to?(dataset)
|
79
|
-
if kind == :filter
|
80
|
-
@filter_field.belongs_to?(dataset)
|
81
|
-
else
|
82
|
-
@field_1.belongs_to?(dataset) || @field_2.belongs_to?(dataset)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
# Apply changes to a dataset based on the expectation, such as calling
|
87
|
-
# {Dataset#add_order}, {Dataset#add_select}, and {Dataset#add_filter}
|
88
|
-
# with the appropriate arguments.
|
89
|
-
def apply_to(dataset)
|
90
|
-
case kind
|
91
|
-
when :filter
|
92
|
-
if @filter_field.belongs_to?(dataset)
|
93
|
-
dataset.add_filter(@filter_field, @operator, @filter_value)
|
94
|
-
end
|
95
|
-
else
|
96
|
-
as =
|
97
|
-
if kind == :self
|
98
|
-
@field_1.is_a?(Function) ? @field_1.name : nil
|
99
|
-
else
|
100
|
-
name != @field_1.name ? name : nil
|
101
|
-
end
|
102
|
-
|
103
|
-
if @field_1.belongs_to?(dataset)
|
104
|
-
dataset.add_order(@field_1)
|
105
|
-
dataset.add_select(@field_1, as)
|
106
|
-
end
|
107
|
-
if @field_2.belongs_to?(dataset)
|
108
|
-
dataset.add_order(@field_2)
|
109
|
-
dataset.add_select(@field_2, as)
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
class MustExpectation < Expectation
|
116
|
-
end
|
117
|
-
|
118
|
-
class MustNotExpectation < Expectation
|
119
|
-
OPERATOR_OPPOSITES = {
|
120
|
-
:== => :'!=',
|
121
|
-
:'!=' => :==,
|
122
|
-
:> => :<=,
|
123
|
-
:<= => :>,
|
124
|
-
:< => :>=,
|
125
|
-
:>= => :<
|
126
|
-
}
|
127
|
-
|
128
|
-
# Same as Expectation, except it negates the operator.
|
129
|
-
def initialize(operator, field_1, field_2, force_kind = nil)
|
130
|
-
super(OPERATOR_OPPOSITES[operator], field_1, field_2, force_kind)
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
Expectation::TYPES = {
|
135
|
-
:must => MustExpectation,
|
136
|
-
:must_not => MustNotExpectation
|
137
|
-
}
|
138
|
-
end
|