linkage 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -1
- data/Gemfile.lock +19 -8
- data/README.markdown +8 -5
- data/Rakefile +0 -8
- data/VERSION +1 -1
- data/lib/linkage/configuration.rb +245 -157
- data/lib/linkage/data.rb +0 -24
- data/lib/linkage/dataset.rb +26 -183
- data/lib/linkage/field.rb +0 -3
- data/lib/linkage/field_set.rb +16 -0
- data/lib/linkage/function.rb +0 -7
- data/lib/linkage/result_set.rb +68 -0
- data/lib/linkage/runner/single_threaded.rb +29 -39
- data/lib/linkage/runner.rb +8 -36
- data/lib/linkage.rb +3 -1
- data/linkage.gemspec +14 -17
- data/test/helper.rb +1 -1
- data/test/integration/test_cross_linkage.rb +6 -2
- data/test/integration/test_dataset.rb +30 -0
- data/test/integration/test_dual_linkage.rb +9 -4
- data/test/integration/test_self_linkage.rb +23 -8
- data/test/unit/test_configuration.rb +90 -72
- data/test/unit/test_data.rb +0 -61
- data/test/unit/test_dataset.rb +19 -319
- data/test/unit/test_field.rb +0 -6
- data/test/unit/test_field_set.rb +31 -0
- data/test/unit/test_function.rb +6 -30
- data/test/unit/test_result_set.rb +18 -0
- data/test/unit/test_runner.rb +20 -5
- metadata +57 -41
- data/lib/linkage/expectation.rb +0 -138
- data/test/unit/test_expectation.rb +0 -390
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-02-28 00:00:00.000000000 -06:00
|
13
|
+
default_executable:
|
13
14
|
dependencies:
|
14
15
|
- !ruby/object:Gem::Dependency
|
15
16
|
name: sequel
|
16
|
-
requirement: &
|
17
|
+
requirement: &13934860 !ruby/object:Gem::Requirement
|
17
18
|
none: false
|
18
19
|
requirements:
|
19
20
|
- - ! '>='
|
@@ -21,10 +22,10 @@ dependencies:
|
|
21
22
|
version: '0'
|
22
23
|
type: :runtime
|
23
24
|
prerelease: false
|
24
|
-
version_requirements: *
|
25
|
+
version_requirements: *13934860
|
25
26
|
- !ruby/object:Gem::Dependency
|
26
27
|
name: bundler
|
27
|
-
requirement: &
|
28
|
+
requirement: &13932740 !ruby/object:Gem::Requirement
|
28
29
|
none: false
|
29
30
|
requirements:
|
30
31
|
- - ~>
|
@@ -32,10 +33,10 @@ dependencies:
|
|
32
33
|
version: 1.0.0
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements: *
|
36
|
+
version_requirements: *13932740
|
36
37
|
- !ruby/object:Gem::Dependency
|
37
38
|
name: jeweler
|
38
|
-
requirement: &
|
39
|
+
requirement: &13929520 !ruby/object:Gem::Requirement
|
39
40
|
none: false
|
40
41
|
requirements:
|
41
42
|
- - ~>
|
@@ -43,21 +44,10 @@ dependencies:
|
|
43
44
|
version: 1.6.4
|
44
45
|
type: :development
|
45
46
|
prerelease: false
|
46
|
-
version_requirements: *
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: rcov
|
49
|
-
requirement: &7137480 !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
|
-
requirements:
|
52
|
-
- - ! '>='
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
type: :development
|
56
|
-
prerelease: false
|
57
|
-
version_requirements: *7137480
|
47
|
+
version_requirements: *13929520
|
58
48
|
- !ruby/object:Gem::Dependency
|
59
49
|
name: test-unit
|
60
|
-
requirement: &
|
50
|
+
requirement: &13266740 !ruby/object:Gem::Requirement
|
61
51
|
none: false
|
62
52
|
requirements:
|
63
53
|
- - =
|
@@ -65,10 +55,10 @@ dependencies:
|
|
65
55
|
version: 2.3.2
|
66
56
|
type: :development
|
67
57
|
prerelease: false
|
68
|
-
version_requirements: *
|
58
|
+
version_requirements: *13266740
|
69
59
|
- !ruby/object:Gem::Dependency
|
70
60
|
name: mocha
|
71
|
-
requirement: &
|
61
|
+
requirement: &13265420 !ruby/object:Gem::Requirement
|
72
62
|
none: false
|
73
63
|
requirements:
|
74
64
|
- - ! '>='
|
@@ -76,10 +66,10 @@ dependencies:
|
|
76
66
|
version: '0'
|
77
67
|
type: :development
|
78
68
|
prerelease: false
|
79
|
-
version_requirements: *
|
69
|
+
version_requirements: *13265420
|
80
70
|
- !ruby/object:Gem::Dependency
|
81
71
|
name: sqlite3
|
82
|
-
requirement: &
|
72
|
+
requirement: &13264380 !ruby/object:Gem::Requirement
|
83
73
|
none: false
|
84
74
|
requirements:
|
85
75
|
- - ! '>='
|
@@ -87,10 +77,10 @@ dependencies:
|
|
87
77
|
version: '0'
|
88
78
|
type: :development
|
89
79
|
prerelease: false
|
90
|
-
version_requirements: *
|
80
|
+
version_requirements: *13264380
|
91
81
|
- !ruby/object:Gem::Dependency
|
92
82
|
name: yard
|
93
|
-
requirement: &
|
83
|
+
requirement: &13262200 !ruby/object:Gem::Requirement
|
94
84
|
none: false
|
95
85
|
requirements:
|
96
86
|
- - ! '>='
|
@@ -98,10 +88,10 @@ dependencies:
|
|
98
88
|
version: '0'
|
99
89
|
type: :development
|
100
90
|
prerelease: false
|
101
|
-
version_requirements: *
|
91
|
+
version_requirements: *13262200
|
102
92
|
- !ruby/object:Gem::Dependency
|
103
93
|
name: rake
|
104
|
-
requirement: &
|
94
|
+
requirement: &13276680 !ruby/object:Gem::Requirement
|
105
95
|
none: false
|
106
96
|
requirements:
|
107
97
|
- - ! '>='
|
@@ -109,10 +99,10 @@ dependencies:
|
|
109
99
|
version: '0'
|
110
100
|
type: :development
|
111
101
|
prerelease: false
|
112
|
-
version_requirements: *
|
102
|
+
version_requirements: *13276680
|
113
103
|
- !ruby/object:Gem::Dependency
|
114
104
|
name: versionomy
|
115
|
-
requirement: &
|
105
|
+
requirement: &14225280 !ruby/object:Gem::Requirement
|
116
106
|
none: false
|
117
107
|
requirements:
|
118
108
|
- - ! '>='
|
@@ -120,10 +110,10 @@ dependencies:
|
|
120
110
|
version: '0'
|
121
111
|
type: :development
|
122
112
|
prerelease: false
|
123
|
-
version_requirements: *
|
113
|
+
version_requirements: *14225280
|
124
114
|
- !ruby/object:Gem::Dependency
|
125
115
|
name: mysql2
|
126
|
-
requirement: &
|
116
|
+
requirement: &14222420 !ruby/object:Gem::Requirement
|
127
117
|
none: false
|
128
118
|
requirements:
|
129
119
|
- - ! '>='
|
@@ -131,10 +121,10 @@ dependencies:
|
|
131
121
|
version: '0'
|
132
122
|
type: :development
|
133
123
|
prerelease: false
|
134
|
-
version_requirements: *
|
124
|
+
version_requirements: *14222420
|
135
125
|
- !ruby/object:Gem::Dependency
|
136
126
|
name: pry
|
137
|
-
requirement: &
|
127
|
+
requirement: &14220220 !ruby/object:Gem::Requirement
|
138
128
|
none: false
|
139
129
|
requirements:
|
140
130
|
- - ! '>='
|
@@ -142,10 +132,32 @@ dependencies:
|
|
142
132
|
version: '0'
|
143
133
|
type: :development
|
144
134
|
prerelease: false
|
145
|
-
version_requirements: *
|
135
|
+
version_requirements: *14220220
|
146
136
|
- !ruby/object:Gem::Dependency
|
147
137
|
name: rdiscount
|
148
|
-
requirement: &
|
138
|
+
requirement: &14218900 !ruby/object:Gem::Requirement
|
139
|
+
none: false
|
140
|
+
requirements:
|
141
|
+
- - ! '>='
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
type: :development
|
145
|
+
prerelease: false
|
146
|
+
version_requirements: *14218900
|
147
|
+
- !ruby/object:Gem::Dependency
|
148
|
+
name: guard-test
|
149
|
+
requirement: &14217760 !ruby/object:Gem::Requirement
|
150
|
+
none: false
|
151
|
+
requirements:
|
152
|
+
- - ! '>='
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
type: :development
|
156
|
+
prerelease: false
|
157
|
+
version_requirements: *14217760
|
158
|
+
- !ruby/object:Gem::Dependency
|
159
|
+
name: guard-yard
|
160
|
+
requirement: &14230100 !ruby/object:Gem::Requirement
|
149
161
|
none: false
|
150
162
|
requirements:
|
151
163
|
- - ! '>='
|
@@ -153,7 +165,7 @@ dependencies:
|
|
153
165
|
version: '0'
|
154
166
|
type: :development
|
155
167
|
prerelease: false
|
156
|
-
version_requirements: *
|
168
|
+
version_requirements: *14230100
|
157
169
|
description: Wraps Sequel to perform record linkage between one or two datasets
|
158
170
|
email: jeremy.f.stephens@vanderbilt.edu
|
159
171
|
executables: []
|
@@ -175,12 +187,13 @@ files:
|
|
175
187
|
- lib/linkage/configuration.rb
|
176
188
|
- lib/linkage/data.rb
|
177
189
|
- lib/linkage/dataset.rb
|
178
|
-
- lib/linkage/expectation.rb
|
179
190
|
- lib/linkage/field.rb
|
191
|
+
- lib/linkage/field_set.rb
|
180
192
|
- lib/linkage/function.rb
|
181
193
|
- lib/linkage/functions/trim.rb
|
182
194
|
- lib/linkage/group.rb
|
183
195
|
- lib/linkage/import_buffer.rb
|
196
|
+
- lib/linkage/result_set.rb
|
184
197
|
- lib/linkage/runner.rb
|
185
198
|
- lib/linkage/runner/single_threaded.rb
|
186
199
|
- lib/linkage/utils.rb
|
@@ -189,6 +202,7 @@ files:
|
|
189
202
|
- test/config.yml
|
190
203
|
- test/helper.rb
|
191
204
|
- test/integration/test_cross_linkage.rb
|
205
|
+
- test/integration/test_dataset.rb
|
192
206
|
- test/integration/test_dual_linkage.rb
|
193
207
|
- test/integration/test_self_linkage.rb
|
194
208
|
- test/unit/functions/test_trim.rb
|
@@ -196,14 +210,16 @@ files:
|
|
196
210
|
- test/unit/test_configuration.rb
|
197
211
|
- test/unit/test_data.rb
|
198
212
|
- test/unit/test_dataset.rb
|
199
|
-
- test/unit/test_expectation.rb
|
200
213
|
- test/unit/test_field.rb
|
214
|
+
- test/unit/test_field_set.rb
|
201
215
|
- test/unit/test_function.rb
|
202
216
|
- test/unit/test_group.rb
|
203
217
|
- test/unit/test_import_buffer.rb
|
204
218
|
- test/unit/test_linkage.rb
|
219
|
+
- test/unit/test_result_set.rb
|
205
220
|
- test/unit/test_runner.rb
|
206
221
|
- test/unit/test_utils.rb
|
222
|
+
has_rdoc: true
|
207
223
|
homepage: http://github.com/coupler/linkage
|
208
224
|
licenses:
|
209
225
|
- MIT
|
@@ -219,7 +235,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
219
235
|
version: '0'
|
220
236
|
segments:
|
221
237
|
- 0
|
222
|
-
hash:
|
238
|
+
hash: 3554989941562530888
|
223
239
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
224
240
|
none: false
|
225
241
|
requirements:
|
@@ -228,7 +244,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
228
244
|
version: '0'
|
229
245
|
requirements: []
|
230
246
|
rubyforge_project:
|
231
|
-
rubygems_version: 1.
|
247
|
+
rubygems_version: 1.3.9.4
|
232
248
|
signing_key:
|
233
249
|
specification_version: 3
|
234
250
|
summary: Sequel-based record linkage
|
data/lib/linkage/expectation.rb
DELETED
@@ -1,138 +0,0 @@
|
|
1
|
-
module Linkage
|
2
|
-
class Expectation
|
3
|
-
VALID_OPERATORS = [:==, :>, :<, :>=, :<=, :'!=']
|
4
|
-
|
5
|
-
def self.get(type)
|
6
|
-
TYPES[type]
|
7
|
-
end
|
8
|
-
|
9
|
-
attr_reader :operator, :field_1, :field_2
|
10
|
-
|
11
|
-
# @param [Symbol] operator Currently, only :==
|
12
|
-
# @param [Linkage::Field, Linkage::Function, Object] field_1
|
13
|
-
# @param [Linkage::Field, Linkage::Function, Object] field_2
|
14
|
-
# @param [Symbol] force_kind Manually set type of expectation (useful for
|
15
|
-
# a filter between two fields)
|
16
|
-
def initialize(operator, field_1, field_2, force_kind = nil)
|
17
|
-
if !((field_1.kind_of?(Data) && !field_1.static?) || (field_2.kind_of?(Data) && !field_2.static?))
|
18
|
-
raise ArgumentError, "You must have at least one data source (Linkage::Field or Linkage::Function)"
|
19
|
-
end
|
20
|
-
|
21
|
-
if !VALID_OPERATORS.include?(operator)
|
22
|
-
raise ArgumentError, "Invalid operator: #{operator.inspect}"
|
23
|
-
end
|
24
|
-
|
25
|
-
@operator = operator
|
26
|
-
@field_1 = field_1
|
27
|
-
@field_2 = field_2
|
28
|
-
@kind = force_kind
|
29
|
-
|
30
|
-
if kind == :filter
|
31
|
-
if @field_1.is_a?(Field)
|
32
|
-
@filter_field = @field_1
|
33
|
-
@filter_value = @field_2
|
34
|
-
else
|
35
|
-
@filter_field = @field_2
|
36
|
-
@filter_value = @field_1
|
37
|
-
end
|
38
|
-
elsif @operator != :==
|
39
|
-
raise ArgumentError, "Inequality operators are not allowed for non-filter expectations"
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
def ==(other)
|
44
|
-
if other.is_a?(Expectation)
|
45
|
-
@operator == other.operator && @field_1 == other.field_1 &&
|
46
|
-
@field_2 == other.field_2
|
47
|
-
else
|
48
|
-
super
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
# @return [Symbol] :self, :dual, :cross, or :filter
|
53
|
-
def kind
|
54
|
-
@kind ||=
|
55
|
-
if !(@field_1.is_a?(Data) && !@field_1.static? && @field_2.is_a?(Data) && !@field_2.static?)
|
56
|
-
:filter
|
57
|
-
elsif @field_1 == @field_2
|
58
|
-
:self
|
59
|
-
elsif @field_1.dataset == @field_2.dataset
|
60
|
-
:cross
|
61
|
-
else
|
62
|
-
:dual
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
# @return [Symbol] name of the merged field type
|
67
|
-
def name
|
68
|
-
merged_field.name
|
69
|
-
end
|
70
|
-
|
71
|
-
# @return [Linkage::Field] result of Field#merge between the two fields
|
72
|
-
def merged_field
|
73
|
-
@merged_field ||= @field_1.merge(@field_2)
|
74
|
-
end
|
75
|
-
|
76
|
-
# @return [Boolean] Whether or not this expectation involves a field in
|
77
|
-
# the given dataset (Only useful for :filter expressions)
|
78
|
-
def applies_to?(dataset)
|
79
|
-
if kind == :filter
|
80
|
-
@filter_field.belongs_to?(dataset)
|
81
|
-
else
|
82
|
-
@field_1.belongs_to?(dataset) || @field_2.belongs_to?(dataset)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
# Apply changes to a dataset based on the expectation, such as calling
|
87
|
-
# {Dataset#add_order}, {Dataset#add_select}, and {Dataset#add_filter}
|
88
|
-
# with the appropriate arguments.
|
89
|
-
def apply_to(dataset)
|
90
|
-
case kind
|
91
|
-
when :filter
|
92
|
-
if @filter_field.belongs_to?(dataset)
|
93
|
-
dataset.add_filter(@filter_field, @operator, @filter_value)
|
94
|
-
end
|
95
|
-
else
|
96
|
-
as =
|
97
|
-
if kind == :self
|
98
|
-
@field_1.is_a?(Function) ? @field_1.name : nil
|
99
|
-
else
|
100
|
-
name != @field_1.name ? name : nil
|
101
|
-
end
|
102
|
-
|
103
|
-
if @field_1.belongs_to?(dataset)
|
104
|
-
dataset.add_order(@field_1)
|
105
|
-
dataset.add_select(@field_1, as)
|
106
|
-
end
|
107
|
-
if @field_2.belongs_to?(dataset)
|
108
|
-
dataset.add_order(@field_2)
|
109
|
-
dataset.add_select(@field_2, as)
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
class MustExpectation < Expectation
|
116
|
-
end
|
117
|
-
|
118
|
-
class MustNotExpectation < Expectation
|
119
|
-
OPERATOR_OPPOSITES = {
|
120
|
-
:== => :'!=',
|
121
|
-
:'!=' => :==,
|
122
|
-
:> => :<=,
|
123
|
-
:<= => :>,
|
124
|
-
:< => :>=,
|
125
|
-
:>= => :<
|
126
|
-
}
|
127
|
-
|
128
|
-
# Same as Expectation, except it negates the operator.
|
129
|
-
def initialize(operator, field_1, field_2, force_kind = nil)
|
130
|
-
super(OPERATOR_OPPOSITES[operator], field_1, field_2, force_kind)
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
Expectation::TYPES = {
|
135
|
-
:must => MustExpectation,
|
136
|
-
:must_not => MustNotExpectation
|
137
|
-
}
|
138
|
-
end
|