linkage 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/Gemfile +15 -13
- data/Gemfile.lock +67 -37
- data/Guardfile +0 -2
- data/Rakefile +122 -25
- data/lib/linkage/comparator.rb +172 -0
- data/lib/linkage/comparators/binary.rb +12 -0
- data/lib/linkage/comparators/compare.rb +46 -0
- data/lib/linkage/comparators/within.rb +32 -0
- data/lib/linkage/configuration.rb +285 -153
- data/lib/linkage/data.rb +32 -7
- data/lib/linkage/dataset.rb +107 -32
- data/lib/linkage/decollation.rb +93 -0
- data/lib/linkage/expectation.rb +21 -0
- data/lib/linkage/expectations/exhaustive.rb +63 -0
- data/lib/linkage/expectations/simple.rb +168 -0
- data/lib/linkage/field.rb +30 -4
- data/lib/linkage/field_set.rb +6 -3
- data/lib/linkage/function.rb +50 -3
- data/lib/linkage/functions/binary.rb +30 -0
- data/lib/linkage/functions/cast.rb +54 -0
- data/lib/linkage/functions/length.rb +29 -0
- data/lib/linkage/functions/strftime.rb +12 -11
- data/lib/linkage/functions/trim.rb +8 -0
- data/lib/linkage/group.rb +20 -0
- data/lib/linkage/import_buffer.rb +5 -16
- data/lib/linkage/meta_object.rb +139 -0
- data/lib/linkage/result_set.rb +74 -17
- data/lib/linkage/runner/single_threaded.rb +125 -10
- data/lib/linkage/version.rb +3 -0
- data/lib/linkage.rb +11 -0
- data/linkage.gemspec +16 -121
- data/test/config.yml +5 -0
- data/test/helper.rb +73 -8
- data/test/integration/test_collation.rb +45 -0
- data/test/integration/test_configuration.rb +268 -0
- data/test/integration/test_cross_linkage.rb +4 -17
- data/test/integration/test_dataset.rb +45 -2
- data/test/integration/test_dual_linkage.rb +40 -24
- data/test/integration/test_functions.rb +22 -0
- data/test/integration/test_result_set.rb +85 -0
- data/test/integration/test_scoring.rb +84 -0
- data/test/integration/test_self_linkage.rb +5 -0
- data/test/integration/test_within_comparator.rb +100 -0
- data/test/unit/comparators/test_compare.rb +105 -0
- data/test/unit/comparators/test_within.rb +57 -0
- data/test/unit/expectations/test_exhaustive.rb +111 -0
- data/test/unit/expectations/test_simple.rb +303 -0
- data/test/unit/functions/test_binary.rb +54 -0
- data/test/unit/functions/test_cast.rb +98 -0
- data/test/unit/functions/test_length.rb +52 -0
- data/test/unit/functions/test_strftime.rb +17 -13
- data/test/unit/functions/test_trim.rb +11 -4
- data/test/unit/test_comparator.rb +124 -0
- data/test/unit/test_configuration.rb +137 -175
- data/test/unit/test_data.rb +44 -0
- data/test/unit/test_dataset.rb +73 -21
- data/test/unit/test_decollation.rb +201 -0
- data/test/unit/test_field.rb +38 -14
- data/test/unit/test_field_set.rb +12 -8
- data/test/unit/test_function.rb +83 -16
- data/test/unit/test_group.rb +28 -0
- data/test/unit/test_import_buffer.rb +13 -27
- data/test/unit/test_meta_object.rb +208 -0
- data/test/unit/test_result_set.rb +221 -3
- metadata +82 -190
@@ -1,7 +1,6 @@
|
|
1
1
|
module Linkage
|
2
2
|
class Configuration
|
3
3
|
class DSL
|
4
|
-
|
5
4
|
# Class for visually comparing matched records
|
6
5
|
class VisualComparisonWrapper
|
7
6
|
attr_reader :dsl, :lhs, :rhs
|
@@ -33,119 +32,97 @@ module Linkage
|
|
33
32
|
:>= => :<
|
34
33
|
}
|
35
34
|
|
36
|
-
|
37
|
-
|
38
|
-
def initialize(dsl, type, lhs)
|
35
|
+
def initialize(dsl, type, lhs, *args)
|
39
36
|
@dsl = dsl
|
40
37
|
@type = type
|
41
38
|
@lhs = lhs
|
42
|
-
@rhs = nil
|
43
|
-
@side = nil
|
44
|
-
@kind = nil
|
45
39
|
end
|
46
40
|
|
47
|
-
|
48
|
-
|
49
|
-
# NOTE: lhs is always a DataWrapper
|
50
|
-
|
51
|
-
@rhs = rhs
|
52
|
-
if !@rhs.is_a?(DataWrapper) || @lhs.static? || @rhs.static? || @lhs.side == @rhs.side
|
53
|
-
@side = @lhs.side
|
54
|
-
@side = @rhs.side if @side.nil? && @rhs.is_a?(DataWrapper)
|
55
|
-
@kind = :filter
|
56
|
-
elsif @lhs.same_except_side?(@rhs)
|
57
|
-
@kind = :self
|
58
|
-
elsif @lhs.dataset == @rhs.dataset
|
59
|
-
@kind = :cross
|
60
|
-
else
|
61
|
-
@kind = :dual
|
62
|
-
end
|
63
|
-
@operator = @type == :must_not ? OPERATOR_OPPOSITES[operator] : operator
|
64
|
-
@dsl.add_expectation(self)
|
65
|
-
end
|
66
|
-
end
|
41
|
+
def compare_with(operator, rhs)
|
42
|
+
# NOTE: lhs is always a DataWrapper
|
67
43
|
|
68
|
-
|
69
|
-
|
70
|
-
end
|
44
|
+
if !rhs.is_a?(DataWrapper) || @lhs.static? || rhs.static? || @lhs.side == rhs.side
|
45
|
+
@side = !@lhs.static? ? @lhs.side : rhs.side
|
71
46
|
|
72
|
-
|
73
|
-
|
74
|
-
if @lhs.is_a?(
|
75
|
-
|
76
|
-
|
77
|
-
elsif @
|
78
|
-
|
79
|
-
|
80
|
-
else
|
81
|
-
raise "Wonky filter"
|
47
|
+
# If one of the objects in this comparison is a static function, we need to set the side
|
48
|
+
# and the dataset based on the other object
|
49
|
+
if rhs.is_a?(DataWrapper) && !rhs.static? && @lhs.is_a?(FunctionWrapper) && @lhs.static?
|
50
|
+
@lhs.dataset = rhs.dataset
|
51
|
+
@lhs.side = @side
|
52
|
+
elsif @lhs.is_a?(DataWrapper) && !@lhs.static? && rhs.is_a?(FunctionWrapper) && rhs.static?
|
53
|
+
rhs.dataset = @lhs.dataset
|
54
|
+
rhs.side = @side
|
82
55
|
end
|
56
|
+
elsif rhs.is_a?(DataWrapper) && operator != :==
|
57
|
+
# create an exhaustive expectation with the Compare comparator instead
|
58
|
+
comparator = Comparators::Compare.new(@lhs.meta_object,
|
59
|
+
MetaObject.new(operator.to_s), rhs.meta_object)
|
83
60
|
|
84
|
-
|
85
|
-
|
86
|
-
@filter_expr =
|
87
|
-
case @operator
|
88
|
-
when :==
|
89
|
-
{ arg1 => arg2 }
|
90
|
-
when :'!='
|
91
|
-
~{ arg1 => arg2 }
|
92
|
-
else
|
93
|
-
arg1 = Sequel::SQL::Identifier.new(arg1)
|
94
|
-
arg2 = arg2.is_a?(Symbol) ? Sequel::SQL::Identifier.new(arg2) : arg2
|
95
|
-
Sequel::SQL::BooleanExpression.new(@operator, arg1, arg2)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
@filter_expr
|
99
|
-
end
|
61
|
+
score_range = Comparators::Compare.score_range
|
62
|
+
threshold = @type == :must ? score_range.last : score_range.first
|
100
63
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
return dataset.filter(filter_expr)
|
105
|
-
else
|
106
|
-
# Doesn't apply
|
107
|
-
return dataset
|
108
|
-
end
|
64
|
+
expectation = Expectations::Exhaustive.new(comparator, threshold, :equal)
|
65
|
+
@dsl.add_exhaustive_expectation(expectation)
|
66
|
+
return self
|
109
67
|
end
|
110
68
|
|
111
|
-
|
112
|
-
target = @lhs
|
113
|
-
elsif @rhs.is_a?(DataWrapper) && @rhs.side == side
|
114
|
-
target = @rhs
|
115
|
-
else
|
116
|
-
raise "Wonky expectation"
|
117
|
-
end
|
69
|
+
exp_operator = @type == :must_not ? OPERATOR_OPPOSITES[operator] : operator
|
118
70
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
71
|
+
rhs_meta_object = rhs.is_a?(DataWrapper) ? rhs.meta_object : MetaObject.new(rhs)
|
72
|
+
@expectation = Expectations::Simple.create(@lhs.meta_object,
|
73
|
+
rhs_meta_object, exp_operator)
|
74
|
+
@dsl.add_simple_expectation(@expectation)
|
75
|
+
self
|
76
|
+
end
|
124
77
|
|
125
|
-
|
78
|
+
VALID_OPERATORS.each do |operator|
|
79
|
+
define_method(operator) do |rhs|
|
80
|
+
compare_with(operator, rhs)
|
81
|
+
end
|
126
82
|
end
|
127
83
|
|
128
|
-
def
|
129
|
-
|
84
|
+
def exactly
|
85
|
+
if !@exact_match
|
86
|
+
@expectation.exactly!
|
87
|
+
end
|
130
88
|
end
|
131
89
|
end
|
132
90
|
|
133
91
|
class DataWrapper
|
134
|
-
attr_reader :
|
92
|
+
attr_reader :meta_object
|
135
93
|
|
136
94
|
def initialize
|
137
95
|
raise NotImplementedError
|
138
96
|
end
|
139
97
|
|
140
98
|
[:must, :must_not].each do |type|
|
141
|
-
define_method(type) do
|
142
|
-
|
99
|
+
define_method(type) do |*args|
|
100
|
+
if args.length > 0
|
101
|
+
wrapper = args[0]
|
102
|
+
comparator = wrapper.to_comparator(self)
|
103
|
+
|
104
|
+
score_range = wrapper.klass.score_range
|
105
|
+
threshold = type == :must ? score_range.last : score_range.first
|
106
|
+
|
107
|
+
expectation = Expectations::Exhaustive.new(comparator, threshold, :equal)
|
108
|
+
@dsl.add_exhaustive_expectation(expectation)
|
109
|
+
else
|
110
|
+
ExpectationWrapper.new(@dsl, type, self)
|
111
|
+
end
|
143
112
|
end
|
144
113
|
end
|
145
114
|
|
146
115
|
def compare_with(other)
|
147
116
|
VisualComparisonWrapper.new(@dsl, self, other)
|
148
117
|
end
|
118
|
+
|
119
|
+
def method_missing(m, *args, &block)
|
120
|
+
if meta_object.respond_to?(m)
|
121
|
+
meta_object.send(m, *args, &block)
|
122
|
+
else
|
123
|
+
super(m, *args, &block)
|
124
|
+
end
|
125
|
+
end
|
149
126
|
end
|
150
127
|
|
151
128
|
class FieldWrapper < DataWrapper
|
@@ -153,80 +130,51 @@ module Linkage
|
|
153
130
|
|
154
131
|
def initialize(dsl, side, dataset, name)
|
155
132
|
@dsl = dsl
|
156
|
-
@
|
157
|
-
@dataset = dataset
|
158
|
-
@name = name
|
159
|
-
end
|
160
|
-
|
161
|
-
def static?
|
162
|
-
false
|
163
|
-
end
|
164
|
-
|
165
|
-
def same_except_side?(other)
|
166
|
-
other.is_a?(FieldWrapper) && name == other.name
|
167
|
-
end
|
168
|
-
|
169
|
-
def data
|
170
|
-
@dataset.field_set[@name]
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_expr(side = nil)
|
174
|
-
data.to_expr
|
133
|
+
@meta_object = MetaObject.new(dataset.field_set[name], side)
|
175
134
|
end
|
176
135
|
end
|
177
136
|
|
178
137
|
class FunctionWrapper < DataWrapper
|
179
|
-
attr_reader :klass, :args
|
180
|
-
|
181
138
|
def initialize(dsl, klass, args)
|
182
139
|
@dsl = dsl
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
140
|
+
|
141
|
+
side = dataset = nil
|
142
|
+
static = true
|
143
|
+
function_args = []
|
187
144
|
args.each do |arg|
|
188
145
|
if arg.kind_of?(DataWrapper)
|
189
|
-
raise "conflicting sides" if
|
190
|
-
|
191
|
-
|
146
|
+
raise "conflicting sides" if side && side != arg.side
|
147
|
+
side = arg.side
|
148
|
+
static &&= arg.static?
|
149
|
+
dataset = arg.dataset
|
150
|
+
function_args << arg.object
|
151
|
+
else
|
152
|
+
function_args << arg
|
192
153
|
end
|
193
154
|
end
|
155
|
+
@meta_object = MetaObject.new(klass.new(*function_args), side)
|
194
156
|
end
|
157
|
+
end
|
195
158
|
|
196
|
-
|
197
|
-
|
198
|
-
end
|
199
|
-
|
200
|
-
def to_expr(side)
|
201
|
-
dataset = side == :lhs ? @dsl.lhs : @dsl.rhs
|
202
|
-
data.to_expr(dataset.dataset.adapter_scheme)
|
203
|
-
end
|
159
|
+
class ComparatorWrapper
|
160
|
+
attr_reader :klass, :args
|
204
161
|
|
205
|
-
def
|
206
|
-
|
162
|
+
def initialize(dsl, klass, args)
|
163
|
+
@dsl = dsl
|
164
|
+
@klass = klass
|
165
|
+
@args = args
|
207
166
|
end
|
208
167
|
|
209
|
-
def
|
210
|
-
@
|
168
|
+
def of(*args)
|
169
|
+
@args.push(*args)
|
170
|
+
self
|
211
171
|
end
|
212
172
|
|
213
|
-
def
|
214
|
-
|
215
|
-
|
216
|
-
other_arg = other.args[i]
|
217
|
-
if arg.is_a?(DataWrapper) && other_arg.is_a?(DataWrapper)
|
218
|
-
if !arg.same_except_side?(other_arg)
|
219
|
-
return false
|
220
|
-
end
|
221
|
-
else
|
222
|
-
if arg != other_arg
|
223
|
-
return false
|
224
|
-
end
|
225
|
-
end
|
226
|
-
end
|
227
|
-
return true
|
173
|
+
def to_comparator(receiver)
|
174
|
+
comparator_args = ([receiver] + @args).collect do |arg|
|
175
|
+
arg.is_a?(DataWrapper) ? arg.meta_object : MetaObject.new(arg)
|
228
176
|
end
|
229
|
-
|
177
|
+
comparator = klass.new(*comparator_args)
|
230
178
|
end
|
231
179
|
end
|
232
180
|
|
@@ -268,8 +216,12 @@ module Linkage
|
|
268
216
|
@config.results_uri_options = options
|
269
217
|
end
|
270
218
|
|
271
|
-
def
|
272
|
-
@config.
|
219
|
+
def set_record_cache_size(num)
|
220
|
+
@config.record_cache_size = num
|
221
|
+
end
|
222
|
+
|
223
|
+
def add_simple_expectation(expectation)
|
224
|
+
@config.add_simple_expectation(expectation)
|
273
225
|
|
274
226
|
if @config.linkage_type == :self
|
275
227
|
case expectation.kind
|
@@ -288,7 +240,7 @@ module Linkage
|
|
288
240
|
|
289
241
|
these_filters << expectation
|
290
242
|
other_filters.each do |other|
|
291
|
-
if !expectation.
|
243
|
+
if !expectation.same_except_side?(other)
|
292
244
|
@config.linkage_type = :cross
|
293
245
|
break
|
294
246
|
end
|
@@ -297,36 +249,98 @@ module Linkage
|
|
297
249
|
end
|
298
250
|
end
|
299
251
|
|
252
|
+
def add_exhaustive_expectation(expectation)
|
253
|
+
@config.add_exhaustive_expectation(expectation)
|
254
|
+
if @config.linkage_type == :self
|
255
|
+
@config.linkage_type = expectation.kind
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
300
259
|
def add_visual_comparison(visual_comparison)
|
301
260
|
@config.visual_comparisons << visual_comparison
|
302
261
|
end
|
303
262
|
|
304
|
-
|
263
|
+
def groups_table_name(new_name)
|
264
|
+
@config.groups_table_name = new_name
|
265
|
+
end
|
266
|
+
|
267
|
+
def original_groups_table_name(new_name)
|
268
|
+
@config.original_groups_table_name = new_name
|
269
|
+
end
|
270
|
+
|
271
|
+
def scores_table_name(new_name)
|
272
|
+
@config.scores_table_name = new_name
|
273
|
+
end
|
274
|
+
|
275
|
+
def matches_table_name(new_name)
|
276
|
+
@config.matches_table_name = new_name
|
277
|
+
end
|
278
|
+
|
305
279
|
def method_missing(name, *args, &block)
|
306
|
-
|
307
|
-
|
308
|
-
|
280
|
+
# check for comparators
|
281
|
+
md = name.to_s.match(/^be_(.+)$/)
|
282
|
+
if md
|
283
|
+
klass = Comparator[md[1]]
|
284
|
+
if klass
|
285
|
+
ComparatorWrapper.new(self, klass, args)
|
286
|
+
else
|
287
|
+
super
|
288
|
+
end
|
309
289
|
else
|
310
|
-
|
290
|
+
# check for functions
|
291
|
+
klass = Function[name.to_s]
|
292
|
+
if klass
|
293
|
+
FunctionWrapper.new(self, klass, args)
|
294
|
+
else
|
295
|
+
super
|
296
|
+
end
|
311
297
|
end
|
312
298
|
end
|
313
299
|
end
|
314
300
|
|
315
|
-
attr_reader :dataset_1, :dataset_2, :
|
316
|
-
|
301
|
+
attr_reader :dataset_1, :dataset_2, :simple_expectations,
|
302
|
+
:exhaustive_expectations, :visual_comparisons
|
303
|
+
attr_accessor :linkage_type, :results_uri, :results_uri_options,
|
304
|
+
:record_cache_size, :groups_table_name, :original_groups_table_name,
|
305
|
+
:scores_table_name, :matches_table_name
|
317
306
|
|
318
307
|
def initialize(dataset_1, dataset_2)
|
319
308
|
@dataset_1 = dataset_1
|
320
309
|
@dataset_2 = dataset_2
|
321
310
|
@linkage_type = dataset_1 == dataset_2 ? :self : :dual
|
322
|
-
@
|
311
|
+
@simple_expectations = []
|
312
|
+
@exhaustive_expectations = []
|
323
313
|
@visual_comparisons = []
|
314
|
+
@results_uri_options = {}
|
315
|
+
@decollation_needed = false
|
316
|
+
@record_cache_size = 10_000
|
317
|
+
@groups_table_name = :groups
|
318
|
+
@original_groups_table_name = :original_groups
|
319
|
+
@scores_table_name = :scores
|
320
|
+
@matches_table_name = :matches
|
324
321
|
end
|
325
322
|
|
326
323
|
def configure(&block)
|
327
324
|
DSL.new(self, &block)
|
328
325
|
end
|
329
326
|
|
327
|
+
def results_uri=(uri)
|
328
|
+
@results_uri = uri
|
329
|
+
if !@decollation_needed
|
330
|
+
@simple_expectations.each do |expectation|
|
331
|
+
if decollation_needed_for_simple_expectation?(expectation)
|
332
|
+
@decollation_needed = true
|
333
|
+
break
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|
337
|
+
uri
|
338
|
+
end
|
339
|
+
|
340
|
+
def decollation_needed?
|
341
|
+
@decollation_needed
|
342
|
+
end
|
343
|
+
|
330
344
|
def groups_table_schema
|
331
345
|
schema = []
|
332
346
|
|
@@ -334,29 +348,147 @@ module Linkage
|
|
334
348
|
schema << [:id, Integer, {:primary_key => true}]
|
335
349
|
|
336
350
|
# add values
|
337
|
-
@
|
351
|
+
@simple_expectations.each do |exp|
|
338
352
|
next if exp.kind == :filter
|
339
353
|
|
340
354
|
merged_field = exp.merged_field
|
341
355
|
merged_type = merged_field.ruby_type
|
342
|
-
|
356
|
+
|
357
|
+
# if the merged field's database type is different than the result
|
358
|
+
# database, strip collation information
|
359
|
+
result_db_type = nil
|
360
|
+
result_set.database do |db|
|
361
|
+
result_db_type = db.database_type
|
362
|
+
end
|
363
|
+
if merged_field.database_type != result_db_type && merged_type.has_key?(:opts)
|
364
|
+
new_opts = merged_type[:opts].reject { |k, v| k == :collate }
|
365
|
+
merged_type = merged_type.merge(:opts => new_opts)
|
366
|
+
end
|
367
|
+
|
368
|
+
col = [merged_field.name, merged_type[:type], merged_type[:opts] || {}]
|
369
|
+
schema << col
|
343
370
|
end
|
344
371
|
|
345
372
|
schema
|
346
373
|
end
|
347
374
|
|
375
|
+
def scores_table_schema
|
376
|
+
schema = []
|
377
|
+
|
378
|
+
# add id
|
379
|
+
schema << [:id, Integer, {:primary_key => true}]
|
380
|
+
|
381
|
+
# add comparator id
|
382
|
+
schema << [:comparator_id, Integer, {}]
|
383
|
+
|
384
|
+
# add record ids
|
385
|
+
pk = dataset_1.field_set.primary_key
|
386
|
+
ruby_type = pk.ruby_type
|
387
|
+
schema << [:record_1_id, ruby_type[:type], ruby_type[:opts] || {}]
|
388
|
+
|
389
|
+
pk = dataset_2.field_set.primary_key
|
390
|
+
ruby_type = pk.ruby_type
|
391
|
+
schema << [:record_2_id, ruby_type[:type], ruby_type[:opts] || {}]
|
392
|
+
|
393
|
+
# add score
|
394
|
+
schema << [:score, Integer, {}]
|
395
|
+
|
396
|
+
schema
|
397
|
+
end
|
398
|
+
|
399
|
+
def matches_table_schema
|
400
|
+
schema = []
|
401
|
+
|
402
|
+
# add id
|
403
|
+
schema << [:id, Integer, {:primary_key => true}]
|
404
|
+
|
405
|
+
# add record ids
|
406
|
+
pk = dataset_1.field_set.primary_key
|
407
|
+
ruby_type = pk.ruby_type
|
408
|
+
schema << [:record_1_id, ruby_type[:type], ruby_type[:opts] || {}]
|
409
|
+
|
410
|
+
pk = dataset_2.field_set.primary_key
|
411
|
+
ruby_type = pk.ruby_type
|
412
|
+
schema << [:record_2_id, ruby_type[:type], ruby_type[:opts] || {}]
|
413
|
+
|
414
|
+
# add score
|
415
|
+
schema << [:total_score, Integer, {}]
|
416
|
+
|
417
|
+
schema
|
418
|
+
end
|
419
|
+
|
420
|
+
def add_simple_expectation(expectation)
|
421
|
+
@simple_expectations << expectation
|
422
|
+
@decollation_needed ||= decollation_needed_for_simple_expectation?(expectation)
|
423
|
+
expectation
|
424
|
+
end
|
425
|
+
|
426
|
+
def add_exhaustive_expectation(expectation)
|
427
|
+
@exhaustive_expectations << expectation
|
428
|
+
expectation
|
429
|
+
end
|
430
|
+
|
348
431
|
def result_set
|
349
432
|
@result_set ||= ResultSet.new(self)
|
350
433
|
end
|
351
434
|
|
352
|
-
def
|
435
|
+
def datasets_with_applied_simple_expectations
|
353
436
|
dataset_1 = @dataset_1
|
354
437
|
dataset_2 = @dataset_2
|
355
|
-
@
|
438
|
+
@simple_expectations.each do |exp|
|
356
439
|
dataset_1 = exp.apply_to(dataset_1, :lhs)
|
357
440
|
dataset_2 = exp.apply_to(dataset_2, :rhs) if @linkage_type != :self
|
358
441
|
end
|
359
442
|
@linkage_type == :self ? [dataset_1, dataset_1] : [dataset_1, dataset_2]
|
360
443
|
end
|
444
|
+
|
445
|
+
def datasets_with_applied_exhaustive_expectations
|
446
|
+
apply_exhaustive_expectations(@dataset_1, @dataset_2)
|
447
|
+
end
|
448
|
+
|
449
|
+
def apply_exhaustive_expectations(dataset_1, dataset_2)
|
450
|
+
dataset_1 = dataset_1.select(dataset_1.field_set.primary_key.to_expr)
|
451
|
+
dataset_2 = dataset_2.select(dataset_2.field_set.primary_key.to_expr)
|
452
|
+
@exhaustive_expectations.each do |exp|
|
453
|
+
dataset_1 = exp.apply_to(dataset_1, :lhs)
|
454
|
+
dataset_2 = exp.apply_to(dataset_2, :rhs)
|
455
|
+
end
|
456
|
+
[dataset_1, dataset_2]
|
457
|
+
end
|
458
|
+
|
459
|
+
def groups_table_needed?
|
460
|
+
has_simple_expectations?
|
461
|
+
end
|
462
|
+
|
463
|
+
def scores_table_needed?
|
464
|
+
has_exhaustive_expectations?
|
465
|
+
end
|
466
|
+
|
467
|
+
def has_simple_expectations?
|
468
|
+
!@simple_expectations.empty?
|
469
|
+
end
|
470
|
+
|
471
|
+
def has_exhaustive_expectations?
|
472
|
+
!@exhaustive_expectations.empty?
|
473
|
+
end
|
474
|
+
|
475
|
+
private
|
476
|
+
|
477
|
+
def decollation_needed_for_simple_expectation?(expectation)
|
478
|
+
if expectation.decollation_needed?
|
479
|
+
true
|
480
|
+
elsif results_uri && expectation.kind != :filter
|
481
|
+
result_set_database_type = ResultSet.new(self).database.database_type
|
482
|
+
database_types_differ =
|
483
|
+
result_set_database_type != dataset_1.database_type ||
|
484
|
+
result_set_database_type != dataset_2.database_type
|
485
|
+
|
486
|
+
merged_field = expectation.merged_field
|
487
|
+
merged_field.ruby_type[:type] == String &&
|
488
|
+
!merged_field.collation.nil? && database_types_differ
|
489
|
+
else
|
490
|
+
false
|
491
|
+
end
|
492
|
+
end
|
361
493
|
end
|
362
494
|
end
|
data/lib/linkage/data.rb
CHANGED
@@ -17,9 +17,14 @@ module Linkage
|
|
17
17
|
File => nil
|
18
18
|
}
|
19
19
|
|
20
|
-
#
|
20
|
+
# @!attribute [r] name
|
21
|
+
# @return [Symbol] This object's name
|
21
22
|
attr_reader :name
|
22
23
|
|
24
|
+
# @!attribute [r] dataset
|
25
|
+
# @return [Linkage::Dataset, nil] This object's dataset, or nil
|
26
|
+
attr_reader :dataset
|
27
|
+
|
23
28
|
def initialize(name)
|
24
29
|
@name = name
|
25
30
|
end
|
@@ -28,20 +33,35 @@ module Linkage
|
|
28
33
|
raise NotImplementedError
|
29
34
|
end
|
30
35
|
|
31
|
-
def to_expr
|
36
|
+
def to_expr
|
37
|
+
raise NotImplementedError
|
38
|
+
end
|
39
|
+
|
40
|
+
def collation
|
41
|
+
nil
|
42
|
+
end
|
43
|
+
|
44
|
+
def database_type
|
45
|
+
ds = dataset
|
46
|
+
ds ? ds.database_type : nil
|
47
|
+
end
|
48
|
+
|
49
|
+
def static?
|
32
50
|
raise NotImplementedError
|
33
51
|
end
|
34
52
|
|
35
|
-
# Create a
|
36
|
-
# have different types, the resulting type is determined via a
|
53
|
+
# Create a merge field that can hold data from two data sources. If the
|
54
|
+
# fields have different types, the resulting type is determined via a
|
37
55
|
# type-conversion tree.
|
38
56
|
#
|
39
57
|
# @param [Linkage::Data] other
|
40
|
-
# @return [Linkage::
|
58
|
+
# @return [Linkage::MergeField]
|
41
59
|
def merge(other, new_name = nil)
|
42
60
|
schema_1 = self.ruby_type
|
61
|
+
db_type_1 = self.database_type
|
43
62
|
schema_2 = other.ruby_type
|
44
|
-
|
63
|
+
db_type_2 = other.database_type
|
64
|
+
if schema_1 == schema_2 && db_type_1 == db_type_2
|
45
65
|
result = schema_1
|
46
66
|
else
|
47
67
|
type_1 = schema_1[:type]
|
@@ -113,6 +133,11 @@ module Linkage
|
|
113
133
|
result_opts[:fixed] = true
|
114
134
|
end
|
115
135
|
|
136
|
+
# collation
|
137
|
+
if opts_1[:collate] != opts_2[:collate] || db_type_1 != db_type_2
|
138
|
+
result_opts.delete(:collate)
|
139
|
+
end
|
140
|
+
|
116
141
|
result = {:type => result_type}
|
117
142
|
result[:opts] = result_opts unless result_opts.empty?
|
118
143
|
end
|
@@ -122,7 +147,7 @@ module Linkage
|
|
122
147
|
else
|
123
148
|
name = self.name == other.name ? self.name : :"#{self.name}_#{other.name}"
|
124
149
|
end
|
125
|
-
|
150
|
+
MergeField.new(name, result, db_type_1 == db_type_2 ? db_type_1 : nil)
|
126
151
|
end
|
127
152
|
|
128
153
|
private
|