linkage 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/Gemfile +15 -13
- data/Gemfile.lock +67 -37
- data/Guardfile +0 -2
- data/Rakefile +122 -25
- data/lib/linkage/comparator.rb +172 -0
- data/lib/linkage/comparators/binary.rb +12 -0
- data/lib/linkage/comparators/compare.rb +46 -0
- data/lib/linkage/comparators/within.rb +32 -0
- data/lib/linkage/configuration.rb +285 -153
- data/lib/linkage/data.rb +32 -7
- data/lib/linkage/dataset.rb +107 -32
- data/lib/linkage/decollation.rb +93 -0
- data/lib/linkage/expectation.rb +21 -0
- data/lib/linkage/expectations/exhaustive.rb +63 -0
- data/lib/linkage/expectations/simple.rb +168 -0
- data/lib/linkage/field.rb +30 -4
- data/lib/linkage/field_set.rb +6 -3
- data/lib/linkage/function.rb +50 -3
- data/lib/linkage/functions/binary.rb +30 -0
- data/lib/linkage/functions/cast.rb +54 -0
- data/lib/linkage/functions/length.rb +29 -0
- data/lib/linkage/functions/strftime.rb +12 -11
- data/lib/linkage/functions/trim.rb +8 -0
- data/lib/linkage/group.rb +20 -0
- data/lib/linkage/import_buffer.rb +5 -16
- data/lib/linkage/meta_object.rb +139 -0
- data/lib/linkage/result_set.rb +74 -17
- data/lib/linkage/runner/single_threaded.rb +125 -10
- data/lib/linkage/version.rb +3 -0
- data/lib/linkage.rb +11 -0
- data/linkage.gemspec +16 -121
- data/test/config.yml +5 -0
- data/test/helper.rb +73 -8
- data/test/integration/test_collation.rb +45 -0
- data/test/integration/test_configuration.rb +268 -0
- data/test/integration/test_cross_linkage.rb +4 -17
- data/test/integration/test_dataset.rb +45 -2
- data/test/integration/test_dual_linkage.rb +40 -24
- data/test/integration/test_functions.rb +22 -0
- data/test/integration/test_result_set.rb +85 -0
- data/test/integration/test_scoring.rb +84 -0
- data/test/integration/test_self_linkage.rb +5 -0
- data/test/integration/test_within_comparator.rb +100 -0
- data/test/unit/comparators/test_compare.rb +105 -0
- data/test/unit/comparators/test_within.rb +57 -0
- data/test/unit/expectations/test_exhaustive.rb +111 -0
- data/test/unit/expectations/test_simple.rb +303 -0
- data/test/unit/functions/test_binary.rb +54 -0
- data/test/unit/functions/test_cast.rb +98 -0
- data/test/unit/functions/test_length.rb +52 -0
- data/test/unit/functions/test_strftime.rb +17 -13
- data/test/unit/functions/test_trim.rb +11 -4
- data/test/unit/test_comparator.rb +124 -0
- data/test/unit/test_configuration.rb +137 -175
- data/test/unit/test_data.rb +44 -0
- data/test/unit/test_dataset.rb +73 -21
- data/test/unit/test_decollation.rb +201 -0
- data/test/unit/test_field.rb +38 -14
- data/test/unit/test_field_set.rb +12 -8
- data/test/unit/test_function.rb +83 -16
- data/test/unit/test_group.rb +28 -0
- data/test/unit/test_import_buffer.rb +13 -27
- data/test/unit/test_meta_object.rb +208 -0
- data/test/unit/test_result_set.rb +221 -3
- metadata +82 -190
@@ -1,7 +1,6 @@
|
|
1
1
|
module Linkage
|
2
2
|
class Configuration
|
3
3
|
class DSL
|
4
|
-
|
5
4
|
# Class for visually comparing matched records
|
6
5
|
class VisualComparisonWrapper
|
7
6
|
attr_reader :dsl, :lhs, :rhs
|
@@ -33,119 +32,97 @@ module Linkage
|
|
33
32
|
:>= => :<
|
34
33
|
}
|
35
34
|
|
36
|
-
|
37
|
-
|
38
|
-
def initialize(dsl, type, lhs)
|
35
|
+
def initialize(dsl, type, lhs, *args)
|
39
36
|
@dsl = dsl
|
40
37
|
@type = type
|
41
38
|
@lhs = lhs
|
42
|
-
@rhs = nil
|
43
|
-
@side = nil
|
44
|
-
@kind = nil
|
45
39
|
end
|
46
40
|
|
47
|
-
|
48
|
-
|
49
|
-
# NOTE: lhs is always a DataWrapper
|
50
|
-
|
51
|
-
@rhs = rhs
|
52
|
-
if !@rhs.is_a?(DataWrapper) || @lhs.static? || @rhs.static? || @lhs.side == @rhs.side
|
53
|
-
@side = @lhs.side
|
54
|
-
@side = @rhs.side if @side.nil? && @rhs.is_a?(DataWrapper)
|
55
|
-
@kind = :filter
|
56
|
-
elsif @lhs.same_except_side?(@rhs)
|
57
|
-
@kind = :self
|
58
|
-
elsif @lhs.dataset == @rhs.dataset
|
59
|
-
@kind = :cross
|
60
|
-
else
|
61
|
-
@kind = :dual
|
62
|
-
end
|
63
|
-
@operator = @type == :must_not ? OPERATOR_OPPOSITES[operator] : operator
|
64
|
-
@dsl.add_expectation(self)
|
65
|
-
end
|
66
|
-
end
|
41
|
+
def compare_with(operator, rhs)
|
42
|
+
# NOTE: lhs is always a DataWrapper
|
67
43
|
|
68
|
-
|
69
|
-
|
70
|
-
end
|
44
|
+
if !rhs.is_a?(DataWrapper) || @lhs.static? || rhs.static? || @lhs.side == rhs.side
|
45
|
+
@side = !@lhs.static? ? @lhs.side : rhs.side
|
71
46
|
|
72
|
-
|
73
|
-
|
74
|
-
if @lhs.is_a?(
|
75
|
-
|
76
|
-
|
77
|
-
elsif @
|
78
|
-
|
79
|
-
|
80
|
-
else
|
81
|
-
raise "Wonky filter"
|
47
|
+
# If one of the objects in this comparison is a static function, we need to set the side
|
48
|
+
# and the dataset based on the other object
|
49
|
+
if rhs.is_a?(DataWrapper) && !rhs.static? && @lhs.is_a?(FunctionWrapper) && @lhs.static?
|
50
|
+
@lhs.dataset = rhs.dataset
|
51
|
+
@lhs.side = @side
|
52
|
+
elsif @lhs.is_a?(DataWrapper) && !@lhs.static? && rhs.is_a?(FunctionWrapper) && rhs.static?
|
53
|
+
rhs.dataset = @lhs.dataset
|
54
|
+
rhs.side = @side
|
82
55
|
end
|
56
|
+
elsif rhs.is_a?(DataWrapper) && operator != :==
|
57
|
+
# create an exhaustive expectation with the Compare comparator instead
|
58
|
+
comparator = Comparators::Compare.new(@lhs.meta_object,
|
59
|
+
MetaObject.new(operator.to_s), rhs.meta_object)
|
83
60
|
|
84
|
-
|
85
|
-
|
86
|
-
@filter_expr =
|
87
|
-
case @operator
|
88
|
-
when :==
|
89
|
-
{ arg1 => arg2 }
|
90
|
-
when :'!='
|
91
|
-
~{ arg1 => arg2 }
|
92
|
-
else
|
93
|
-
arg1 = Sequel::SQL::Identifier.new(arg1)
|
94
|
-
arg2 = arg2.is_a?(Symbol) ? Sequel::SQL::Identifier.new(arg2) : arg2
|
95
|
-
Sequel::SQL::BooleanExpression.new(@operator, arg1, arg2)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
@filter_expr
|
99
|
-
end
|
61
|
+
score_range = Comparators::Compare.score_range
|
62
|
+
threshold = @type == :must ? score_range.last : score_range.first
|
100
63
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
return dataset.filter(filter_expr)
|
105
|
-
else
|
106
|
-
# Doesn't apply
|
107
|
-
return dataset
|
108
|
-
end
|
64
|
+
expectation = Expectations::Exhaustive.new(comparator, threshold, :equal)
|
65
|
+
@dsl.add_exhaustive_expectation(expectation)
|
66
|
+
return self
|
109
67
|
end
|
110
68
|
|
111
|
-
|
112
|
-
target = @lhs
|
113
|
-
elsif @rhs.is_a?(DataWrapper) && @rhs.side == side
|
114
|
-
target = @rhs
|
115
|
-
else
|
116
|
-
raise "Wonky expectation"
|
117
|
-
end
|
69
|
+
exp_operator = @type == :must_not ? OPERATOR_OPPOSITES[operator] : operator
|
118
70
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
71
|
+
rhs_meta_object = rhs.is_a?(DataWrapper) ? rhs.meta_object : MetaObject.new(rhs)
|
72
|
+
@expectation = Expectations::Simple.create(@lhs.meta_object,
|
73
|
+
rhs_meta_object, exp_operator)
|
74
|
+
@dsl.add_simple_expectation(@expectation)
|
75
|
+
self
|
76
|
+
end
|
124
77
|
|
125
|
-
|
78
|
+
VALID_OPERATORS.each do |operator|
|
79
|
+
define_method(operator) do |rhs|
|
80
|
+
compare_with(operator, rhs)
|
81
|
+
end
|
126
82
|
end
|
127
83
|
|
128
|
-
def
|
129
|
-
|
84
|
+
def exactly
|
85
|
+
if !@exact_match
|
86
|
+
@expectation.exactly!
|
87
|
+
end
|
130
88
|
end
|
131
89
|
end
|
132
90
|
|
133
91
|
class DataWrapper
|
134
|
-
attr_reader :
|
92
|
+
attr_reader :meta_object
|
135
93
|
|
136
94
|
def initialize
|
137
95
|
raise NotImplementedError
|
138
96
|
end
|
139
97
|
|
140
98
|
[:must, :must_not].each do |type|
|
141
|
-
define_method(type) do
|
142
|
-
|
99
|
+
define_method(type) do |*args|
|
100
|
+
if args.length > 0
|
101
|
+
wrapper = args[0]
|
102
|
+
comparator = wrapper.to_comparator(self)
|
103
|
+
|
104
|
+
score_range = wrapper.klass.score_range
|
105
|
+
threshold = type == :must ? score_range.last : score_range.first
|
106
|
+
|
107
|
+
expectation = Expectations::Exhaustive.new(comparator, threshold, :equal)
|
108
|
+
@dsl.add_exhaustive_expectation(expectation)
|
109
|
+
else
|
110
|
+
ExpectationWrapper.new(@dsl, type, self)
|
111
|
+
end
|
143
112
|
end
|
144
113
|
end
|
145
114
|
|
146
115
|
def compare_with(other)
|
147
116
|
VisualComparisonWrapper.new(@dsl, self, other)
|
148
117
|
end
|
118
|
+
|
119
|
+
def method_missing(m, *args, &block)
|
120
|
+
if meta_object.respond_to?(m)
|
121
|
+
meta_object.send(m, *args, &block)
|
122
|
+
else
|
123
|
+
super(m, *args, &block)
|
124
|
+
end
|
125
|
+
end
|
149
126
|
end
|
150
127
|
|
151
128
|
class FieldWrapper < DataWrapper
|
@@ -153,80 +130,51 @@ module Linkage
|
|
153
130
|
|
154
131
|
def initialize(dsl, side, dataset, name)
|
155
132
|
@dsl = dsl
|
156
|
-
@
|
157
|
-
@dataset = dataset
|
158
|
-
@name = name
|
159
|
-
end
|
160
|
-
|
161
|
-
def static?
|
162
|
-
false
|
163
|
-
end
|
164
|
-
|
165
|
-
def same_except_side?(other)
|
166
|
-
other.is_a?(FieldWrapper) && name == other.name
|
167
|
-
end
|
168
|
-
|
169
|
-
def data
|
170
|
-
@dataset.field_set[@name]
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_expr(side = nil)
|
174
|
-
data.to_expr
|
133
|
+
@meta_object = MetaObject.new(dataset.field_set[name], side)
|
175
134
|
end
|
176
135
|
end
|
177
136
|
|
178
137
|
class FunctionWrapper < DataWrapper
|
179
|
-
attr_reader :klass, :args
|
180
|
-
|
181
138
|
def initialize(dsl, klass, args)
|
182
139
|
@dsl = dsl
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
140
|
+
|
141
|
+
side = dataset = nil
|
142
|
+
static = true
|
143
|
+
function_args = []
|
187
144
|
args.each do |arg|
|
188
145
|
if arg.kind_of?(DataWrapper)
|
189
|
-
raise "conflicting sides" if
|
190
|
-
|
191
|
-
|
146
|
+
raise "conflicting sides" if side && side != arg.side
|
147
|
+
side = arg.side
|
148
|
+
static &&= arg.static?
|
149
|
+
dataset = arg.dataset
|
150
|
+
function_args << arg.object
|
151
|
+
else
|
152
|
+
function_args << arg
|
192
153
|
end
|
193
154
|
end
|
155
|
+
@meta_object = MetaObject.new(klass.new(*function_args), side)
|
194
156
|
end
|
157
|
+
end
|
195
158
|
|
196
|
-
|
197
|
-
|
198
|
-
end
|
199
|
-
|
200
|
-
def to_expr(side)
|
201
|
-
dataset = side == :lhs ? @dsl.lhs : @dsl.rhs
|
202
|
-
data.to_expr(dataset.dataset.adapter_scheme)
|
203
|
-
end
|
159
|
+
class ComparatorWrapper
|
160
|
+
attr_reader :klass, :args
|
204
161
|
|
205
|
-
def
|
206
|
-
|
162
|
+
def initialize(dsl, klass, args)
|
163
|
+
@dsl = dsl
|
164
|
+
@klass = klass
|
165
|
+
@args = args
|
207
166
|
end
|
208
167
|
|
209
|
-
def
|
210
|
-
@
|
168
|
+
def of(*args)
|
169
|
+
@args.push(*args)
|
170
|
+
self
|
211
171
|
end
|
212
172
|
|
213
|
-
def
|
214
|
-
|
215
|
-
|
216
|
-
other_arg = other.args[i]
|
217
|
-
if arg.is_a?(DataWrapper) && other_arg.is_a?(DataWrapper)
|
218
|
-
if !arg.same_except_side?(other_arg)
|
219
|
-
return false
|
220
|
-
end
|
221
|
-
else
|
222
|
-
if arg != other_arg
|
223
|
-
return false
|
224
|
-
end
|
225
|
-
end
|
226
|
-
end
|
227
|
-
return true
|
173
|
+
def to_comparator(receiver)
|
174
|
+
comparator_args = ([receiver] + @args).collect do |arg|
|
175
|
+
arg.is_a?(DataWrapper) ? arg.meta_object : MetaObject.new(arg)
|
228
176
|
end
|
229
|
-
|
177
|
+
comparator = klass.new(*comparator_args)
|
230
178
|
end
|
231
179
|
end
|
232
180
|
|
@@ -268,8 +216,12 @@ module Linkage
|
|
268
216
|
@config.results_uri_options = options
|
269
217
|
end
|
270
218
|
|
271
|
-
def
|
272
|
-
@config.
|
219
|
+
def set_record_cache_size(num)
|
220
|
+
@config.record_cache_size = num
|
221
|
+
end
|
222
|
+
|
223
|
+
def add_simple_expectation(expectation)
|
224
|
+
@config.add_simple_expectation(expectation)
|
273
225
|
|
274
226
|
if @config.linkage_type == :self
|
275
227
|
case expectation.kind
|
@@ -288,7 +240,7 @@ module Linkage
|
|
288
240
|
|
289
241
|
these_filters << expectation
|
290
242
|
other_filters.each do |other|
|
291
|
-
if !expectation.
|
243
|
+
if !expectation.same_except_side?(other)
|
292
244
|
@config.linkage_type = :cross
|
293
245
|
break
|
294
246
|
end
|
@@ -297,36 +249,98 @@ module Linkage
|
|
297
249
|
end
|
298
250
|
end
|
299
251
|
|
252
|
+
def add_exhaustive_expectation(expectation)
|
253
|
+
@config.add_exhaustive_expectation(expectation)
|
254
|
+
if @config.linkage_type == :self
|
255
|
+
@config.linkage_type = expectation.kind
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
300
259
|
def add_visual_comparison(visual_comparison)
|
301
260
|
@config.visual_comparisons << visual_comparison
|
302
261
|
end
|
303
262
|
|
304
|
-
|
263
|
+
def groups_table_name(new_name)
|
264
|
+
@config.groups_table_name = new_name
|
265
|
+
end
|
266
|
+
|
267
|
+
def original_groups_table_name(new_name)
|
268
|
+
@config.original_groups_table_name = new_name
|
269
|
+
end
|
270
|
+
|
271
|
+
def scores_table_name(new_name)
|
272
|
+
@config.scores_table_name = new_name
|
273
|
+
end
|
274
|
+
|
275
|
+
def matches_table_name(new_name)
|
276
|
+
@config.matches_table_name = new_name
|
277
|
+
end
|
278
|
+
|
305
279
|
def method_missing(name, *args, &block)
|
306
|
-
|
307
|
-
|
308
|
-
|
280
|
+
# check for comparators
|
281
|
+
md = name.to_s.match(/^be_(.+)$/)
|
282
|
+
if md
|
283
|
+
klass = Comparator[md[1]]
|
284
|
+
if klass
|
285
|
+
ComparatorWrapper.new(self, klass, args)
|
286
|
+
else
|
287
|
+
super
|
288
|
+
end
|
309
289
|
else
|
310
|
-
|
290
|
+
# check for functions
|
291
|
+
klass = Function[name.to_s]
|
292
|
+
if klass
|
293
|
+
FunctionWrapper.new(self, klass, args)
|
294
|
+
else
|
295
|
+
super
|
296
|
+
end
|
311
297
|
end
|
312
298
|
end
|
313
299
|
end
|
314
300
|
|
315
|
-
attr_reader :dataset_1, :dataset_2, :
|
316
|
-
|
301
|
+
attr_reader :dataset_1, :dataset_2, :simple_expectations,
|
302
|
+
:exhaustive_expectations, :visual_comparisons
|
303
|
+
attr_accessor :linkage_type, :results_uri, :results_uri_options,
|
304
|
+
:record_cache_size, :groups_table_name, :original_groups_table_name,
|
305
|
+
:scores_table_name, :matches_table_name
|
317
306
|
|
318
307
|
def initialize(dataset_1, dataset_2)
|
319
308
|
@dataset_1 = dataset_1
|
320
309
|
@dataset_2 = dataset_2
|
321
310
|
@linkage_type = dataset_1 == dataset_2 ? :self : :dual
|
322
|
-
@
|
311
|
+
@simple_expectations = []
|
312
|
+
@exhaustive_expectations = []
|
323
313
|
@visual_comparisons = []
|
314
|
+
@results_uri_options = {}
|
315
|
+
@decollation_needed = false
|
316
|
+
@record_cache_size = 10_000
|
317
|
+
@groups_table_name = :groups
|
318
|
+
@original_groups_table_name = :original_groups
|
319
|
+
@scores_table_name = :scores
|
320
|
+
@matches_table_name = :matches
|
324
321
|
end
|
325
322
|
|
326
323
|
def configure(&block)
|
327
324
|
DSL.new(self, &block)
|
328
325
|
end
|
329
326
|
|
327
|
+
def results_uri=(uri)
|
328
|
+
@results_uri = uri
|
329
|
+
if !@decollation_needed
|
330
|
+
@simple_expectations.each do |expectation|
|
331
|
+
if decollation_needed_for_simple_expectation?(expectation)
|
332
|
+
@decollation_needed = true
|
333
|
+
break
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|
337
|
+
uri
|
338
|
+
end
|
339
|
+
|
340
|
+
def decollation_needed?
|
341
|
+
@decollation_needed
|
342
|
+
end
|
343
|
+
|
330
344
|
def groups_table_schema
|
331
345
|
schema = []
|
332
346
|
|
@@ -334,29 +348,147 @@ module Linkage
|
|
334
348
|
schema << [:id, Integer, {:primary_key => true}]
|
335
349
|
|
336
350
|
# add values
|
337
|
-
@
|
351
|
+
@simple_expectations.each do |exp|
|
338
352
|
next if exp.kind == :filter
|
339
353
|
|
340
354
|
merged_field = exp.merged_field
|
341
355
|
merged_type = merged_field.ruby_type
|
342
|
-
|
356
|
+
|
357
|
+
# if the merged field's database type is different than the result
|
358
|
+
# database, strip collation information
|
359
|
+
result_db_type = nil
|
360
|
+
result_set.database do |db|
|
361
|
+
result_db_type = db.database_type
|
362
|
+
end
|
363
|
+
if merged_field.database_type != result_db_type && merged_type.has_key?(:opts)
|
364
|
+
new_opts = merged_type[:opts].reject { |k, v| k == :collate }
|
365
|
+
merged_type = merged_type.merge(:opts => new_opts)
|
366
|
+
end
|
367
|
+
|
368
|
+
col = [merged_field.name, merged_type[:type], merged_type[:opts] || {}]
|
369
|
+
schema << col
|
343
370
|
end
|
344
371
|
|
345
372
|
schema
|
346
373
|
end
|
347
374
|
|
375
|
+
def scores_table_schema
|
376
|
+
schema = []
|
377
|
+
|
378
|
+
# add id
|
379
|
+
schema << [:id, Integer, {:primary_key => true}]
|
380
|
+
|
381
|
+
# add comparator id
|
382
|
+
schema << [:comparator_id, Integer, {}]
|
383
|
+
|
384
|
+
# add record ids
|
385
|
+
pk = dataset_1.field_set.primary_key
|
386
|
+
ruby_type = pk.ruby_type
|
387
|
+
schema << [:record_1_id, ruby_type[:type], ruby_type[:opts] || {}]
|
388
|
+
|
389
|
+
pk = dataset_2.field_set.primary_key
|
390
|
+
ruby_type = pk.ruby_type
|
391
|
+
schema << [:record_2_id, ruby_type[:type], ruby_type[:opts] || {}]
|
392
|
+
|
393
|
+
# add score
|
394
|
+
schema << [:score, Integer, {}]
|
395
|
+
|
396
|
+
schema
|
397
|
+
end
|
398
|
+
|
399
|
+
def matches_table_schema
|
400
|
+
schema = []
|
401
|
+
|
402
|
+
# add id
|
403
|
+
schema << [:id, Integer, {:primary_key => true}]
|
404
|
+
|
405
|
+
# add record ids
|
406
|
+
pk = dataset_1.field_set.primary_key
|
407
|
+
ruby_type = pk.ruby_type
|
408
|
+
schema << [:record_1_id, ruby_type[:type], ruby_type[:opts] || {}]
|
409
|
+
|
410
|
+
pk = dataset_2.field_set.primary_key
|
411
|
+
ruby_type = pk.ruby_type
|
412
|
+
schema << [:record_2_id, ruby_type[:type], ruby_type[:opts] || {}]
|
413
|
+
|
414
|
+
# add score
|
415
|
+
schema << [:total_score, Integer, {}]
|
416
|
+
|
417
|
+
schema
|
418
|
+
end
|
419
|
+
|
420
|
+
def add_simple_expectation(expectation)
|
421
|
+
@simple_expectations << expectation
|
422
|
+
@decollation_needed ||= decollation_needed_for_simple_expectation?(expectation)
|
423
|
+
expectation
|
424
|
+
end
|
425
|
+
|
426
|
+
def add_exhaustive_expectation(expectation)
|
427
|
+
@exhaustive_expectations << expectation
|
428
|
+
expectation
|
429
|
+
end
|
430
|
+
|
348
431
|
def result_set
|
349
432
|
@result_set ||= ResultSet.new(self)
|
350
433
|
end
|
351
434
|
|
352
|
-
def
|
435
|
+
def datasets_with_applied_simple_expectations
|
353
436
|
dataset_1 = @dataset_1
|
354
437
|
dataset_2 = @dataset_2
|
355
|
-
@
|
438
|
+
@simple_expectations.each do |exp|
|
356
439
|
dataset_1 = exp.apply_to(dataset_1, :lhs)
|
357
440
|
dataset_2 = exp.apply_to(dataset_2, :rhs) if @linkage_type != :self
|
358
441
|
end
|
359
442
|
@linkage_type == :self ? [dataset_1, dataset_1] : [dataset_1, dataset_2]
|
360
443
|
end
|
444
|
+
|
445
|
+
def datasets_with_applied_exhaustive_expectations
|
446
|
+
apply_exhaustive_expectations(@dataset_1, @dataset_2)
|
447
|
+
end
|
448
|
+
|
449
|
+
def apply_exhaustive_expectations(dataset_1, dataset_2)
|
450
|
+
dataset_1 = dataset_1.select(dataset_1.field_set.primary_key.to_expr)
|
451
|
+
dataset_2 = dataset_2.select(dataset_2.field_set.primary_key.to_expr)
|
452
|
+
@exhaustive_expectations.each do |exp|
|
453
|
+
dataset_1 = exp.apply_to(dataset_1, :lhs)
|
454
|
+
dataset_2 = exp.apply_to(dataset_2, :rhs)
|
455
|
+
end
|
456
|
+
[dataset_1, dataset_2]
|
457
|
+
end
|
458
|
+
|
459
|
+
def groups_table_needed?
|
460
|
+
has_simple_expectations?
|
461
|
+
end
|
462
|
+
|
463
|
+
def scores_table_needed?
|
464
|
+
has_exhaustive_expectations?
|
465
|
+
end
|
466
|
+
|
467
|
+
def has_simple_expectations?
|
468
|
+
!@simple_expectations.empty?
|
469
|
+
end
|
470
|
+
|
471
|
+
def has_exhaustive_expectations?
|
472
|
+
!@exhaustive_expectations.empty?
|
473
|
+
end
|
474
|
+
|
475
|
+
private
|
476
|
+
|
477
|
+
def decollation_needed_for_simple_expectation?(expectation)
|
478
|
+
if expectation.decollation_needed?
|
479
|
+
true
|
480
|
+
elsif results_uri && expectation.kind != :filter
|
481
|
+
result_set_database_type = ResultSet.new(self).database.database_type
|
482
|
+
database_types_differ =
|
483
|
+
result_set_database_type != dataset_1.database_type ||
|
484
|
+
result_set_database_type != dataset_2.database_type
|
485
|
+
|
486
|
+
merged_field = expectation.merged_field
|
487
|
+
merged_field.ruby_type[:type] == String &&
|
488
|
+
!merged_field.collation.nil? && database_types_differ
|
489
|
+
else
|
490
|
+
false
|
491
|
+
end
|
492
|
+
end
|
361
493
|
end
|
362
494
|
end
|
data/lib/linkage/data.rb
CHANGED
@@ -17,9 +17,14 @@ module Linkage
|
|
17
17
|
File => nil
|
18
18
|
}
|
19
19
|
|
20
|
-
#
|
20
|
+
# @!attribute [r] name
|
21
|
+
# @return [Symbol] This object's name
|
21
22
|
attr_reader :name
|
22
23
|
|
24
|
+
# @!attribute [r] dataset
|
25
|
+
# @return [Linkage::Dataset, nil] This object's dataset, or nil
|
26
|
+
attr_reader :dataset
|
27
|
+
|
23
28
|
def initialize(name)
|
24
29
|
@name = name
|
25
30
|
end
|
@@ -28,20 +33,35 @@ module Linkage
|
|
28
33
|
raise NotImplementedError
|
29
34
|
end
|
30
35
|
|
31
|
-
def to_expr
|
36
|
+
def to_expr
|
37
|
+
raise NotImplementedError
|
38
|
+
end
|
39
|
+
|
40
|
+
def collation
|
41
|
+
nil
|
42
|
+
end
|
43
|
+
|
44
|
+
def database_type
|
45
|
+
ds = dataset
|
46
|
+
ds ? ds.database_type : nil
|
47
|
+
end
|
48
|
+
|
49
|
+
def static?
|
32
50
|
raise NotImplementedError
|
33
51
|
end
|
34
52
|
|
35
|
-
# Create a
|
36
|
-
# have different types, the resulting type is determined via a
|
53
|
+
# Create a merge field that can hold data from two data sources. If the
|
54
|
+
# fields have different types, the resulting type is determined via a
|
37
55
|
# type-conversion tree.
|
38
56
|
#
|
39
57
|
# @param [Linkage::Data] other
|
40
|
-
# @return [Linkage::
|
58
|
+
# @return [Linkage::MergeField]
|
41
59
|
def merge(other, new_name = nil)
|
42
60
|
schema_1 = self.ruby_type
|
61
|
+
db_type_1 = self.database_type
|
43
62
|
schema_2 = other.ruby_type
|
44
|
-
|
63
|
+
db_type_2 = other.database_type
|
64
|
+
if schema_1 == schema_2 && db_type_1 == db_type_2
|
45
65
|
result = schema_1
|
46
66
|
else
|
47
67
|
type_1 = schema_1[:type]
|
@@ -113,6 +133,11 @@ module Linkage
|
|
113
133
|
result_opts[:fixed] = true
|
114
134
|
end
|
115
135
|
|
136
|
+
# collation
|
137
|
+
if opts_1[:collate] != opts_2[:collate] || db_type_1 != db_type_2
|
138
|
+
result_opts.delete(:collate)
|
139
|
+
end
|
140
|
+
|
116
141
|
result = {:type => result_type}
|
117
142
|
result[:opts] = result_opts unless result_opts.empty?
|
118
143
|
end
|
@@ -122,7 +147,7 @@ module Linkage
|
|
122
147
|
else
|
123
148
|
name = self.name == other.name ? self.name : :"#{self.name}_#{other.name}"
|
124
149
|
end
|
125
|
-
|
150
|
+
MergeField.new(name, result, db_type_1 == db_type_2 ? db_type_1 : nil)
|
126
151
|
end
|
127
152
|
|
128
153
|
private
|