linkage 0.0.8 → 0.1.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.yardopts +1 -0
- data/Gemfile +1 -19
- data/Gemfile-java +3 -0
- data/README.markdown +88 -34
- data/Rakefile +16 -15
- data/TODO +4 -0
- data/lib/linkage/comparator.rb +139 -144
- data/lib/linkage/comparators/compare.rb +236 -29
- data/lib/linkage/comparators/strcompare.rb +85 -0
- data/lib/linkage/comparators/within.rb +24 -20
- data/lib/linkage/configuration.rb +44 -466
- data/lib/linkage/dataset.rb +28 -127
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +6 -37
- data/lib/linkage/field_set.rb +3 -3
- data/lib/linkage/match_recorder.rb +22 -0
- data/lib/linkage/match_set.rb +34 -0
- data/lib/linkage/match_sets/csv.rb +39 -0
- data/lib/linkage/match_sets/database.rb +45 -0
- data/lib/linkage/matcher.rb +30 -0
- data/lib/linkage/result_set.rb +25 -110
- data/lib/linkage/result_sets/csv.rb +54 -0
- data/lib/linkage/result_sets/database.rb +42 -0
- data/lib/linkage/runner.rb +57 -16
- data/lib/linkage/score_recorder.rb +30 -0
- data/lib/linkage/score_set.rb +49 -0
- data/lib/linkage/score_sets/csv.rb +64 -0
- data/lib/linkage/score_sets/database.rb +77 -0
- data/lib/linkage/version.rb +1 -1
- data/lib/linkage.rb +14 -17
- data/linkage.gemspec +13 -1
- data/linkage.gemspec-java +32 -0
- data/test/helper.rb +30 -23
- data/test/integration/test_cross_linkage.rb +46 -25
- data/test/integration/test_database_result_set.rb +55 -0
- data/test/integration/test_dual_linkage.rb +19 -94
- data/test/integration/test_self_linkage.rb +100 -203
- data/test/integration/test_within_comparator.rb +24 -77
- data/test/unit/comparators/test_compare.rb +254 -50
- data/test/unit/comparators/test_strcompare.rb +45 -0
- data/test/unit/comparators/test_within.rb +14 -26
- data/test/unit/match_sets/test_csv.rb +78 -0
- data/test/unit/match_sets/test_database.rb +63 -0
- data/test/unit/result_sets/test_csv.rb +111 -0
- data/test/unit/result_sets/test_database.rb +68 -0
- data/test/unit/score_sets/test_csv.rb +151 -0
- data/test/unit/score_sets/test_database.rb +149 -0
- data/test/unit/test_comparator.rb +46 -83
- data/test/unit/test_comparators.rb +4 -0
- data/test/unit/test_configuration.rb +99 -145
- data/test/unit/test_dataset.rb +52 -73
- data/test/unit/test_field.rb +4 -55
- data/test/unit/test_field_set.rb +6 -6
- data/test/unit/test_match_recorder.rb +23 -0
- data/test/unit/test_match_set.rb +23 -0
- data/test/unit/test_match_sets.rb +4 -0
- data/test/unit/test_matcher.rb +44 -0
- data/test/unit/test_result_set.rb +24 -223
- data/test/unit/test_result_sets.rb +4 -0
- data/test/unit/test_runner.rb +122 -17
- data/test/unit/test_runners.rb +4 -0
- data/test/unit/test_score_recorder.rb +25 -0
- data/test/unit/test_score_set.rb +37 -0
- data/test/unit/test_score_sets.rb +4 -0
- metadata +183 -90
- data/Gemfile.lock +0 -92
- data/lib/linkage/comparators/binary.rb +0 -12
- data/lib/linkage/data.rb +0 -175
- data/lib/linkage/decollation.rb +0 -93
- data/lib/linkage/expectation.rb +0 -21
- data/lib/linkage/expectations/exhaustive.rb +0 -63
- data/lib/linkage/expectations/simple.rb +0 -168
- data/lib/linkage/function.rb +0 -148
- data/lib/linkage/functions/binary.rb +0 -30
- data/lib/linkage/functions/cast.rb +0 -54
- data/lib/linkage/functions/length.rb +0 -29
- data/lib/linkage/functions/strftime.rb +0 -33
- data/lib/linkage/functions/trim.rb +0 -30
- data/lib/linkage/group.rb +0 -55
- data/lib/linkage/meta_object.rb +0 -139
- data/lib/linkage/runner/single_threaded.rb +0 -187
- data/lib/linkage/utils.rb +0 -164
- data/lib/linkage/warnings.rb +0 -5
- data/test/integration/test_collation.rb +0 -45
- data/test/integration/test_configuration.rb +0 -268
- data/test/integration/test_dataset.rb +0 -116
- data/test/integration/test_functions.rb +0 -88
- data/test/integration/test_result_set.rb +0 -85
- data/test/integration/test_scoring.rb +0 -84
- data/test/unit/expectations/test_exhaustive.rb +0 -111
- data/test/unit/expectations/test_simple.rb +0 -303
- data/test/unit/functions/test_binary.rb +0 -54
- data/test/unit/functions/test_cast.rb +0 -98
- data/test/unit/functions/test_length.rb +0 -52
- data/test/unit/functions/test_strftime.rb +0 -60
- data/test/unit/functions/test_trim.rb +0 -43
- data/test/unit/runner/test_single_threaded.rb +0 -12
- data/test/unit/test_data.rb +0 -445
- data/test/unit/test_decollation.rb +0 -201
- data/test/unit/test_function.rb +0 -233
- data/test/unit/test_group.rb +0 -38
- data/test/unit/test_meta_object.rb +0 -208
- data/test/unit/test_utils.rb +0 -341
@@ -1,494 +1,72 @@
|
|
1
1
|
module Linkage
|
2
2
|
class Configuration
|
3
|
-
|
4
|
-
|
5
|
-
class VisualComparisonWrapper
|
6
|
-
attr_reader :dsl, :lhs, :rhs
|
3
|
+
attr_reader :dataset_1, :dataset_2, :result_set, :comparators
|
4
|
+
attr_accessor :record_cache_size, :algorithm, :threshold
|
7
5
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
@rhs = rhs
|
12
|
-
|
13
|
-
if @lhs.is_a?(DataWrapper) && @rhs.is_a?(DataWrapper)
|
14
|
-
if @lhs.side == @rhs.side
|
15
|
-
raise ArgumentError, "Can't visually compare two data sources on the same side"
|
16
|
-
end
|
17
|
-
else
|
18
|
-
raise ArgumentError, "Must supply two data sources for visual comparison"
|
19
|
-
end
|
20
|
-
|
21
|
-
@dsl.add_visual_comparison(self)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
class ExpectationWrapper
|
26
|
-
VALID_OPERATORS = [:==, :>, :<, :>=, :<=]
|
27
|
-
OPERATOR_OPPOSITES = {
|
28
|
-
:== => :'!=',
|
29
|
-
:> => :<=,
|
30
|
-
:<= => :>,
|
31
|
-
:< => :>=,
|
32
|
-
:>= => :<
|
33
|
-
}
|
34
|
-
|
35
|
-
def initialize(dsl, type, lhs, *args)
|
36
|
-
@dsl = dsl
|
37
|
-
@type = type
|
38
|
-
@lhs = lhs
|
39
|
-
end
|
40
|
-
|
41
|
-
def compare_with(operator, rhs)
|
42
|
-
# NOTE: lhs is always a DataWrapper
|
43
|
-
|
44
|
-
if !rhs.is_a?(DataWrapper) || @lhs.static? || rhs.static? || @lhs.side == rhs.side
|
45
|
-
@side = !@lhs.static? ? @lhs.side : rhs.side
|
46
|
-
|
47
|
-
# If one of the objects in this comparison is a static function, we need to set the side
|
48
|
-
# and the dataset based on the other object
|
49
|
-
if rhs.is_a?(DataWrapper) && !rhs.static? && @lhs.is_a?(FunctionWrapper) && @lhs.static?
|
50
|
-
@lhs.dataset = rhs.dataset
|
51
|
-
@lhs.side = @side
|
52
|
-
elsif @lhs.is_a?(DataWrapper) && !@lhs.static? && rhs.is_a?(FunctionWrapper) && rhs.static?
|
53
|
-
rhs.dataset = @lhs.dataset
|
54
|
-
rhs.side = @side
|
55
|
-
end
|
56
|
-
elsif rhs.is_a?(DataWrapper) && operator != :==
|
57
|
-
# create an exhaustive expectation with the Compare comparator instead
|
58
|
-
comparator = Comparators::Compare.new(@lhs.meta_object,
|
59
|
-
MetaObject.new(operator.to_s), rhs.meta_object)
|
60
|
-
|
61
|
-
score_range = Comparators::Compare.score_range
|
62
|
-
threshold = @type == :must ? score_range.last : score_range.first
|
63
|
-
|
64
|
-
expectation = Expectations::Exhaustive.new(comparator, threshold, :equal)
|
65
|
-
@dsl.add_exhaustive_expectation(expectation)
|
66
|
-
return self
|
67
|
-
end
|
68
|
-
|
69
|
-
exp_operator = @type == :must_not ? OPERATOR_OPPOSITES[operator] : operator
|
70
|
-
|
71
|
-
rhs_meta_object = rhs.is_a?(DataWrapper) ? rhs.meta_object : MetaObject.new(rhs)
|
72
|
-
@expectation = Expectations::Simple.create(@lhs.meta_object,
|
73
|
-
rhs_meta_object, exp_operator)
|
74
|
-
@dsl.add_simple_expectation(@expectation)
|
75
|
-
self
|
76
|
-
end
|
77
|
-
|
78
|
-
VALID_OPERATORS.each do |operator|
|
79
|
-
define_method(operator) do |rhs|
|
80
|
-
compare_with(operator, rhs)
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def exactly
|
85
|
-
if !@exact_match
|
86
|
-
@expectation.exactly!
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
class DataWrapper
|
92
|
-
attr_reader :meta_object
|
93
|
-
|
94
|
-
def initialize
|
95
|
-
raise NotImplementedError
|
96
|
-
end
|
97
|
-
|
98
|
-
[:must, :must_not].each do |type|
|
99
|
-
define_method(type) do |*args|
|
100
|
-
if args.length > 0
|
101
|
-
wrapper = args[0]
|
102
|
-
comparator = wrapper.to_comparator(self)
|
103
|
-
|
104
|
-
score_range = wrapper.klass.score_range
|
105
|
-
threshold = type == :must ? score_range.last : score_range.first
|
106
|
-
|
107
|
-
expectation = Expectations::Exhaustive.new(comparator, threshold, :equal)
|
108
|
-
@dsl.add_exhaustive_expectation(expectation)
|
109
|
-
else
|
110
|
-
ExpectationWrapper.new(@dsl, type, self)
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
def compare_with(other)
|
116
|
-
VisualComparisonWrapper.new(@dsl, self, other)
|
117
|
-
end
|
118
|
-
|
119
|
-
def method_missing(m, *args, &block)
|
120
|
-
if meta_object.respond_to?(m)
|
121
|
-
meta_object.send(m, *args, &block)
|
122
|
-
else
|
123
|
-
super(m, *args, &block)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
class FieldWrapper < DataWrapper
|
129
|
-
attr_reader :name
|
130
|
-
|
131
|
-
def initialize(dsl, side, dataset, name)
|
132
|
-
@dsl = dsl
|
133
|
-
@meta_object = MetaObject.new(dataset.field_set[name], side)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
class FunctionWrapper < DataWrapper
|
138
|
-
def initialize(dsl, klass, args)
|
139
|
-
@dsl = dsl
|
140
|
-
|
141
|
-
side = dataset = nil
|
142
|
-
static = true
|
143
|
-
function_args = []
|
144
|
-
args.each do |arg|
|
145
|
-
if arg.kind_of?(DataWrapper)
|
146
|
-
raise "conflicting sides" if side && side != arg.side
|
147
|
-
side = arg.side
|
148
|
-
static &&= arg.static?
|
149
|
-
dataset = arg.dataset
|
150
|
-
function_args << arg.object
|
151
|
-
else
|
152
|
-
function_args << arg
|
153
|
-
end
|
154
|
-
end
|
155
|
-
@meta_object = MetaObject.new(klass.new(*function_args), side)
|
156
|
-
end
|
6
|
+
def initialize(*args)
|
7
|
+
if args.length < 2 || args.length > 3
|
8
|
+
raise ArgumentError, "wrong number of arguments (#{args.length} for 3..4)"
|
157
9
|
end
|
158
10
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
def initialize(dsl, klass, args)
|
163
|
-
@dsl = dsl
|
164
|
-
@klass = klass
|
165
|
-
@args = args
|
166
|
-
end
|
167
|
-
|
168
|
-
def of(*args)
|
169
|
-
@args.push(*args)
|
170
|
-
self
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_comparator(receiver)
|
174
|
-
comparator_args = ([receiver] + @args).collect do |arg|
|
175
|
-
arg.is_a?(DataWrapper) ? arg.meta_object : MetaObject.new(arg)
|
176
|
-
end
|
177
|
-
comparator = klass.new(*comparator_args)
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
class DatasetWrapper
|
182
|
-
attr_reader :dataset
|
183
|
-
|
184
|
-
def initialize(dsl, side, dataset)
|
185
|
-
@dsl = dsl
|
186
|
-
@dataset = dataset
|
187
|
-
@side = side
|
188
|
-
end
|
189
|
-
|
190
|
-
def [](field_name)
|
191
|
-
if @dataset.field_set.has_key?(field_name)
|
192
|
-
FieldWrapper.new(@dsl, @side, @dataset, field_name)
|
193
|
-
else
|
194
|
-
raise ArgumentError, "The '#{field_name}' field doesn't exist for the #{@side} dataset!"
|
195
|
-
end
|
196
|
-
end
|
197
|
-
end
|
198
|
-
|
199
|
-
def initialize(config, &block)
|
200
|
-
@config = config
|
201
|
-
@lhs_filters = []
|
202
|
-
@rhs_filters = []
|
203
|
-
instance_eval(&block)
|
204
|
-
end
|
205
|
-
|
206
|
-
def lhs
|
207
|
-
DatasetWrapper.new(self, :lhs, @config.dataset_1)
|
208
|
-
end
|
209
|
-
|
210
|
-
def rhs
|
211
|
-
DatasetWrapper.new(self, :rhs, @config.dataset_2)
|
212
|
-
end
|
213
|
-
|
214
|
-
def save_results_in(uri, options = {})
|
215
|
-
@config.results_uri = uri
|
216
|
-
@config.results_uri_options = options
|
217
|
-
end
|
218
|
-
|
219
|
-
def set_record_cache_size(num)
|
220
|
-
@config.record_cache_size = num
|
221
|
-
end
|
222
|
-
|
223
|
-
def add_simple_expectation(expectation)
|
224
|
-
@config.add_simple_expectation(expectation)
|
225
|
-
|
226
|
-
if @config.linkage_type == :self
|
227
|
-
case expectation.kind
|
228
|
-
when :cross
|
229
|
-
@config.linkage_type = :cross
|
230
|
-
when :filter
|
231
|
-
# If there are different filters on both 'sides' of a self-linkage,
|
232
|
-
# it turns into a cross linkage.
|
233
|
-
these_filters, other_filters =
|
234
|
-
case expectation.side
|
235
|
-
when :lhs
|
236
|
-
[@lhs_filters, @rhs_filters]
|
237
|
-
when :rhs
|
238
|
-
[@rhs_filters, @lhs_filters]
|
239
|
-
end
|
240
|
-
|
241
|
-
these_filters << expectation
|
242
|
-
other_filters.each do |other|
|
243
|
-
if !expectation.same_except_side?(other)
|
244
|
-
@config.linkage_type = :cross
|
245
|
-
break
|
246
|
-
end
|
247
|
-
end
|
248
|
-
end
|
249
|
-
end
|
11
|
+
@dataset_1 = args[0]
|
12
|
+
if args.length > 2 && args[1]
|
13
|
+
@dataset_2 = args[1]
|
250
14
|
end
|
15
|
+
@result_set = args[-1]
|
251
16
|
|
252
|
-
|
253
|
-
@config.add_exhaustive_expectation(expectation)
|
254
|
-
if @config.linkage_type == :self
|
255
|
-
@config.linkage_type = expectation.kind
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
def add_visual_comparison(visual_comparison)
|
260
|
-
@config.visual_comparisons << visual_comparison
|
261
|
-
end
|
262
|
-
|
263
|
-
def groups_table_name(new_name)
|
264
|
-
@config.groups_table_name = new_name
|
265
|
-
end
|
266
|
-
|
267
|
-
def original_groups_table_name(new_name)
|
268
|
-
@config.original_groups_table_name = new_name
|
269
|
-
end
|
270
|
-
|
271
|
-
def scores_table_name(new_name)
|
272
|
-
@config.scores_table_name = new_name
|
273
|
-
end
|
274
|
-
|
275
|
-
def matches_table_name(new_name)
|
276
|
-
@config.matches_table_name = new_name
|
277
|
-
end
|
278
|
-
|
279
|
-
def method_missing(name, *args, &block)
|
280
|
-
# check for comparators
|
281
|
-
md = name.to_s.match(/^be_(.+)$/)
|
282
|
-
if md
|
283
|
-
klass = Comparator[md[1]]
|
284
|
-
if klass
|
285
|
-
ComparatorWrapper.new(self, klass, args)
|
286
|
-
else
|
287
|
-
super
|
288
|
-
end
|
289
|
-
else
|
290
|
-
# check for functions
|
291
|
-
klass = Function[name.to_s]
|
292
|
-
if klass
|
293
|
-
FunctionWrapper.new(self, klass, args)
|
294
|
-
else
|
295
|
-
super
|
296
|
-
end
|
297
|
-
end
|
298
|
-
end
|
299
|
-
end
|
300
|
-
|
301
|
-
attr_reader :dataset_1, :dataset_2, :simple_expectations,
|
302
|
-
:exhaustive_expectations, :visual_comparisons
|
303
|
-
attr_accessor :linkage_type, :results_uri, :results_uri_options,
|
304
|
-
:record_cache_size, :groups_table_name, :original_groups_table_name,
|
305
|
-
:scores_table_name, :matches_table_name
|
306
|
-
|
307
|
-
def initialize(dataset_1, dataset_2)
|
308
|
-
@dataset_1 = dataset_1
|
309
|
-
@dataset_2 = dataset_2
|
310
|
-
@linkage_type = dataset_1 == dataset_2 ? :self : :dual
|
311
|
-
@simple_expectations = []
|
312
|
-
@exhaustive_expectations = []
|
313
|
-
@visual_comparisons = []
|
314
|
-
@results_uri_options = {}
|
315
|
-
@decollation_needed = false
|
17
|
+
@comparators = []
|
316
18
|
@record_cache_size = 10_000
|
317
|
-
@groups_table_name = :groups
|
318
|
-
@original_groups_table_name = :original_groups
|
319
|
-
@scores_table_name = :scores
|
320
|
-
@matches_table_name = :matches
|
321
|
-
end
|
322
|
-
|
323
|
-
def configure(&block)
|
324
|
-
DSL.new(self, &block)
|
325
19
|
end
|
326
20
|
|
327
|
-
def
|
328
|
-
|
329
|
-
if
|
330
|
-
@
|
331
|
-
|
332
|
-
|
333
|
-
break
|
334
|
-
end
|
335
|
-
end
|
21
|
+
def score_recorder
|
22
|
+
pk_1 = @dataset_1.field_set.primary_key.name
|
23
|
+
if @dataset_2
|
24
|
+
pk_2 = @dataset_2.field_set.primary_key.name
|
25
|
+
else
|
26
|
+
pk_2 = pk_1
|
336
27
|
end
|
337
|
-
|
338
|
-
end
|
339
|
-
|
340
|
-
def decollation_needed?
|
341
|
-
@decollation_needed
|
28
|
+
ScoreRecorder.new(@comparators, @result_set.score_set, [pk_1, pk_2])
|
342
29
|
end
|
343
30
|
|
344
|
-
def
|
345
|
-
|
346
|
-
|
347
|
-
# add id
|
348
|
-
schema << [:id, Integer, {:primary_key => true}]
|
349
|
-
|
350
|
-
# add values
|
351
|
-
@simple_expectations.each do |exp|
|
352
|
-
next if exp.kind == :filter
|
353
|
-
|
354
|
-
merged_field = exp.merged_field
|
355
|
-
merged_type = merged_field.ruby_type
|
356
|
-
|
357
|
-
# if the merged field's database type is different than the result
|
358
|
-
# database, strip collation information
|
359
|
-
result_db_type = nil
|
360
|
-
result_set.database do |db|
|
361
|
-
result_db_type = db.database_type
|
362
|
-
end
|
363
|
-
if merged_field.database_type != result_db_type && merged_type.has_key?(:opts)
|
364
|
-
new_opts = merged_type[:opts].reject { |k, v| k == :collate }
|
365
|
-
merged_type = merged_type.merge(:opts => new_opts)
|
366
|
-
end
|
367
|
-
|
368
|
-
col = [merged_field.name, merged_type[:type], merged_type[:opts] || {}]
|
369
|
-
schema << col
|
370
|
-
end
|
371
|
-
|
372
|
-
schema
|
31
|
+
def matcher
|
32
|
+
Matcher.new(@comparators, @result_set.score_set, @algorithm || :mean, @threshold || 0.5)
|
373
33
|
end
|
374
34
|
|
375
|
-
def
|
376
|
-
|
377
|
-
|
378
|
-
# add id
|
379
|
-
schema << [:id, Integer, {:primary_key => true}]
|
380
|
-
|
381
|
-
# add comparator id
|
382
|
-
schema << [:comparator_id, Integer, {}]
|
383
|
-
|
384
|
-
# add record ids
|
385
|
-
pk = dataset_1.field_set.primary_key
|
386
|
-
ruby_type = pk.ruby_type
|
387
|
-
schema << [:record_1_id, ruby_type[:type], ruby_type[:opts] || {}]
|
388
|
-
|
389
|
-
pk = dataset_2.field_set.primary_key
|
390
|
-
ruby_type = pk.ruby_type
|
391
|
-
schema << [:record_2_id, ruby_type[:type], ruby_type[:opts] || {}]
|
392
|
-
|
393
|
-
# add score
|
394
|
-
schema << [:score, Integer, {}]
|
395
|
-
|
396
|
-
schema
|
35
|
+
def match_recorder(matcher)
|
36
|
+
MatchRecorder.new(matcher, @result_set.match_set)
|
397
37
|
end
|
398
38
|
|
399
|
-
def
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
schema << [:id, Integer, {:primary_key => true}]
|
404
|
-
|
405
|
-
# add record ids
|
406
|
-
pk = dataset_1.field_set.primary_key
|
407
|
-
ruby_type = pk.ruby_type
|
408
|
-
schema << [:record_1_id, ruby_type[:type], ruby_type[:opts] || {}]
|
409
|
-
|
410
|
-
pk = dataset_2.field_set.primary_key
|
411
|
-
ruby_type = pk.ruby_type
|
412
|
-
schema << [:record_2_id, ruby_type[:type], ruby_type[:opts] || {}]
|
413
|
-
|
414
|
-
# add score
|
415
|
-
schema << [:total_score, Integer, {}]
|
416
|
-
|
417
|
-
schema
|
418
|
-
end
|
419
|
-
|
420
|
-
def add_simple_expectation(expectation)
|
421
|
-
@simple_expectations << expectation
|
422
|
-
@decollation_needed ||= decollation_needed_for_simple_expectation?(expectation)
|
423
|
-
expectation
|
424
|
-
end
|
425
|
-
|
426
|
-
def add_exhaustive_expectation(expectation)
|
427
|
-
@exhaustive_expectations << expectation
|
428
|
-
expectation
|
429
|
-
end
|
430
|
-
|
431
|
-
def result_set
|
432
|
-
@result_set ||= ResultSet.new(self)
|
433
|
-
end
|
434
|
-
|
435
|
-
def datasets_with_applied_simple_expectations
|
436
|
-
dataset_1 = @dataset_1
|
437
|
-
dataset_2 = @dataset_2
|
438
|
-
@simple_expectations.each do |exp|
|
439
|
-
dataset_1 = exp.apply_to(dataset_1, :lhs)
|
440
|
-
dataset_2 = exp.apply_to(dataset_2, :rhs) if @linkage_type != :self
|
39
|
+
def method_missing(name, *args, &block)
|
40
|
+
klass = Comparator[name.to_s]
|
41
|
+
if klass.nil?
|
42
|
+
raise "unknown comparator: #{name}"
|
441
43
|
end
|
442
|
-
@linkage_type == :self ? [dataset_1, dataset_1] : [dataset_1, dataset_2]
|
443
|
-
end
|
444
44
|
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
dataset_1 = dataset_1.select(dataset_1.field_set.primary_key.to_expr)
|
451
|
-
dataset_2 = dataset_2.select(dataset_2.field_set.primary_key.to_expr)
|
452
|
-
@exhaustive_expectations.each do |exp|
|
453
|
-
dataset_1 = exp.apply_to(dataset_1, :lhs)
|
454
|
-
dataset_2 = exp.apply_to(dataset_2, :rhs)
|
45
|
+
set_1 = args[0]
|
46
|
+
if set_1.is_a?(Array)
|
47
|
+
set_1 = fields_for(dataset_1, *set_1)
|
48
|
+
else
|
49
|
+
set_1 = fields_for(dataset_1, set_1).first
|
455
50
|
end
|
456
|
-
[
|
457
|
-
end
|
458
|
-
|
459
|
-
def groups_table_needed?
|
460
|
-
has_simple_expectations?
|
461
|
-
end
|
462
|
-
|
463
|
-
def scores_table_needed?
|
464
|
-
has_exhaustive_expectations?
|
465
|
-
end
|
51
|
+
args[0] = set_1
|
466
52
|
|
467
|
-
|
468
|
-
|
469
|
-
|
53
|
+
set_2 = args[1]
|
54
|
+
if set_2.is_a?(Array)
|
55
|
+
set_2 = fields_for(dataset_2 || dataset_1, *set_2)
|
56
|
+
else
|
57
|
+
set_2 = fields_for(dataset_2 || dataset_1, set_2).first
|
58
|
+
end
|
59
|
+
args[1] = set_2
|
470
60
|
|
471
|
-
|
472
|
-
|
61
|
+
comparator = klass.new(*args, &block)
|
62
|
+
@comparators << comparator
|
473
63
|
end
|
474
64
|
|
475
|
-
|
65
|
+
protected
|
476
66
|
|
477
|
-
def
|
478
|
-
|
479
|
-
|
480
|
-
elsif results_uri && expectation.kind != :filter
|
481
|
-
result_set_database_type = ResultSet.new(self).database.database_type
|
482
|
-
database_types_differ =
|
483
|
-
result_set_database_type != dataset_1.database_type ||
|
484
|
-
result_set_database_type != dataset_2.database_type
|
485
|
-
|
486
|
-
merged_field = expectation.merged_field
|
487
|
-
merged_field.ruby_type[:type] == String &&
|
488
|
-
!merged_field.collation.nil? && database_types_differ
|
489
|
-
else
|
490
|
-
false
|
491
|
-
end
|
67
|
+
def fields_for(dataset, *args)
|
68
|
+
field_set = dataset.field_set
|
69
|
+
args.collect { |name| field_set[name] }
|
492
70
|
end
|
493
71
|
end
|
494
72
|
end
|