linkage 0.0.8 → 0.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.yardopts +1 -0
- data/Gemfile +1 -19
- data/Gemfile-java +3 -0
- data/README.markdown +88 -34
- data/Rakefile +16 -15
- data/TODO +4 -0
- data/lib/linkage/comparator.rb +139 -144
- data/lib/linkage/comparators/compare.rb +236 -29
- data/lib/linkage/comparators/strcompare.rb +85 -0
- data/lib/linkage/comparators/within.rb +24 -20
- data/lib/linkage/configuration.rb +44 -466
- data/lib/linkage/dataset.rb +28 -127
- data/lib/linkage/exceptions.rb +5 -0
- data/lib/linkage/field.rb +6 -37
- data/lib/linkage/field_set.rb +3 -3
- data/lib/linkage/match_recorder.rb +22 -0
- data/lib/linkage/match_set.rb +34 -0
- data/lib/linkage/match_sets/csv.rb +39 -0
- data/lib/linkage/match_sets/database.rb +45 -0
- data/lib/linkage/matcher.rb +30 -0
- data/lib/linkage/result_set.rb +25 -110
- data/lib/linkage/result_sets/csv.rb +54 -0
- data/lib/linkage/result_sets/database.rb +42 -0
- data/lib/linkage/runner.rb +57 -16
- data/lib/linkage/score_recorder.rb +30 -0
- data/lib/linkage/score_set.rb +49 -0
- data/lib/linkage/score_sets/csv.rb +64 -0
- data/lib/linkage/score_sets/database.rb +77 -0
- data/lib/linkage/version.rb +1 -1
- data/lib/linkage.rb +14 -17
- data/linkage.gemspec +13 -1
- data/linkage.gemspec-java +32 -0
- data/test/helper.rb +30 -23
- data/test/integration/test_cross_linkage.rb +46 -25
- data/test/integration/test_database_result_set.rb +55 -0
- data/test/integration/test_dual_linkage.rb +19 -94
- data/test/integration/test_self_linkage.rb +100 -203
- data/test/integration/test_within_comparator.rb +24 -77
- data/test/unit/comparators/test_compare.rb +254 -50
- data/test/unit/comparators/test_strcompare.rb +45 -0
- data/test/unit/comparators/test_within.rb +14 -26
- data/test/unit/match_sets/test_csv.rb +78 -0
- data/test/unit/match_sets/test_database.rb +63 -0
- data/test/unit/result_sets/test_csv.rb +111 -0
- data/test/unit/result_sets/test_database.rb +68 -0
- data/test/unit/score_sets/test_csv.rb +151 -0
- data/test/unit/score_sets/test_database.rb +149 -0
- data/test/unit/test_comparator.rb +46 -83
- data/test/unit/test_comparators.rb +4 -0
- data/test/unit/test_configuration.rb +99 -145
- data/test/unit/test_dataset.rb +52 -73
- data/test/unit/test_field.rb +4 -55
- data/test/unit/test_field_set.rb +6 -6
- data/test/unit/test_match_recorder.rb +23 -0
- data/test/unit/test_match_set.rb +23 -0
- data/test/unit/test_match_sets.rb +4 -0
- data/test/unit/test_matcher.rb +44 -0
- data/test/unit/test_result_set.rb +24 -223
- data/test/unit/test_result_sets.rb +4 -0
- data/test/unit/test_runner.rb +122 -17
- data/test/unit/test_runners.rb +4 -0
- data/test/unit/test_score_recorder.rb +25 -0
- data/test/unit/test_score_set.rb +37 -0
- data/test/unit/test_score_sets.rb +4 -0
- metadata +183 -90
- data/Gemfile.lock +0 -92
- data/lib/linkage/comparators/binary.rb +0 -12
- data/lib/linkage/data.rb +0 -175
- data/lib/linkage/decollation.rb +0 -93
- data/lib/linkage/expectation.rb +0 -21
- data/lib/linkage/expectations/exhaustive.rb +0 -63
- data/lib/linkage/expectations/simple.rb +0 -168
- data/lib/linkage/function.rb +0 -148
- data/lib/linkage/functions/binary.rb +0 -30
- data/lib/linkage/functions/cast.rb +0 -54
- data/lib/linkage/functions/length.rb +0 -29
- data/lib/linkage/functions/strftime.rb +0 -33
- data/lib/linkage/functions/trim.rb +0 -30
- data/lib/linkage/group.rb +0 -55
- data/lib/linkage/meta_object.rb +0 -139
- data/lib/linkage/runner/single_threaded.rb +0 -187
- data/lib/linkage/utils.rb +0 -164
- data/lib/linkage/warnings.rb +0 -5
- data/test/integration/test_collation.rb +0 -45
- data/test/integration/test_configuration.rb +0 -268
- data/test/integration/test_dataset.rb +0 -116
- data/test/integration/test_functions.rb +0 -88
- data/test/integration/test_result_set.rb +0 -85
- data/test/integration/test_scoring.rb +0 -84
- data/test/unit/expectations/test_exhaustive.rb +0 -111
- data/test/unit/expectations/test_simple.rb +0 -303
- data/test/unit/functions/test_binary.rb +0 -54
- data/test/unit/functions/test_cast.rb +0 -98
- data/test/unit/functions/test_length.rb +0 -52
- data/test/unit/functions/test_strftime.rb +0 -60
- data/test/unit/functions/test_trim.rb +0 -43
- data/test/unit/runner/test_single_threaded.rb +0 -12
- data/test/unit/test_data.rb +0 -445
- data/test/unit/test_decollation.rb +0 -201
- data/test/unit/test_function.rb +0 -233
- data/test/unit/test_group.rb +0 -38
- data/test/unit/test_meta_object.rb +0 -208
- data/test/unit/test_utils.rb +0 -341
@@ -1,494 +1,72 @@
|
|
1
1
|
module Linkage
|
2
2
|
class Configuration
|
3
|
-
|
4
|
-
|
5
|
-
class VisualComparisonWrapper
|
6
|
-
attr_reader :dsl, :lhs, :rhs
|
3
|
+
attr_reader :dataset_1, :dataset_2, :result_set, :comparators
|
4
|
+
attr_accessor :record_cache_size, :algorithm, :threshold
|
7
5
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
@rhs = rhs
|
12
|
-
|
13
|
-
if @lhs.is_a?(DataWrapper) && @rhs.is_a?(DataWrapper)
|
14
|
-
if @lhs.side == @rhs.side
|
15
|
-
raise ArgumentError, "Can't visually compare two data sources on the same side"
|
16
|
-
end
|
17
|
-
else
|
18
|
-
raise ArgumentError, "Must supply two data sources for visual comparison"
|
19
|
-
end
|
20
|
-
|
21
|
-
@dsl.add_visual_comparison(self)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
class ExpectationWrapper
|
26
|
-
VALID_OPERATORS = [:==, :>, :<, :>=, :<=]
|
27
|
-
OPERATOR_OPPOSITES = {
|
28
|
-
:== => :'!=',
|
29
|
-
:> => :<=,
|
30
|
-
:<= => :>,
|
31
|
-
:< => :>=,
|
32
|
-
:>= => :<
|
33
|
-
}
|
34
|
-
|
35
|
-
def initialize(dsl, type, lhs, *args)
|
36
|
-
@dsl = dsl
|
37
|
-
@type = type
|
38
|
-
@lhs = lhs
|
39
|
-
end
|
40
|
-
|
41
|
-
def compare_with(operator, rhs)
|
42
|
-
# NOTE: lhs is always a DataWrapper
|
43
|
-
|
44
|
-
if !rhs.is_a?(DataWrapper) || @lhs.static? || rhs.static? || @lhs.side == rhs.side
|
45
|
-
@side = !@lhs.static? ? @lhs.side : rhs.side
|
46
|
-
|
47
|
-
# If one of the objects in this comparison is a static function, we need to set the side
|
48
|
-
# and the dataset based on the other object
|
49
|
-
if rhs.is_a?(DataWrapper) && !rhs.static? && @lhs.is_a?(FunctionWrapper) && @lhs.static?
|
50
|
-
@lhs.dataset = rhs.dataset
|
51
|
-
@lhs.side = @side
|
52
|
-
elsif @lhs.is_a?(DataWrapper) && !@lhs.static? && rhs.is_a?(FunctionWrapper) && rhs.static?
|
53
|
-
rhs.dataset = @lhs.dataset
|
54
|
-
rhs.side = @side
|
55
|
-
end
|
56
|
-
elsif rhs.is_a?(DataWrapper) && operator != :==
|
57
|
-
# create an exhaustive expectation with the Compare comparator instead
|
58
|
-
comparator = Comparators::Compare.new(@lhs.meta_object,
|
59
|
-
MetaObject.new(operator.to_s), rhs.meta_object)
|
60
|
-
|
61
|
-
score_range = Comparators::Compare.score_range
|
62
|
-
threshold = @type == :must ? score_range.last : score_range.first
|
63
|
-
|
64
|
-
expectation = Expectations::Exhaustive.new(comparator, threshold, :equal)
|
65
|
-
@dsl.add_exhaustive_expectation(expectation)
|
66
|
-
return self
|
67
|
-
end
|
68
|
-
|
69
|
-
exp_operator = @type == :must_not ? OPERATOR_OPPOSITES[operator] : operator
|
70
|
-
|
71
|
-
rhs_meta_object = rhs.is_a?(DataWrapper) ? rhs.meta_object : MetaObject.new(rhs)
|
72
|
-
@expectation = Expectations::Simple.create(@lhs.meta_object,
|
73
|
-
rhs_meta_object, exp_operator)
|
74
|
-
@dsl.add_simple_expectation(@expectation)
|
75
|
-
self
|
76
|
-
end
|
77
|
-
|
78
|
-
VALID_OPERATORS.each do |operator|
|
79
|
-
define_method(operator) do |rhs|
|
80
|
-
compare_with(operator, rhs)
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def exactly
|
85
|
-
if !@exact_match
|
86
|
-
@expectation.exactly!
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
class DataWrapper
|
92
|
-
attr_reader :meta_object
|
93
|
-
|
94
|
-
def initialize
|
95
|
-
raise NotImplementedError
|
96
|
-
end
|
97
|
-
|
98
|
-
[:must, :must_not].each do |type|
|
99
|
-
define_method(type) do |*args|
|
100
|
-
if args.length > 0
|
101
|
-
wrapper = args[0]
|
102
|
-
comparator = wrapper.to_comparator(self)
|
103
|
-
|
104
|
-
score_range = wrapper.klass.score_range
|
105
|
-
threshold = type == :must ? score_range.last : score_range.first
|
106
|
-
|
107
|
-
expectation = Expectations::Exhaustive.new(comparator, threshold, :equal)
|
108
|
-
@dsl.add_exhaustive_expectation(expectation)
|
109
|
-
else
|
110
|
-
ExpectationWrapper.new(@dsl, type, self)
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
def compare_with(other)
|
116
|
-
VisualComparisonWrapper.new(@dsl, self, other)
|
117
|
-
end
|
118
|
-
|
119
|
-
def method_missing(m, *args, &block)
|
120
|
-
if meta_object.respond_to?(m)
|
121
|
-
meta_object.send(m, *args, &block)
|
122
|
-
else
|
123
|
-
super(m, *args, &block)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
class FieldWrapper < DataWrapper
|
129
|
-
attr_reader :name
|
130
|
-
|
131
|
-
def initialize(dsl, side, dataset, name)
|
132
|
-
@dsl = dsl
|
133
|
-
@meta_object = MetaObject.new(dataset.field_set[name], side)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
class FunctionWrapper < DataWrapper
|
138
|
-
def initialize(dsl, klass, args)
|
139
|
-
@dsl = dsl
|
140
|
-
|
141
|
-
side = dataset = nil
|
142
|
-
static = true
|
143
|
-
function_args = []
|
144
|
-
args.each do |arg|
|
145
|
-
if arg.kind_of?(DataWrapper)
|
146
|
-
raise "conflicting sides" if side && side != arg.side
|
147
|
-
side = arg.side
|
148
|
-
static &&= arg.static?
|
149
|
-
dataset = arg.dataset
|
150
|
-
function_args << arg.object
|
151
|
-
else
|
152
|
-
function_args << arg
|
153
|
-
end
|
154
|
-
end
|
155
|
-
@meta_object = MetaObject.new(klass.new(*function_args), side)
|
156
|
-
end
|
6
|
+
def initialize(*args)
|
7
|
+
if args.length < 2 || args.length > 3
|
8
|
+
raise ArgumentError, "wrong number of arguments (#{args.length} for 3..4)"
|
157
9
|
end
|
158
10
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
def initialize(dsl, klass, args)
|
163
|
-
@dsl = dsl
|
164
|
-
@klass = klass
|
165
|
-
@args = args
|
166
|
-
end
|
167
|
-
|
168
|
-
def of(*args)
|
169
|
-
@args.push(*args)
|
170
|
-
self
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_comparator(receiver)
|
174
|
-
comparator_args = ([receiver] + @args).collect do |arg|
|
175
|
-
arg.is_a?(DataWrapper) ? arg.meta_object : MetaObject.new(arg)
|
176
|
-
end
|
177
|
-
comparator = klass.new(*comparator_args)
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
class DatasetWrapper
|
182
|
-
attr_reader :dataset
|
183
|
-
|
184
|
-
def initialize(dsl, side, dataset)
|
185
|
-
@dsl = dsl
|
186
|
-
@dataset = dataset
|
187
|
-
@side = side
|
188
|
-
end
|
189
|
-
|
190
|
-
def [](field_name)
|
191
|
-
if @dataset.field_set.has_key?(field_name)
|
192
|
-
FieldWrapper.new(@dsl, @side, @dataset, field_name)
|
193
|
-
else
|
194
|
-
raise ArgumentError, "The '#{field_name}' field doesn't exist for the #{@side} dataset!"
|
195
|
-
end
|
196
|
-
end
|
197
|
-
end
|
198
|
-
|
199
|
-
def initialize(config, &block)
|
200
|
-
@config = config
|
201
|
-
@lhs_filters = []
|
202
|
-
@rhs_filters = []
|
203
|
-
instance_eval(&block)
|
204
|
-
end
|
205
|
-
|
206
|
-
def lhs
|
207
|
-
DatasetWrapper.new(self, :lhs, @config.dataset_1)
|
208
|
-
end
|
209
|
-
|
210
|
-
def rhs
|
211
|
-
DatasetWrapper.new(self, :rhs, @config.dataset_2)
|
212
|
-
end
|
213
|
-
|
214
|
-
def save_results_in(uri, options = {})
|
215
|
-
@config.results_uri = uri
|
216
|
-
@config.results_uri_options = options
|
217
|
-
end
|
218
|
-
|
219
|
-
def set_record_cache_size(num)
|
220
|
-
@config.record_cache_size = num
|
221
|
-
end
|
222
|
-
|
223
|
-
def add_simple_expectation(expectation)
|
224
|
-
@config.add_simple_expectation(expectation)
|
225
|
-
|
226
|
-
if @config.linkage_type == :self
|
227
|
-
case expectation.kind
|
228
|
-
when :cross
|
229
|
-
@config.linkage_type = :cross
|
230
|
-
when :filter
|
231
|
-
# If there different filters on both 'sides' of a self-linkage,
|
232
|
-
# it turns into a cross linkage.
|
233
|
-
these_filters, other_filters =
|
234
|
-
case expectation.side
|
235
|
-
when :lhs
|
236
|
-
[@lhs_filters, @rhs_filters]
|
237
|
-
when :rhs
|
238
|
-
[@rhs_filters, @lhs_filters]
|
239
|
-
end
|
240
|
-
|
241
|
-
these_filters << expectation
|
242
|
-
other_filters.each do |other|
|
243
|
-
if !expectation.same_except_side?(other)
|
244
|
-
@config.linkage_type = :cross
|
245
|
-
break
|
246
|
-
end
|
247
|
-
end
|
248
|
-
end
|
249
|
-
end
|
11
|
+
@dataset_1 = args[0]
|
12
|
+
if args.length > 2 && args[1]
|
13
|
+
@dataset_2 = args[1]
|
250
14
|
end
|
15
|
+
@result_set = args[-1]
|
251
16
|
|
252
|
-
|
253
|
-
@config.add_exhaustive_expectation(expectation)
|
254
|
-
if @config.linkage_type == :self
|
255
|
-
@config.linkage_type = expectation.kind
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
def add_visual_comparison(visual_comparison)
|
260
|
-
@config.visual_comparisons << visual_comparison
|
261
|
-
end
|
262
|
-
|
263
|
-
def groups_table_name(new_name)
|
264
|
-
@config.groups_table_name = new_name
|
265
|
-
end
|
266
|
-
|
267
|
-
def original_groups_table_name(new_name)
|
268
|
-
@config.original_groups_table_name = new_name
|
269
|
-
end
|
270
|
-
|
271
|
-
def scores_table_name(new_name)
|
272
|
-
@config.scores_table_name = new_name
|
273
|
-
end
|
274
|
-
|
275
|
-
def matches_table_name(new_name)
|
276
|
-
@config.matches_table_name = new_name
|
277
|
-
end
|
278
|
-
|
279
|
-
def method_missing(name, *args, &block)
|
280
|
-
# check for comparators
|
281
|
-
md = name.to_s.match(/^be_(.+)$/)
|
282
|
-
if md
|
283
|
-
klass = Comparator[md[1]]
|
284
|
-
if klass
|
285
|
-
ComparatorWrapper.new(self, klass, args)
|
286
|
-
else
|
287
|
-
super
|
288
|
-
end
|
289
|
-
else
|
290
|
-
# check for functions
|
291
|
-
klass = Function[name.to_s]
|
292
|
-
if klass
|
293
|
-
FunctionWrapper.new(self, klass, args)
|
294
|
-
else
|
295
|
-
super
|
296
|
-
end
|
297
|
-
end
|
298
|
-
end
|
299
|
-
end
|
300
|
-
|
301
|
-
attr_reader :dataset_1, :dataset_2, :simple_expectations,
|
302
|
-
:exhaustive_expectations, :visual_comparisons
|
303
|
-
attr_accessor :linkage_type, :results_uri, :results_uri_options,
|
304
|
-
:record_cache_size, :groups_table_name, :original_groups_table_name,
|
305
|
-
:scores_table_name, :matches_table_name
|
306
|
-
|
307
|
-
def initialize(dataset_1, dataset_2)
|
308
|
-
@dataset_1 = dataset_1
|
309
|
-
@dataset_2 = dataset_2
|
310
|
-
@linkage_type = dataset_1 == dataset_2 ? :self : :dual
|
311
|
-
@simple_expectations = []
|
312
|
-
@exhaustive_expectations = []
|
313
|
-
@visual_comparisons = []
|
314
|
-
@results_uri_options = {}
|
315
|
-
@decollation_needed = false
|
17
|
+
@comparators = []
|
316
18
|
@record_cache_size = 10_000
|
317
|
-
@groups_table_name = :groups
|
318
|
-
@original_groups_table_name = :original_groups
|
319
|
-
@scores_table_name = :scores
|
320
|
-
@matches_table_name = :matches
|
321
|
-
end
|
322
|
-
|
323
|
-
def configure(&block)
|
324
|
-
DSL.new(self, &block)
|
325
19
|
end
|
326
20
|
|
327
|
-
def
|
328
|
-
|
329
|
-
if
|
330
|
-
@
|
331
|
-
|
332
|
-
|
333
|
-
break
|
334
|
-
end
|
335
|
-
end
|
21
|
+
def score_recorder
|
22
|
+
pk_1 = @dataset_1.field_set.primary_key.name
|
23
|
+
if @dataset_2
|
24
|
+
pk_2 = @dataset_2.field_set.primary_key.name
|
25
|
+
else
|
26
|
+
pk_2 = pk_1
|
336
27
|
end
|
337
|
-
|
338
|
-
end
|
339
|
-
|
340
|
-
def decollation_needed?
|
341
|
-
@decollation_needed
|
28
|
+
ScoreRecorder.new(@comparators, @result_set.score_set, [pk_1, pk_2])
|
342
29
|
end
|
343
30
|
|
344
|
-
def
|
345
|
-
|
346
|
-
|
347
|
-
# add id
|
348
|
-
schema << [:id, Integer, {:primary_key => true}]
|
349
|
-
|
350
|
-
# add values
|
351
|
-
@simple_expectations.each do |exp|
|
352
|
-
next if exp.kind == :filter
|
353
|
-
|
354
|
-
merged_field = exp.merged_field
|
355
|
-
merged_type = merged_field.ruby_type
|
356
|
-
|
357
|
-
# if the merged field's database type is different than the result
|
358
|
-
# database, strip collation information
|
359
|
-
result_db_type = nil
|
360
|
-
result_set.database do |db|
|
361
|
-
result_db_type = db.database_type
|
362
|
-
end
|
363
|
-
if merged_field.database_type != result_db_type && merged_type.has_key?(:opts)
|
364
|
-
new_opts = merged_type[:opts].reject { |k, v| k == :collate }
|
365
|
-
merged_type = merged_type.merge(:opts => new_opts)
|
366
|
-
end
|
367
|
-
|
368
|
-
col = [merged_field.name, merged_type[:type], merged_type[:opts] || {}]
|
369
|
-
schema << col
|
370
|
-
end
|
371
|
-
|
372
|
-
schema
|
31
|
+
def matcher
|
32
|
+
Matcher.new(@comparators, @result_set.score_set, @algorithm || :mean, @threshold || 0.5)
|
373
33
|
end
|
374
34
|
|
375
|
-
def
|
376
|
-
|
377
|
-
|
378
|
-
# add id
|
379
|
-
schema << [:id, Integer, {:primary_key => true}]
|
380
|
-
|
381
|
-
# add comparator id
|
382
|
-
schema << [:comparator_id, Integer, {}]
|
383
|
-
|
384
|
-
# add record ids
|
385
|
-
pk = dataset_1.field_set.primary_key
|
386
|
-
ruby_type = pk.ruby_type
|
387
|
-
schema << [:record_1_id, ruby_type[:type], ruby_type[:opts] || {}]
|
388
|
-
|
389
|
-
pk = dataset_2.field_set.primary_key
|
390
|
-
ruby_type = pk.ruby_type
|
391
|
-
schema << [:record_2_id, ruby_type[:type], ruby_type[:opts] || {}]
|
392
|
-
|
393
|
-
# add score
|
394
|
-
schema << [:score, Integer, {}]
|
395
|
-
|
396
|
-
schema
|
35
|
+
def match_recorder(matcher)
|
36
|
+
MatchRecorder.new(matcher, @result_set.match_set)
|
397
37
|
end
|
398
38
|
|
399
|
-
def
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
schema << [:id, Integer, {:primary_key => true}]
|
404
|
-
|
405
|
-
# add record ids
|
406
|
-
pk = dataset_1.field_set.primary_key
|
407
|
-
ruby_type = pk.ruby_type
|
408
|
-
schema << [:record_1_id, ruby_type[:type], ruby_type[:opts] || {}]
|
409
|
-
|
410
|
-
pk = dataset_2.field_set.primary_key
|
411
|
-
ruby_type = pk.ruby_type
|
412
|
-
schema << [:record_2_id, ruby_type[:type], ruby_type[:opts] || {}]
|
413
|
-
|
414
|
-
# add score
|
415
|
-
schema << [:total_score, Integer, {}]
|
416
|
-
|
417
|
-
schema
|
418
|
-
end
|
419
|
-
|
420
|
-
def add_simple_expectation(expectation)
|
421
|
-
@simple_expectations << expectation
|
422
|
-
@decollation_needed ||= decollation_needed_for_simple_expectation?(expectation)
|
423
|
-
expectation
|
424
|
-
end
|
425
|
-
|
426
|
-
def add_exhaustive_expectation(expectation)
|
427
|
-
@exhaustive_expectations << expectation
|
428
|
-
expectation
|
429
|
-
end
|
430
|
-
|
431
|
-
def result_set
|
432
|
-
@result_set ||= ResultSet.new(self)
|
433
|
-
end
|
434
|
-
|
435
|
-
def datasets_with_applied_simple_expectations
|
436
|
-
dataset_1 = @dataset_1
|
437
|
-
dataset_2 = @dataset_2
|
438
|
-
@simple_expectations.each do |exp|
|
439
|
-
dataset_1 = exp.apply_to(dataset_1, :lhs)
|
440
|
-
dataset_2 = exp.apply_to(dataset_2, :rhs) if @linkage_type != :self
|
39
|
+
def method_missing(name, *args, &block)
|
40
|
+
klass = Comparator[name.to_s]
|
41
|
+
if klass.nil?
|
42
|
+
raise "unknown comparator: #{name}"
|
441
43
|
end
|
442
|
-
@linkage_type == :self ? [dataset_1, dataset_1] : [dataset_1, dataset_2]
|
443
|
-
end
|
444
44
|
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
dataset_1 = dataset_1.select(dataset_1.field_set.primary_key.to_expr)
|
451
|
-
dataset_2 = dataset_2.select(dataset_2.field_set.primary_key.to_expr)
|
452
|
-
@exhaustive_expectations.each do |exp|
|
453
|
-
dataset_1 = exp.apply_to(dataset_1, :lhs)
|
454
|
-
dataset_2 = exp.apply_to(dataset_2, :rhs)
|
45
|
+
set_1 = args[0]
|
46
|
+
if set_1.is_a?(Array)
|
47
|
+
set_1 = fields_for(dataset_1, *set_1)
|
48
|
+
else
|
49
|
+
set_1 = fields_for(dataset_1, set_1).first
|
455
50
|
end
|
456
|
-
[
|
457
|
-
end
|
458
|
-
|
459
|
-
def groups_table_needed?
|
460
|
-
has_simple_expectations?
|
461
|
-
end
|
462
|
-
|
463
|
-
def scores_table_needed?
|
464
|
-
has_exhaustive_expectations?
|
465
|
-
end
|
51
|
+
args[0] = set_1
|
466
52
|
|
467
|
-
|
468
|
-
|
469
|
-
|
53
|
+
set_2 = args[1]
|
54
|
+
if set_2.is_a?(Array)
|
55
|
+
set_2 = fields_for(dataset_2 || dataset_1, *set_2)
|
56
|
+
else
|
57
|
+
set_2 = fields_for(dataset_2 || dataset_1, set_2).first
|
58
|
+
end
|
59
|
+
args[1] = set_2
|
470
60
|
|
471
|
-
|
472
|
-
|
61
|
+
comparator = klass.new(*args, &block)
|
62
|
+
@comparators << comparator
|
473
63
|
end
|
474
64
|
|
475
|
-
|
65
|
+
protected
|
476
66
|
|
477
|
-
def
|
478
|
-
|
479
|
-
|
480
|
-
elsif results_uri && expectation.kind != :filter
|
481
|
-
result_set_database_type = ResultSet.new(self).database.database_type
|
482
|
-
database_types_differ =
|
483
|
-
result_set_database_type != dataset_1.database_type ||
|
484
|
-
result_set_database_type != dataset_2.database_type
|
485
|
-
|
486
|
-
merged_field = expectation.merged_field
|
487
|
-
merged_field.ruby_type[:type] == String &&
|
488
|
-
!merged_field.collation.nil? && database_types_differ
|
489
|
-
else
|
490
|
-
false
|
491
|
-
end
|
67
|
+
def fields_for(dataset, *args)
|
68
|
+
field_set = dataset.field_set
|
69
|
+
args.collect { |name| field_set[name] }
|
492
70
|
end
|
493
71
|
end
|
494
72
|
end
|