eco-helpers 2.0.19 → 2.0.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/eco-helpers.gemspec +5 -1
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +49 -15
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +2 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +4 -2
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/cli/config/default/options.rb +29 -1
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +7 -5
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +83 -2
@@ -2,9 +2,27 @@ module Eco
|
|
2
2
|
module Data
|
3
3
|
module FuzzyMatch
|
4
4
|
class Results < Struct.new(:needle, :value, :raw_results)
|
5
|
+
include Enumerable
|
5
6
|
|
6
7
|
attr_accessor :threshold
|
7
8
|
|
9
|
+
def empty?
|
10
|
+
count < 1
|
11
|
+
end
|
12
|
+
|
13
|
+
def each(&block)
|
14
|
+
return to_enum(:each) unless block
|
15
|
+
raw_results.each(&block)
|
16
|
+
end
|
17
|
+
|
18
|
+
# Merges the results of both Results object
|
19
|
+
def merge(res)
|
20
|
+
unless self.needle == res.needle
|
21
|
+
raise "To merge 2 Results, needle should be the same ('#{value}'). Given '#{res.value}'"
|
22
|
+
end
|
23
|
+
self.class.new(needle, value, raw_results.concat(res.raw_results))
|
24
|
+
end
|
25
|
+
|
8
26
|
def results_with_false_positives
|
9
27
|
relevant_results(order: :jaro_winkler, threshold: 0.5)
|
10
28
|
end
|
@@ -4,7 +4,10 @@ module Eco
|
|
4
4
|
class Score < Struct.new(:score, :total)
|
5
5
|
|
6
6
|
def ratio(decimals = 6)
|
7
|
-
|
7
|
+
tot = self.total; sc = self.score
|
8
|
+
tot = tot && tot > 0 ? tot : 1
|
9
|
+
sc = sc && sc > 0 ? sc : 0
|
10
|
+
(sc.to_f / tot).round(decimals)
|
8
11
|
end
|
9
12
|
|
10
13
|
def percent(decimals = 3)
|
@@ -13,6 +16,8 @@ module Eco
|
|
13
16
|
|
14
17
|
def increase(value = 1)
|
15
18
|
self.score += value
|
19
|
+
raise "Score #{self.score} (increase: #{value}) can't be greater than total #{self.total}" if self.score > self.total
|
20
|
+
self.score
|
16
21
|
end
|
17
22
|
|
18
23
|
def increase_total(value)
|
@@ -26,14 +31,14 @@ module Eco
|
|
26
31
|
end
|
27
32
|
|
28
33
|
# Merges 2 Score instance objects
|
29
|
-
def merge(
|
30
|
-
Score.new(*values_at(:score, :total)).merge!(
|
34
|
+
def merge(scr)
|
35
|
+
Score.new(*values_at(:score, :total)).merge!(scr)
|
31
36
|
end
|
32
37
|
|
33
|
-
def merge!(
|
34
|
-
raise "Expecting Score object. Given: #{
|
35
|
-
|
36
|
-
|
38
|
+
def merge!(scr)
|
39
|
+
raise "Expecting Score object. Given: #{scr.class}" unless scr.is_a?(Score)
|
40
|
+
increase_total(scr.total)
|
41
|
+
increase(scr.score)
|
37
42
|
self
|
38
43
|
end
|
39
44
|
|
@@ -17,7 +17,7 @@ module Eco
|
|
17
17
|
def get_words(str, normalized: false)
|
18
18
|
return [] unless str
|
19
19
|
str = normalize_string(str) unless normalized
|
20
|
-
str.scan(/[a-zA-Z'-]+/)
|
20
|
+
str.scan(/[a-zA-Z'-]+/).compact
|
21
21
|
end
|
22
22
|
|
23
23
|
# Keeps the start order of the `words` and consecutive `words` together/consecutive.
|
@@ -63,6 +63,19 @@ module Eco
|
|
63
63
|
str.tr(' ', '')
|
64
64
|
end
|
65
65
|
|
66
|
+
# Deletes the words of `str1` and `str2` that match
|
67
|
+
# @return [Array<String>] pair of words.
|
68
|
+
def remove_matching_words(str1, str2, normalized: false)
|
69
|
+
unless normalized
|
70
|
+
str1 = normalize_string(str1)
|
71
|
+
str2 = normalize_string(str2)
|
72
|
+
end
|
73
|
+
return [str1, str2] if !str1 || !str2 || str1.empty? || str2.empty?
|
74
|
+
ws1 = get_words(str1)
|
75
|
+
ws2 = get_words(str2)
|
76
|
+
[(ws1 - ws2).join(" "), (ws2 - ws1).join(" ")]
|
77
|
+
end
|
78
|
+
|
66
79
|
end
|
67
80
|
end
|
68
81
|
end
|
data/lib/eco/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eco-helpers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.19
|
4
|
+
version: 2.0.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Oscar Segura
|
@@ -230,6 +230,26 @@ dependencies:
|
|
230
230
|
- - "<"
|
231
231
|
- !ruby/object:Gem::Version
|
232
232
|
version: '3.1'
|
233
|
+
- !ruby/object:Gem::Dependency
|
234
|
+
name: hashdiff
|
235
|
+
requirement: !ruby/object:Gem::Requirement
|
236
|
+
requirements:
|
237
|
+
- - ">="
|
238
|
+
- !ruby/object:Gem::Version
|
239
|
+
version: 1.0.1
|
240
|
+
- - "<"
|
241
|
+
- !ruby/object:Gem::Version
|
242
|
+
version: '1.1'
|
243
|
+
type: :runtime
|
244
|
+
prerelease: false
|
245
|
+
version_requirements: !ruby/object:Gem::Requirement
|
246
|
+
requirements:
|
247
|
+
- - ">="
|
248
|
+
- !ruby/object:Gem::Version
|
249
|
+
version: 1.0.1
|
250
|
+
- - "<"
|
251
|
+
- !ruby/object:Gem::Version
|
252
|
+
version: '1.1'
|
233
253
|
- !ruby/object:Gem::Dependency
|
234
254
|
name: fuzzy_match
|
235
255
|
requirement: !ruby/object:Gem::Requirement
|
@@ -290,6 +310,66 @@ dependencies:
|
|
290
310
|
- - "<"
|
291
311
|
- !ruby/object:Gem::Version
|
292
312
|
version: '1.6'
|
313
|
+
- !ruby/object:Gem::Dependency
|
314
|
+
name: roo
|
315
|
+
requirement: !ruby/object:Gem::Requirement
|
316
|
+
requirements:
|
317
|
+
- - ">="
|
318
|
+
- !ruby/object:Gem::Version
|
319
|
+
version: 2.8.3
|
320
|
+
- - "<"
|
321
|
+
- !ruby/object:Gem::Version
|
322
|
+
version: '2.9'
|
323
|
+
type: :runtime
|
324
|
+
prerelease: false
|
325
|
+
version_requirements: !ruby/object:Gem::Requirement
|
326
|
+
requirements:
|
327
|
+
- - ">="
|
328
|
+
- !ruby/object:Gem::Version
|
329
|
+
version: 2.8.3
|
330
|
+
- - "<"
|
331
|
+
- !ruby/object:Gem::Version
|
332
|
+
version: '2.9'
|
333
|
+
- !ruby/object:Gem::Dependency
|
334
|
+
name: roo-xls
|
335
|
+
requirement: !ruby/object:Gem::Requirement
|
336
|
+
requirements:
|
337
|
+
- - ">="
|
338
|
+
- !ruby/object:Gem::Version
|
339
|
+
version: 1.2.0
|
340
|
+
- - "<"
|
341
|
+
- !ruby/object:Gem::Version
|
342
|
+
version: '1.3'
|
343
|
+
type: :runtime
|
344
|
+
prerelease: false
|
345
|
+
version_requirements: !ruby/object:Gem::Requirement
|
346
|
+
requirements:
|
347
|
+
- - ">="
|
348
|
+
- !ruby/object:Gem::Version
|
349
|
+
version: 1.2.0
|
350
|
+
- - "<"
|
351
|
+
- !ruby/object:Gem::Version
|
352
|
+
version: '1.3'
|
353
|
+
- !ruby/object:Gem::Dependency
|
354
|
+
name: creek
|
355
|
+
requirement: !ruby/object:Gem::Requirement
|
356
|
+
requirements:
|
357
|
+
- - ">="
|
358
|
+
- !ruby/object:Gem::Version
|
359
|
+
version: 2.5.2
|
360
|
+
- - "<"
|
361
|
+
- !ruby/object:Gem::Version
|
362
|
+
version: '2.6'
|
363
|
+
type: :runtime
|
364
|
+
prerelease: false
|
365
|
+
version_requirements: !ruby/object:Gem::Requirement
|
366
|
+
requirements:
|
367
|
+
- - ">="
|
368
|
+
- !ruby/object:Gem::Version
|
369
|
+
version: 2.5.2
|
370
|
+
- - "<"
|
371
|
+
- !ruby/object:Gem::Version
|
372
|
+
version: '2.6'
|
293
373
|
description:
|
294
374
|
email:
|
295
375
|
- oscar@ecoportal.co.nz
|
@@ -443,6 +523,7 @@ files:
|
|
443
523
|
- lib/eco/api/usecases/default_cases/analyse_people_case.rb
|
444
524
|
- lib/eco/api/usecases/default_cases/append_usergroups_case.rb
|
445
525
|
- lib/eco/api/usecases/default_cases/change_email_case.rb
|
526
|
+
- lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb
|
446
527
|
- lib/eco/api/usecases/default_cases/codes_to_tags_case.rb
|
447
528
|
- lib/eco/api/usecases/default_cases/create_case.rb
|
448
529
|
- lib/eco/api/usecases/default_cases/create_details_case.rb
|
@@ -547,7 +628,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
547
628
|
requirements:
|
548
629
|
- - ">="
|
549
630
|
- !ruby/object:Gem::Version
|
550
|
-
version: 2.
|
631
|
+
version: 2.5.0
|
551
632
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
552
633
|
requirements:
|
553
634
|
- - ">="
|