eco-helpers 2.0.19 → 2.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/eco-helpers.gemspec +5 -1
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +49 -15
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +2 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +4 -2
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/cli/config/default/options.rb +29 -1
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +7 -5
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +83 -2
@@ -2,9 +2,27 @@ module Eco
|
|
2
2
|
module Data
|
3
3
|
module FuzzyMatch
|
4
4
|
class Results < Struct.new(:needle, :value, :raw_results)
|
5
|
+
include Enumerable
|
5
6
|
|
6
7
|
attr_accessor :threshold
|
7
8
|
|
9
|
+
def empty?
|
10
|
+
count < 1
|
11
|
+
end
|
12
|
+
|
13
|
+
def each(&block)
|
14
|
+
return to_enum(:each) unless block
|
15
|
+
raw_results.each(&block)
|
16
|
+
end
|
17
|
+
|
18
|
+
# Merges the results of both Results object
|
19
|
+
def merge(res)
|
20
|
+
unless self.needle == res.needle
|
21
|
+
raise "To merge 2 Results, needle should be the same ('#{value}'). Given '#{res.value}'"
|
22
|
+
end
|
23
|
+
self.class.new(needle, value, raw_results.concat(res.raw_results))
|
24
|
+
end
|
25
|
+
|
8
26
|
def results_with_false_positives
|
9
27
|
relevant_results(order: :jaro_winkler, threshold: 0.5)
|
10
28
|
end
|
@@ -4,7 +4,10 @@ module Eco
|
|
4
4
|
class Score < Struct.new(:score, :total)
|
5
5
|
|
6
6
|
def ratio(decimals = 6)
|
7
|
-
|
7
|
+
tot = self.total; sc = self.score
|
8
|
+
tot = tot && tot > 0 ? tot : 1
|
9
|
+
sc = sc && sc > 0 ? sc : 0
|
10
|
+
(sc.to_f / tot).round(decimals)
|
8
11
|
end
|
9
12
|
|
10
13
|
def percent(decimals = 3)
|
@@ -13,6 +16,8 @@ module Eco
|
|
13
16
|
|
14
17
|
def increase(value = 1)
|
15
18
|
self.score += value
|
19
|
+
raise "Score #{self.score} (increase: #{value}) can't be greater than total #{self.total}" if self.score > self.total
|
20
|
+
self.score
|
16
21
|
end
|
17
22
|
|
18
23
|
def increase_total(value)
|
@@ -26,14 +31,14 @@ module Eco
|
|
26
31
|
end
|
27
32
|
|
28
33
|
# Merges 2 Score instance objects
|
29
|
-
def merge(
|
30
|
-
Score.new(*values_at(:score, :total)).merge!(
|
34
|
+
def merge(scr)
|
35
|
+
Score.new(*values_at(:score, :total)).merge!(scr)
|
31
36
|
end
|
32
37
|
|
33
|
-
def merge!(
|
34
|
-
raise "Expecting Score object. Given: #{
|
35
|
-
|
36
|
-
|
38
|
+
def merge!(scr)
|
39
|
+
raise "Expecting Score object. Given: #{scr.class}" unless scr.is_a?(Score)
|
40
|
+
increase_total(scr.total)
|
41
|
+
increase(scr.score)
|
37
42
|
self
|
38
43
|
end
|
39
44
|
|
@@ -17,7 +17,7 @@ module Eco
|
|
17
17
|
def get_words(str, normalized: false)
|
18
18
|
return [] unless str
|
19
19
|
str = normalize_string(str) unless normalized
|
20
|
-
str.scan(/[a-zA-Z'-]+/)
|
20
|
+
str.scan(/[a-zA-Z'-]+/).compact
|
21
21
|
end
|
22
22
|
|
23
23
|
# Keeps the start order of the `words` and consecutive `words` together/consecutive.
|
@@ -63,6 +63,19 @@ module Eco
|
|
63
63
|
str.tr(' ', '')
|
64
64
|
end
|
65
65
|
|
66
|
+
# Deletes the words of `str1` and `str2` that match
|
67
|
+
# @return [Array<String>] pair of words.
|
68
|
+
def remove_matching_words(str1, str2, normalized: false)
|
69
|
+
unless normalized
|
70
|
+
str1 = normalize_string(str1)
|
71
|
+
str2 = normalize_string(str2)
|
72
|
+
end
|
73
|
+
return [str1, str2] if !str1 || !str2 || str1.empty? || str2.empty?
|
74
|
+
ws1 = get_words(str1)
|
75
|
+
ws2 = get_words(str2)
|
76
|
+
[(ws1 - ws2).join(" "), (ws2 - ws1).join(" ")]
|
77
|
+
end
|
78
|
+
|
66
79
|
end
|
67
80
|
end
|
68
81
|
end
|
data/lib/eco/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eco-helpers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.19
|
4
|
+
version: 2.0.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Oscar Segura
|
@@ -230,6 +230,26 @@ dependencies:
|
|
230
230
|
- - "<"
|
231
231
|
- !ruby/object:Gem::Version
|
232
232
|
version: '3.1'
|
233
|
+
- !ruby/object:Gem::Dependency
|
234
|
+
name: hashdiff
|
235
|
+
requirement: !ruby/object:Gem::Requirement
|
236
|
+
requirements:
|
237
|
+
- - ">="
|
238
|
+
- !ruby/object:Gem::Version
|
239
|
+
version: 1.0.1
|
240
|
+
- - "<"
|
241
|
+
- !ruby/object:Gem::Version
|
242
|
+
version: '1.1'
|
243
|
+
type: :runtime
|
244
|
+
prerelease: false
|
245
|
+
version_requirements: !ruby/object:Gem::Requirement
|
246
|
+
requirements:
|
247
|
+
- - ">="
|
248
|
+
- !ruby/object:Gem::Version
|
249
|
+
version: 1.0.1
|
250
|
+
- - "<"
|
251
|
+
- !ruby/object:Gem::Version
|
252
|
+
version: '1.1'
|
233
253
|
- !ruby/object:Gem::Dependency
|
234
254
|
name: fuzzy_match
|
235
255
|
requirement: !ruby/object:Gem::Requirement
|
@@ -290,6 +310,66 @@ dependencies:
|
|
290
310
|
- - "<"
|
291
311
|
- !ruby/object:Gem::Version
|
292
312
|
version: '1.6'
|
313
|
+
- !ruby/object:Gem::Dependency
|
314
|
+
name: roo
|
315
|
+
requirement: !ruby/object:Gem::Requirement
|
316
|
+
requirements:
|
317
|
+
- - ">="
|
318
|
+
- !ruby/object:Gem::Version
|
319
|
+
version: 2.8.3
|
320
|
+
- - "<"
|
321
|
+
- !ruby/object:Gem::Version
|
322
|
+
version: '2.9'
|
323
|
+
type: :runtime
|
324
|
+
prerelease: false
|
325
|
+
version_requirements: !ruby/object:Gem::Requirement
|
326
|
+
requirements:
|
327
|
+
- - ">="
|
328
|
+
- !ruby/object:Gem::Version
|
329
|
+
version: 2.8.3
|
330
|
+
- - "<"
|
331
|
+
- !ruby/object:Gem::Version
|
332
|
+
version: '2.9'
|
333
|
+
- !ruby/object:Gem::Dependency
|
334
|
+
name: roo-xls
|
335
|
+
requirement: !ruby/object:Gem::Requirement
|
336
|
+
requirements:
|
337
|
+
- - ">="
|
338
|
+
- !ruby/object:Gem::Version
|
339
|
+
version: 1.2.0
|
340
|
+
- - "<"
|
341
|
+
- !ruby/object:Gem::Version
|
342
|
+
version: '1.3'
|
343
|
+
type: :runtime
|
344
|
+
prerelease: false
|
345
|
+
version_requirements: !ruby/object:Gem::Requirement
|
346
|
+
requirements:
|
347
|
+
- - ">="
|
348
|
+
- !ruby/object:Gem::Version
|
349
|
+
version: 1.2.0
|
350
|
+
- - "<"
|
351
|
+
- !ruby/object:Gem::Version
|
352
|
+
version: '1.3'
|
353
|
+
- !ruby/object:Gem::Dependency
|
354
|
+
name: creek
|
355
|
+
requirement: !ruby/object:Gem::Requirement
|
356
|
+
requirements:
|
357
|
+
- - ">="
|
358
|
+
- !ruby/object:Gem::Version
|
359
|
+
version: 2.5.2
|
360
|
+
- - "<"
|
361
|
+
- !ruby/object:Gem::Version
|
362
|
+
version: '2.6'
|
363
|
+
type: :runtime
|
364
|
+
prerelease: false
|
365
|
+
version_requirements: !ruby/object:Gem::Requirement
|
366
|
+
requirements:
|
367
|
+
- - ">="
|
368
|
+
- !ruby/object:Gem::Version
|
369
|
+
version: 2.5.2
|
370
|
+
- - "<"
|
371
|
+
- !ruby/object:Gem::Version
|
372
|
+
version: '2.6'
|
293
373
|
description:
|
294
374
|
email:
|
295
375
|
- oscar@ecoportal.co.nz
|
@@ -443,6 +523,7 @@ files:
|
|
443
523
|
- lib/eco/api/usecases/default_cases/analyse_people_case.rb
|
444
524
|
- lib/eco/api/usecases/default_cases/append_usergroups_case.rb
|
445
525
|
- lib/eco/api/usecases/default_cases/change_email_case.rb
|
526
|
+
- lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb
|
446
527
|
- lib/eco/api/usecases/default_cases/codes_to_tags_case.rb
|
447
528
|
- lib/eco/api/usecases/default_cases/create_case.rb
|
448
529
|
- lib/eco/api/usecases/default_cases/create_details_case.rb
|
@@ -547,7 +628,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
547
628
|
requirements:
|
548
629
|
- - ">="
|
549
630
|
- !ruby/object:Gem::Version
|
550
|
-
version: 2.
|
631
|
+
version: 2.5.0
|
551
632
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
552
633
|
requirements:
|
553
634
|
- - ">="
|