eco-helpers 2.0.17 → 2.0.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +85 -1
- data/eco-helpers.gemspec +4 -1
- data/lib/eco-helpers.rb +1 -0
- data/lib/eco/api/common/base_loader.rb +9 -5
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +88 -23
- data/lib/eco/api/common/people/person_entry.rb +1 -0
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/session.rb +1 -0
- data/lib/eco/api/common/session/base_session.rb +2 -0
- data/lib/eco/api/common/session/helpers.rb +30 -0
- data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
- data/lib/eco/api/common/version_patches/exception.rb +11 -4
- data/lib/eco/api/microcases/with_each.rb +67 -6
- data/lib/eco/api/microcases/with_each_present.rb +4 -2
- data/lib/eco/api/microcases/with_each_starter.rb +4 -2
- data/lib/eco/api/organization.rb +1 -1
- data/lib/eco/api/organization/people.rb +94 -25
- data/lib/eco/api/organization/people_similarity.rb +272 -0
- data/lib/eco/api/organization/person_schemas.rb +5 -1
- data/lib/eco/api/organization/policy_groups.rb +5 -1
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +19 -8
- data/lib/eco/api/session/batch.rb +7 -5
- data/lib/eco/api/session/batch/job.rb +27 -8
- data/lib/eco/api/session/config/apis.rb +80 -14
- data/lib/eco/api/usecases.rb +2 -2
- data/lib/eco/api/usecases/base_case.rb +2 -2
- data/lib/eco/api/usecases/base_io.rb +17 -4
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +3 -3
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +179 -32
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +81 -36
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +3 -4
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
- data/lib/eco/cli/config/default/input.rb +61 -8
- data/lib/eco/cli/config/default/options.rb +36 -2
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +33 -2
- data/lib/eco/cli/config/default/workflow.rb +21 -12
- data/lib/eco/cli/scripting/args_helpers.rb +2 -2
- data/lib/eco/csv.rb +4 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +109 -27
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +19 -10
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -19
- data/lib/eco/data/fuzzy_match/result.rb +22 -2
- data/lib/eco/data/fuzzy_match/results.rb +30 -6
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +67 -3
- data/lib/eco/api/organization/people_analytics.rb +0 -60
@@ -11,6 +11,8 @@ module Eco
|
|
11
11
|
attr_reader :api, :file_manager, :logger
|
12
12
|
alias_method :fm, :file_manager
|
13
13
|
|
14
|
+
include Session::Helpers
|
15
|
+
|
14
16
|
def initialize(e)
|
15
17
|
raise "Expected object Eco::API::Common::Session::Environment. Given: #{e.class}" unless e.is_a?(Environment)
|
16
18
|
self.environment = e
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require_relative 'helpers/prompt_user'
|
2
|
+
|
3
|
+
module Eco
  module API
    module Common
      module Session
        # Mixin that bundles the session helper modules.
        # Including it mixes `InstanceMethods` into the including class/module
        # and extends it with `ClassMethods`.
        module Helpers
          # Class-level helpers added (via `extend`) to whatever includes `Helpers`.
          # Currently empty.
          module ClassMethods
          end

          # Instance-level helpers added (via `include`) to whatever includes `Helpers`.
          module InstanceMethods
            include Helpers::PromptUser
          end

          # Hook invoked by Ruby when `Helpers` is included somewhere.
          # @param base [Module] the class or module that is including `Helpers`.
          def self.included(base)
            base.send(:include, InstanceMethods)
            base.extend(ClassMethods)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'timeout'
|
2
|
+
module Eco
  module API
    module Common
      module Session
        module Helpers
          # Helper to ask the user a question through the standard input.
          # The including object must respond to `config` (read to check `run_mode_remote?`).
          module PromptUser
            # Prompts the user and returns (or yields) the answer.
            # @note in remote run mode nothing is printed or read: `default` is used directly.
            # @param question [String] the text printed right before reading the answer.
            # @param default [Object] the answer used in remote mode, on timeout, or when
            #   the standard input is at end-of-file.
            # @param explanation [String] text printed before the `question`.
            # @param timeout [Numeric, nil] seconds to wait for an answer; `nil` waits forever.
            # @yield [response] optional block to process the answer.
            # @yieldparam response [Object] the typed line (without line terminator) or `default`.
            # @return [Object] the block's result if a block is given, the response otherwise.
            def prompt_user(question, default:, explanation: "", timeout: nil)
              response =
                if config.run_mode_remote?
                  default
                else
                  puts explanation
                  print "#{question} "
                  begin
                    line = timeout ? Timeout.timeout(timeout) { $stdin.gets } : $stdin.gets
                    # `gets` returns nil at EOF (closed/piped stdin): fall back to `default`
                    # instead of failing. `chomp` only strips a line terminator, so input
                    # without a trailing newline keeps its last character.
                    line ? line.chomp : default
                  rescue Timeout::Error
                    default
                  end
                end
              return response unless block_given?
              yield(response)
            end
          end
        end
      end
    end
  end
end
|
@@ -11,12 +11,15 @@ module Ecoportal
|
|
11
11
|
original_doc["account"] = JSON.parse(doc["account"])
|
12
12
|
end
|
13
13
|
|
14
|
-
def new?
|
15
|
-
|
14
|
+
# @param doc [Symbol] `:original` to compare against `original_doc`;
#   any other value compares against `initial_doc`.
# @return [Boolean] `true` when the reference document has neither `details` nor `account`.
def new?(doc = :initial)
  reference = doc == :original ? original_doc : initial_doc
  !(reference["details"] || reference["account"])
end
|
17
18
|
|
18
|
-
|
19
|
-
|
19
|
+
# @param doc [Symbol] `:original` to compare against `original_doc`;
#   any other value compares against `initial_doc`.
# @return [Boolean] if the account has been added, compared to `doc`
#   (the falsy value of `account` itself when there is no account at present).
def account_added?(doc = :initial)
  baseline = doc == :original ? original_doc : initial_doc
  current  = account
  return current unless current
  !baseline["account"]
end
|
21
24
|
|
22
25
|
end
|
@@ -1,9 +1,16 @@
|
|
1
1
|
class ::Exception
  # Builds a multi-line, human readable description of the exception:
  # a headline with the message and class, followed by the numbered backtrace.
  # @note the first backtrace frame is dropped (`[1..]`) and, for `SystemStackError`,
  #   at most 30 frames are kept.
  #   NOTE(review): dropping frame 0 looks deliberate but is worth confirming.
  # @return [String] the formatted message; a minimal `"(ClassName)"` fallback if
  #   building the message fails.
  def patch_full_message
    frames = backtrace || []
    frames = (self.class == SystemStackError ? frames[1..30] : frames[1..-1]) || []
    lines  = ["\n#{frames.first} \n#{message} (#{self.class.to_s})"]
    frames.each_with_index { |frame, i| lines << "#{" " * 8}#{i + 1}: from #{frame}" }
    lines.join("\n")
  rescue StandardError => e
    # Only StandardError: rescuing Exception here would swallow Interrupt / SystemExit.
    puts "Something is wrong with 'patch_full_message': #{e}"
    # Still hand a String back to the caller (the `puts` result would be nil).
    "(#{self.class})"
  end
end
|
9
16
|
|
@@ -12,16 +12,77 @@ module Eco
|
|
12
12
|
# @yieldparam person [Ecoportal::API::V1::Person] the found person that matches `entry`, or a new person otherwise.
|
13
13
|
# @return [Eco::API::Organization::People] all the people, including new and existing ones.
|
14
14
|
# Pairs every entry with a person: the one found in `people`, or a brand new one.
# When the search raises `MultipleSearchResults`, the user is prompted to resolve it
# (unless they already chose to ignore all such cases); entries left without a person are skipped.
# @param entries [Enumerable] the input entries.
# @param people [Eco::API::Organization::People] where to search the entries.
# @param options [Hash] the launch options (read to decide on strict search).
# @yield [entry, person] optional block called for each paired entry.
# @return [Eco::API::Organization::People] the unique people paired, via `people.newFrom`.
def with_each(entries, people, options)
  @_skip_all_multiple_results = false
  paired = []

  entries.each do |entry|
    person =
      begin
        people.find(entry, strict: micro.strict_search?(options)) || session.new_person
      rescue Eco::API::Organization::People::MultipleSearchResults => e
        if @_skip_all_multiple_results
          nil
        else
          msg = "\n * When searching this Entry: #{entry.to_s(:identify)}"
          _with_each_prompt_to_select_user(e.append_message(msg), entry: entry)
        end
      end

    next unless person

    person.entry = entry
    yield(entry, person) if block_given?
    paired << person
  end

  people.newFrom(paired.uniq)
end
|
24
35
|
|
36
|
+
private
|
37
|
+
|
38
|
+
# Asks the user how to resolve a `MultipleSearchResults` error and acts accordingly.
# It re-prompts (without increasing the prompt counter) until a valid option is typed.
# @param error [Eco::API::Organization::People::MultipleSearchResults] the error to resolve.
# @param entry [Object, nil] the input entry that triggered the error (used for logging only).
# @param increase_count [Boolean] whether this call counts as a new prompt.
# @raise [RuntimeError] if `error` is not a `MultipleSearchResults`.
# @raise [Eco::API::Organization::People::MultipleSearchResults] `error` itself, on option `B`.
# @return [Object, nil] the selected candidate, a new person (option `C`),
#   or `nil` when the entry is to be ignored (options `I` and `A`).
def _with_each_prompt_to_select_user(error, entry: nil, increase_count: true)
  unless error.is_a?(Eco::API::Organization::People::MultipleSearchResults)
    raise "Expecting Eco::API::Organization::People::MultipleSearchResults. Given: #{error.class}"
  end
  @_with_each_prompts = 0 unless instance_variable_defined?(:@_with_each_prompts)
  @_with_each_prompts += 1 if increase_count

  lines = []
  lines << "\n(#{@_with_each_prompts}) " + error.to_s + "\n"
  lines << " #index - Select the correct person by its number index among the list above."
  lines << " (I) - Just Skip/Ignore this one. I will deal with that input entry in another launch."
  lines << " (A) - Ignore all the rest of input entries with this problem."
  lines << " (C) - Create a new person."
  lines << " (B) - Just break this script. I need to change the input file :/"

  prompt_user("Type one option (#number/I/A/C/B):", explanation: lines.join("\n"), default: "I") do |res|
    res = res.to_s.strip.upcase
    # Only an all-digits answer is an index. `String#to_i` would turn any other
    # text into 0 (silently selecting the first candidate) and would accept
    # negative numbers (indexing from the end of the list).
    pos = res.match?(/\A\d+\z/) ? res.to_i : nil

    case
    when res.start_with?("I")
      logger.info "Ignoring entry... #{entry.to_s(:identify) if entry}"
      nil
    when res.start_with?("A")
      logger.info "All input entries with this same issue will be ignored for this launch"
      @_skip_all_multiple_results = true
      nil
    when res.start_with?("C")
      logger.info "Creating new person...#{"for entry #{entry.to_s(:identify)}" if entry}"
      session.new_person
    when res.start_with?("B")
      raise error
    when pos && pos < error.candidates.length
      error.candidate(pos).tap do |person|
        logger.info "Thanks!! You selected #{person.identify}"
        sleep(1.5)
      end
    else
      if pos
        print "#{pos} is not a number in the range. "
      else
        print "#{res} is not an option. "
      end
      puts "Please select one of the offered options..."
      sleep(1)
      _with_each_prompt_to_select_user(error, increase_count: false, entry: entry)
    end
  end
end
|
85
|
+
|
25
86
|
end
|
26
87
|
end
|
27
88
|
end
|
@@ -15,8 +15,10 @@ module Eco
|
|
15
15
|
def with_each_present(entries, people, options, log_starter: false)
|
16
16
|
found = []
|
17
17
|
micro.with_each(entries, people, options) do |entry, person|
|
18
|
-
if person.new?
|
19
|
-
|
18
|
+
if person.new?
|
19
|
+
if log_starter
|
20
|
+
session.logger.error("This person does not exist: #{entry.to_s(:identify)}")
|
21
|
+
end
|
20
22
|
next
|
21
23
|
end
|
22
24
|
found << person
|
@@ -15,8 +15,10 @@ module Eco
|
|
15
15
|
def with_each_starter(entries, people, options, log_present: false)
|
16
16
|
starters = []
|
17
17
|
micro.with_each(entries, people, options) do |entry, person|
|
18
|
-
if !person.new?
|
19
|
-
|
18
|
+
if !person.new?
|
19
|
+
if log_present
|
20
|
+
session.logger.error("This person (id: '#{person.id}') already exists: #{entry.to_s(:identify)}")
|
21
|
+
end
|
20
22
|
next
|
21
23
|
end
|
22
24
|
starters << person
|
data/lib/eco/api/organization.rb
CHANGED
@@ -9,7 +9,7 @@ require_relative 'organization/tag_tree'
|
|
9
9
|
require_relative 'organization/presets_factory'
|
10
10
|
require_relative 'organization/preferences'
|
11
11
|
require_relative 'organization/people'
|
12
|
-
require_relative 'organization/
|
12
|
+
require_relative 'organization/people_similarity'
|
13
13
|
require_relative 'organization/person_schemas'
|
14
14
|
require_relative 'organization/policy_groups'
|
15
15
|
require_relative 'organization/login_providers'
|
@@ -2,6 +2,43 @@ module Eco
|
|
2
2
|
module API
|
3
3
|
module Organization
|
4
4
|
class People < Eco::Language::Models::Collection
|
5
|
+
# Error class to handle the cases where multiple people were found for the same criterion.
# @note its main purpose is to prevent the creation of duplicates or overriding
#   information between different people.
class MultipleSearchResults < StandardError
  attr_reader :candidates, :property

  # @param msg [String] the basic error message (the candidates summary is appended to it).
  # @param candidates [Array<Person>] the people that match the same search criterion.
  # @param property [String] the property of the person model that triggered the error
  #   (base of the search criterion).
  def initialize(msg, candidates: [], property: "email")
    @candidates = candidates
    @property   = property
    super(msg + " " + candidates_summary)
  end

  # @param with_index [Boolean] to add an index to each candidate description.
  # @return [Array<String>] the `candidates` identified
  def identify_candidates(with_index: false)
    candidates.each_with_index.map do |person, position|
      label =
        if !person.account
          "(no account)"
        elsif person.account_added?
          "(new user)"
        else
          "(user)"
        end
      prefix = with_index ? "#{position}. " : ""
      "#{prefix}#{label} #{person.identify}"
    end
  end

  # @param index [Integer] position in `candidates`.
  # @return [Person] the `candidate` in the `index` position
  def candidate(index)
    candidates[index]
  end

  private

  # @return [String] a header line followed by one indexed line per candidate.
  def candidates_summary
    header = "The following people have the same '#{property}':"
    ([header] + identify_candidates(with_index: true)).join("\n ")
  end
end
|
41
|
+
|
5
42
|
# build the shortcuts of Collection
|
6
43
|
attr_presence :account, :details
|
7
44
|
attr_collection :id, :external_id, :email, :name, :supervisor_id
|
@@ -78,34 +115,36 @@ module Eco
|
|
78
115
|
# @!group Searchers
|
79
116
|
|
80
117
|
# It searches a person using the parameters given.
|
118
|
+
# @note This is how the search function actually works:
|
119
|
+
# 1. if eP `id` is given, returns the person (if found), otherwise...
|
120
|
+
# 2. if `external_id` is given, returns the person (if found), otherwise...
|
121
|
+
# 3. if `strict` is `false` and `email` is given:
|
122
|
+
# - if there is only 1 person with that email, returns that person, otherwise...
|
123
|
+
# - if found but, there are many candidates, it raises MultipleSearchResults error
|
124
|
+
# - if person `external_id` matches `email`, returns that person
|
125
|
+
# @raise MultipleSearchResults if there are multiple people with the same `email`
|
126
|
+
# and there's no other criteria to find the person. It only gets to this point if
|
127
|
+
# `external_id` was **not** provided and we are **not** in 'strict' search mode.
|
128
|
+
# However, it could be we were in `strict` mode and `external_id` was not provided.
|
81
129
|
# @param id [String] the `internal id` of the person
|
82
130
|
# @param external_id [String] the `external_id` of the person
|
83
131
|
# @param email [String] the `email` of the person
|
84
|
-
# @param strict [Boolean] if should perform a
|
132
|
+
# @param strict [Boolean] if should perform a `:soft` or a `:strict` search. `strict` will avoid repeated email addresses.
|
85
133
|
# @return [Person, nil] the person we were searching, or `nil` if not found.
|
86
134
|
# Looks a person up by `id`, then by `external_id`, and finally by `email`.
# The `email` step is skipped when `strict` is set and an `external_id` was given.
def person(id: nil, external_id: nil, email: nil, strict: false)
  init_caches

  # Normalize: blank values become `false`, which is not expected to be a cache key.
  ext_id = external_id.to_s.strip.empty? ? false : external_id.strip
  email  = email.to_s.strip.empty? ? false : email.downcase.strip

  by_id = @by_id[id]&.first
  return by_id if by_id

  by_ext_id = @by_external_id[ext_id]&.first
  return by_ext_id if by_ext_id

  return nil if strict && ext_id
  person_by_email(email)
end
|
108
146
|
|
147
|
+
# @see Eco::API::Organization::People#person
|
109
148
|
def find(object, strict: false)
|
110
149
|
id = attr_value(object, "id")
|
111
150
|
external_id = attr_value(object, "external_id")
|
@@ -177,8 +216,8 @@ module Eco
|
|
177
216
|
# @!endgroup
|
178
217
|
|
179
218
|
# @!group Helper methods
|
180
|
-
def
|
181
|
-
Eco::API::Organization::
|
219
|
+
# @return [Eco::API::Organization::PeopleSimilarity] a similarity object built
#   from the elements of this collection (`to_a`).
def similarity
  members = self.to_a
  Eco::API::Organization::PeopleSimilarity.new(members)
end
|
183
222
|
# @!endgroup
|
184
223
|
|
@@ -190,16 +229,46 @@ module Eco
|
|
190
229
|
|
191
230
|
private
|
192
231
|
|
232
|
+
# Finds a person by `email` using the email caches built by `init_caches`.
# @param email [String, nil, false] the (already normalized) email to look for.
# @param prevent_duplicates [Boolean] whether to raise when several people share the `email`.
# @raise [MultipleSearchResults] when `prevent_duplicates` is set and a user plus
#   non-users, or several non-users, share the `email`.
# @return [Person, nil] the single match; otherwise the first person whose
#   `external_id` equals `email`, or `nil`.
def person_by_email(email, prevent_duplicates: true)
  return nil unless email

  non_users = @by_non_users_email[email] || []
  user      = (@by_users_email[email] || []).first

  if user
    # A user is the match as long as no non-user shares the email.
    return user if non_users.empty?
    clashing = [user] + non_users
  else
    return non_users.first if non_users.length == 1
    clashing = non_users
  end

  unless !prevent_duplicates || clashing.empty?
    msg = "Multiple search results match the criteria."
    raise MultipleSearchResults.new(msg, candidates: clashing, property: "email")
  end

  # Last resort: someone whose `external_id` is the email.
  @by_external_id[email]&.first
end
|
252
|
+
|
193
253
|
# Builds, only once, the lookup hashes used by the searchers.
# All of them except `@by_id` have the `nil` key removed.
def init_caches
  unless @caches_init
    @by_id              = to_h
    @by_external_id     = no_nil_key(to_h('external_id'))
    @by_users_email     = no_nil_key(existing_users.to_h('email'))
    @by_non_users_email = no_nil_key(non_users.to_h('email'))
    @by_email           = no_nil_key(to_h('email'))
    @caches_init        = true
  end
end
|
202
262
|
|
263
|
+
# @return [Object] the result of `newFrom` over the `users` whose account
#   was not added when compared to the `:original` document.
def existing_users
  already_users = users.select { |user| !user.account_added?(:original) }
  newFrom(already_users)
end
|
266
|
+
|
267
|
+
# Removes the `nil` key from `hash` (in place).
# @param hash [Hash] the hash to clean up; it is mutated.
# @return [Hash] the very same `hash` object.
def no_nil_key(hash)
  hash.delete(nil)
  hash
end
|
270
|
+
|
271
|
+
|
203
272
|
end
|
204
273
|
end
|
205
274
|
end
|
@@ -0,0 +1,272 @@
|
|
1
|
+
module Eco
  module API
    module Organization

      # Class to find out duplicates in the People Manager
      #
      # @attr_writer attribute [String, Proc, nil] the target attribute to be read.
      class PeopleSimilarity < Eco::API::Organization::People
        include Eco::Data::FuzzyMatch

        # @!group Config

        # @param attr [String, Symbol, Proc, nil] the target attribute to be read.
        def attribute=(attr)
          @attribute = attr
        end

        # @return [String, Symbol, Proc] the target attribute to be read (`:name` by default).
        def attribute
          @attribute ||= :name
        end

        # Returns the target value to analyse
        # @param person [Ecoportal::API::V1::Person]
        # @return [Object, nil] the result of calling `attribute` (when it is a `Proc`) with
        #   `person`, or the value of the `attribute` method of `person`; `nil` when `person`
        #   does not respond to it.
        def item_value(person)
          return attribute.call(person) if attribute.is_a?(Proc)
          attr = attribute.to_sym
          person.send(attr) if person.respond_to?(attr)
        end

        # Define the order of relevance of the matches per user
        # @param values [Array<Symbol>] the algorithms' results it should be ordered by
        #   * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`
        def order=(values)
          @order = values
        end

        # @return [Array<Symbol>] the algorithms to order by (`[:words_ngrams, :dice]` by default).
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # Define the minimum score of the matches
        # @param value [Float] the threshold that all of the algorithms should comply with
        def threshold=(value)
          @threshold = value
        end

        # @return [Float] the threshold (`0.15` by default).
        def threshold
          @threshold ||= 0.15
        end

        # Generates a new object with same config but different base `data`.
        # @return [Eco::API::Organization::PeopleSimilarity]
        def newFrom(data)
          super(data).tap do |simil|
            simil.threshold = threshold
            simil.order     = order
            simil.attribute = attribute
          end
        end

        # @!endgroup

        # @!group Searchers

        # It gathers those that have the same `email`
        # @return [Hash] where `keys` are `email`s and `values` an `Array<Person>`
        def repeated_emails
          init_caches
          @by_email.select do |email, people|
            people.count > 1
          end
        end

        # It returns all people with no name (less than 2 characters once stripped)
        # @return [Eco::API::Organization::PeopleSimilarity]
        def unnamed
          select do |person|
            person.name.to_s.strip.length < 2
          end.yield_self do |results|
            newFrom(results)
          end
        end

        # It returns all people that do have a name (2 or more characters once stripped)
        # @return [Eco::API::Organization::PeopleSimilarity]
        def named
          reject do |person|
            person.name.to_s.strip.length < 2
          end.yield_self do |results|
            newFrom(results)
          end
        end

        # It returns all the entries with `attribute` empty
        # @return [Eco::API::Organization::PeopleSimilarity]
        def blank_attribute
          select do |person|
            item_value(person).to_s.strip.length < 2
          end.yield_self do |results|
            newFrom(results)
          end
        end

        # It returns all the entries with `attribute` **not** empty
        # @return [Eco::API::Organization::PeopleSimilarity]
        def attribute_present
          reject do |person|
            item_value(person).to_s.strip.length < 2
          end.yield_self do |results|
            newFrom(results)
          end
        end

        # @!endgroup

        # @!group Analysis starters

        # Analyses People based on `options`
        # @param needle_read [Proc, Symbol] when the value to read from `needle` object is different to the `:read` (`attribute`).
        #   This allows to for example, facet `needle.name` (needle_read) against `haystack_item.details[alt_id]` (read).
        # @param keep_empty [Boolean] to indicate if it should keep people with no results (based on threshold)
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def analyse(needle_read: nil, keep_empty: false, **options)
          options = { read: self.attribute }.merge(options)
          total = count; i = 1
          analysed = each_with_object({}) do |person, results|
            needle_str = needle_read ? item_string(person, needle_read) : nil
            results[person.id] = find_all_with_score(person, needle_str: needle_str, **options)
            print_progress("Analysed", total, i)
            i += 1
          end
          keep_empty ? analysed : clean_empty(analysed)
        end

        # @!endgroup

        # @!group Results Treatment

        # Gets a new instance object of this class, with only people in results
        # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
        # @return [Eco::API::Organization::PeopleSimilarity]
        def newSimilarity(analysed)
          newFrom(people_in_results(analysed))
        end

        # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
        # @return [Array<Person>] each person of `analysed` followed by their matches, without repetitions.
        def people_in_results(analysed)
          analysed.each_with_object([]) do |(id, results), people|
            related = results.each_with_object([self[id]]) do |result, group|
              group << result.match
            end
            related.each {|person| people << person unless people.include?(person)}
          end
        end

        # Removes from results those that do not have similar entries
        def clean_empty(analysed)
          analysed.select do |id, results|
            !results.empty?
          end
        end

        # Helper to do some treatment of the results
        # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
        # @param keep_empty [Boolean] to indicate if it should keep people left with no results.
        # @yield [person, results] the treatment to apply to the results of each person.
        # @return [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
        def with_analysed(analysed, keep_empty: false)
          reanalysed = analysed.each_with_object({}) do |(id, results), out|
            out[id] = yield(self[id], results)
          end
          keep_empty ? reanalysed : clean_empty(reanalysed)
        end

        # Launches a reanalysis on `analysed` based on `options`
        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def rearrange(analysed, **options)
          with_analysed(analysed) do |person, results|
            results.relevant_results(**options)
          end
        end

        # Reanalyses by using a block to treat the needle and item values
        # @note `options` are accepted but not forwarded to `recalculate_results`.
        def reanalyse(analysed, msg: "Reanalysing", **options, &block)
          total = analysed.count; i = 1
          with_analysed(analysed) do |person, results|
            print_progress(msg, total, i)
            i += 1
            recalculate_results(results, &block)
          end
        end

        # Reanalyses by ignoring matching words between the `needle` and those found in `results`
        def ignore_matching_words(analysed, **options)
          prompt = "Reanalysing by ignoring matching words"
          reanalyse(analysed, msg: prompt, **options) do |needle_str, item_str, needle, item|
            self.class.remove_matching_words(needle_str, item_str)
          end
        end

        # Reanalyses by ignoring matching words between the `needle` and those found in `results`
        # NOTE(review): presumably superseded by `ignore_matching_words` — confirm before removing.
        def ignore_matching_words_old(analysed, **options)
          options = { read: self.attribute }.merge(options)
          total = analysed.count; i = 1
          with_analysed(analysed) do |person, results|
            print_progress("Reanalysing by ignoring matching words", total, i)
            i += 1
            ignore_same_words_score(results, **options)
          end
        end

        # @!endgroup

        # @!group Reporting Helpers

        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        # @param format [Symbol] only `:txt` is supported.
        # @return [String, nil] well structured text (`nil` for an unsupported `format`)
        def report(analysed, format: :txt)
          case format
          when :txt
            analysed.each_with_object("") do |(id, results), out|
              msg = results.results.map {|r| r.print}.join("\n ")
              out << "#{self[id].identify}:\n " + msg + "\n"
            end
          end
        end

        # @note
        #   1. Unless `:analysed` is provided, it launches an analysis cutting with Jaro Winker min 0.5
        #   2. It then re-sorts and cuts based on `options`
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def print_analysis(**options)
          # NOTE(review): `results_with_false_positives` is not defined in this class;
          #   presumably provided by a superclass or mixin — confirm.
          analysed = options[:analysed] || results_with_false_positives.analyse(**options)
          # Print the report once (not once per analysed person) and return the results.
          puts report(analysed)
          analysed
        end
        # @!endgroup

        protected

        # Drops the memoized `@fuzzy_match` before delegating to the parent `on_change`.
        def on_change
          # `remove_instance_variable` expects the variable *name* and raises when
          # the variable is not defined.
          remove_instance_variable(:@fuzzy_match) if instance_variable_defined?(:@fuzzy_match)
          super
        end

        private

        # Prints, in a single console line that gets overwritten, the progress of a process.
        # It only prints when there are more than 10 elements to process.
        def print_progress(msg, total, num)
          return unless total > 10
          puts "" unless num > 1
          @print_msg_len ||= 0
          percent = (100 * num.to_f / total).round(1)
          msg = " #{msg}: #{percent}% (#{num} of #{total})\r"
          @print_msg_len = msg.length unless @print_msg_len > msg.length
          print msg
          $stdout.flush
          if percent > 99.9
            # Blank the progress line once it is finished.
            sleep(0.2)
            print "#{" " * @print_msg_len}\r"
            $stdout.flush
          end
        end

      end
    end
  end
end
|