eco-helpers 2.0.16 → 2.0.22
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +125 -6
- data/eco-helpers.gemspec +10 -5
- data/lib/eco-helpers.rb +2 -0
- data/lib/eco/api/common/base_loader.rb +18 -0
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
- data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +88 -23
- data/lib/eco/api/common/people/person_entry.rb +5 -2
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/session.rb +1 -0
- data/lib/eco/api/common/session/base_session.rb +2 -0
- data/lib/eco/api/common/session/helpers.rb +30 -0
- data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
- data/lib/eco/api/common/session/mailer.rb +0 -1
- data/lib/eco/api/common/session/s3_uploader.rb +0 -1
- data/lib/eco/api/common/session/sftp.rb +0 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
- data/lib/eco/api/common/version_patches/exception.rb +11 -4
- data/lib/eco/api/microcases.rb +3 -1
- data/lib/eco/api/microcases/append_usergroups.rb +0 -1
- data/lib/eco/api/microcases/people_cache.rb +2 -2
- data/lib/eco/api/microcases/people_load.rb +2 -2
- data/lib/eco/api/microcases/people_refresh.rb +2 -2
- data/lib/eco/api/microcases/people_search.rb +6 -6
- data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
- data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
- data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
- data/lib/eco/api/microcases/set_account.rb +0 -1
- data/lib/eco/api/microcases/with_each.rb +67 -6
- data/lib/eco/api/microcases/with_each_present.rb +4 -2
- data/lib/eco/api/microcases/with_each_starter.rb +4 -2
- data/lib/eco/api/organization.rb +1 -0
- data/lib/eco/api/organization/people.rb +98 -22
- data/lib/eco/api/organization/people_similarity.rb +272 -0
- data/lib/eco/api/organization/person_schemas.rb +5 -1
- data/lib/eco/api/organization/policy_groups.rb +5 -1
- data/lib/eco/api/organization/presets_factory.rb +22 -83
- data/lib/eco/api/organization/presets_integrity.json +6 -0
- data/lib/eco/api/organization/presets_values.json +5 -4
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
- data/lib/eco/api/session.rb +20 -28
- data/lib/eco/api/session/batch.rb +25 -7
- data/lib/eco/api/session/config.rb +0 -10
- data/lib/eco/api/session/config/apis.rb +80 -14
- data/lib/eco/api/session/config/people.rb +1 -17
- data/lib/eco/api/usecases.rb +2 -2
- data/lib/eco/api/usecases/base_case.rb +2 -2
- data/lib/eco/api/usecases/base_io.rb +17 -4
- data/lib/eco/api/usecases/default_cases.rb +2 -1
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +4 -4
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +223 -0
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
- data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
- data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
- data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +1 -1
- data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +1 -1
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +132 -29
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +61 -36
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
- data/lib/eco/cli/config/default/input.rb +61 -8
- data/lib/eco/cli/config/default/options.rb +48 -17
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/people_filters.rb +3 -3
- data/lib/eco/cli/config/default/usecases.rb +97 -32
- data/lib/eco/cli/config/default/workflow.rb +22 -13
- data/lib/eco/cli/config/help.rb +1 -0
- data/lib/eco/cli/config/options_set.rb +106 -13
- data/lib/eco/cli/config/use_cases.rb +33 -33
- data/lib/eco/cli/scripting/args_helpers.rb +32 -5
- data/lib/eco/csv.rb +4 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data.rb +1 -0
- data/lib/eco/data/crypto/encryption.rb +3 -3
- data/lib/eco/data/files/helpers.rb +6 -4
- data/lib/eco/data/fuzzy_match.rb +201 -0
- data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +38 -0
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +82 -0
- data/lib/eco/data/fuzzy_match/pairing.rb +95 -0
- data/lib/eco/data/fuzzy_match/result.rb +87 -0
- data/lib/eco/data/fuzzy_match/results.rb +77 -0
- data/lib/eco/data/fuzzy_match/score.rb +49 -0
- data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
- data/lib/eco/data/fuzzy_match/string_helpers.rb +82 -0
- data/lib/eco/version.rb +1 -1
- metadata +147 -11
- data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
- data/lib/eco/api/organization/presets_reference.json +0 -59
- data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
@@ -15,8 +15,10 @@ module Eco
|
|
15
15
|
def with_each_present(entries, people, options, log_starter: false)
|
16
16
|
found = []
|
17
17
|
micro.with_each(entries, people, options) do |entry, person|
|
18
|
-
if person.new?
|
19
|
-
|
18
|
+
if person.new?
|
19
|
+
if log_starter
|
20
|
+
session.logger.error("This person does not exist: #{entry.to_s(:identify)}")
|
21
|
+
end
|
20
22
|
next
|
21
23
|
end
|
22
24
|
found << person
|
@@ -15,8 +15,10 @@ module Eco
|
|
15
15
|
def with_each_starter(entries, people, options, log_present: false)
|
16
16
|
starters = []
|
17
17
|
micro.with_each(entries, people, options) do |entry, person|
|
18
|
-
if !person.new?
|
19
|
-
|
18
|
+
if !person.new?
|
19
|
+
if log_present
|
20
|
+
session.logger.error("This person (id: '#{person.id}') already exists: #{entry.to_s(:identify)}")
|
21
|
+
end
|
20
22
|
next
|
21
23
|
end
|
22
24
|
starters << person
|
data/lib/eco/api/organization.rb
CHANGED
@@ -9,6 +9,7 @@ require_relative 'organization/tag_tree'
|
|
9
9
|
require_relative 'organization/presets_factory'
|
10
10
|
require_relative 'organization/preferences'
|
11
11
|
require_relative 'organization/people'
|
12
|
+
require_relative 'organization/people_similarity'
|
12
13
|
require_relative 'organization/person_schemas'
|
13
14
|
require_relative 'organization/policy_groups'
|
14
15
|
require_relative 'organization/login_providers'
|
@@ -2,6 +2,43 @@ module Eco
|
|
2
2
|
module API
|
3
3
|
module Organization
|
4
4
|
class People < Eco::Language::Models::Collection
|
5
|
+
# Error class that allows to handle cases where multiple people were found for the same criterion.
|
6
|
+
# @note its main purpose is to prevent the creation of duplicates or the overriding of information between different people.
|
7
|
+
class MultipleSearchResults < StandardError
|
8
|
+
attr_reader :candidates, :property
|
9
|
+
# @param msg [String] the basic message error.
|
10
|
+
# @param candidates [Array<Person>] the people that match the same search criterion.
|
11
|
+
# @param property [String] the property of the person model that triggered the error (base of the search criterion).
|
12
|
+
def initialize(msg, candidates: [], property: "email")
|
13
|
+
@candidates = candidates
|
14
|
+
@property = property
|
15
|
+
super(msg + " " + candidates_summary)
|
16
|
+
end
|
17
|
+
|
18
|
+
# @param with_index [Boolean] to add an index to each candidate description.
|
19
|
+
# @return [Array<String>] the `candidates` identified
|
20
|
+
def identify_candidates(with_index: false)
|
21
|
+
candidates.map.each_with_index do |person, i|
|
22
|
+
index = with_index ? "#{i}. " : ""
|
23
|
+
msg = person.account ? (person.account_added? ? "(new user)" : "(user)") : "(no account)"
|
24
|
+
"#{index}#{msg} #{person.identify}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# @return [Person] the `candidate` in the `index` position
|
29
|
+
def candidate(index)
|
30
|
+
candidates[index]
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def candidates_summary
|
36
|
+
lines = ["The following people have the same '#{property}':"]
|
37
|
+
lines.concat(identify_candidates(with_index: true)).join("\n ")
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
5
42
|
# build the shortcuts of Collection
|
6
43
|
attr_presence :account, :details
|
7
44
|
attr_collection :id, :external_id, :email, :name, :supervisor_id
|
@@ -78,34 +115,36 @@ module Eco
|
|
78
115
|
# @!group Searchers
|
79
116
|
|
80
117
|
# It searches a person using the parameters given.
|
118
|
+
# @note This is how the search function actually works:
|
119
|
+
# 1. if eP `id` is given, returns the person (if found), otherwise...
|
120
|
+
# 2. if `external_id` is given, returns the person (if found), otherwise...
|
121
|
+
# 3. if `strict` is `false` and `email` is given:
|
122
|
+
# - if there is only 1 person with that email, returns that person, otherwise...
|
123
|
+
# - if found, but there are many candidates, it raises a MultipleSearchResults error
|
124
|
+
# - if person `external_id` matches `email`, returns that person
|
125
|
+
# @raise MultipleSearchResults if there are multiple people with the same `email`
|
126
|
+
# and there's no other criteria to find the person. It only gets to this point if
|
127
|
+
# `external_id` was **not** provided and we are **not** in 'strict' search mode.
|
128
|
+
# However, it could be we were in `strict` mode and `external_id` was not provided.
|
81
129
|
# @param id [String] the `internal id` of the person
|
82
130
|
# @param external_id [String] the `external_id` of the person
|
83
131
|
# @param email [String] the `email` of the person
|
84
|
-
# @param strict [Boolean] if should perform a
|
132
|
+
# @param strict [Boolean] whether it should perform a `:soft` or a `:strict` search. `strict` will avoid repeated email addresses.
|
85
133
|
# @return [Person, nil] the person we were searching, or `nil` if not found.
|
86
134
|
def person(id: nil, external_id: nil, email: nil, strict: false)
|
87
135
|
init_caches
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
if !pers && !strict && !email.to_s.strip.empty?
|
98
|
-
candidates = @by_non_users_email[email&.downcase.strip] || []
|
99
|
-
raise "Too many non-user candidates (#{candidates.length}) with email '#{email}'" if candidates.length > 1
|
100
|
-
pers = candidates.first
|
101
|
-
end
|
102
|
-
|
103
|
-
pers = @by_external_id[email&.downcase.strip]&.first if !pers && !email.to_s.strip.empty?
|
104
|
-
end
|
105
|
-
|
136
|
+
# normalize values
|
137
|
+
ext_id = !external_id.to_s.strip.empty? && external_id.strip
|
138
|
+
email = !email.to_s.strip.empty? && email.downcase.strip
|
139
|
+
|
140
|
+
pers = nil
|
141
|
+
pers ||= @by_id[id]&.first
|
142
|
+
pers ||= @by_external_id[ext_id]&.first
|
143
|
+
pers ||= person_by_email(email) unless strict && ext_id
|
106
144
|
pers
|
107
145
|
end
|
108
146
|
|
147
|
+
# @see Eco::API::Organization::People#person
|
109
148
|
def find(object, strict: false)
|
110
149
|
id = attr_value(object, "id")
|
111
150
|
external_id = attr_value(object, "external_id")
|
@@ -176,6 +215,12 @@ module Eco
|
|
176
215
|
end
|
177
216
|
# @!endgroup
|
178
217
|
|
218
|
+
# @!group Helper methods
|
219
|
+
def similarity
|
220
|
+
Eco::API::Organization::PeopleSimilarity.new(self.to_a)
|
221
|
+
end
|
222
|
+
# @!endgroup
|
223
|
+
|
179
224
|
protected
|
180
225
|
|
181
226
|
def on_change
|
@@ -184,15 +229,46 @@ module Eco
|
|
184
229
|
|
185
230
|
private
|
186
231
|
|
232
|
+
def person_by_email(email, prevent_duplicates: true)
|
233
|
+
return nil unless email
|
234
|
+
|
235
|
+
candidates = @by_non_users_email[email] || []
|
236
|
+
email_users = @by_users_email[email] || []
|
237
|
+
|
238
|
+
if pers = email_users.first
|
239
|
+
return pers if candidates.empty?
|
240
|
+
candidates = [pers] + candidates
|
241
|
+
elsif candidates.length == 1
|
242
|
+
return candidates.first
|
243
|
+
end
|
244
|
+
|
245
|
+
if prevent_duplicates && !candidates.empty?
|
246
|
+
msg = "Multiple search results match the criteria."
|
247
|
+
raise MultipleSearchResults.new(msg, candidates: candidates, property: "email")
|
248
|
+
end
|
249
|
+
|
250
|
+
@by_external_id[email]&.first
|
251
|
+
end
|
252
|
+
|
187
253
|
def init_caches
|
188
254
|
return if @caches_init
|
189
255
|
@by_id = to_h
|
190
|
-
@by_external_id = to_h('external_id')
|
191
|
-
@by_users_email =
|
192
|
-
@by_non_users_email = non_users.to_h('email')
|
256
|
+
@by_external_id = no_nil_key(to_h('external_id'))
|
257
|
+
@by_users_email = no_nil_key(existing_users.to_h('email'))
|
258
|
+
@by_non_users_email = no_nil_key(non_users.to_h('email'))
|
259
|
+
@by_email = no_nil_key(to_h('email'))
|
193
260
|
@caches_init = true
|
194
261
|
end
|
195
262
|
|
263
|
+
def existing_users
|
264
|
+
newFrom users.select {|u| !u.account_added?(:original)}
|
265
|
+
end
|
266
|
+
|
267
|
+
def no_nil_key(hash)
|
268
|
+
hash.tap {|h| h.delete(nil)}
|
269
|
+
end
|
270
|
+
|
271
|
+
|
196
272
|
end
|
197
273
|
end
|
198
274
|
end
|
@@ -0,0 +1,272 @@
|
|
1
|
+
module Eco
|
2
|
+
module API
|
3
|
+
module Organization
|
4
|
+
|
5
|
+
# Class to find out duplicates in the People Manager
|
6
|
+
#
|
7
|
+
# @attr_writer attribute [String, Proc, nil] the target attribute to be read.
|
8
|
+
class PeopleSimilarity < Eco::API::Organization::People
|
9
|
+
include Eco::Data::FuzzyMatch
|
10
|
+
|
11
|
+
attr_accessor :attribute
|
12
|
+
|
13
|
+
# @!group Config
|
14
|
+
# @return [String, Proc, nil] the target attribute to be read.
|
15
|
+
def attribute=(attr)
|
16
|
+
@attribute = attr
|
17
|
+
end
|
18
|
+
|
19
|
+
def attribute
|
20
|
+
@attribute ||= :name
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns the target value to analyse
|
24
|
+
# @param person [Ecoportal::API::V1::Person]
|
25
|
+
def item_value(person)
|
26
|
+
return attr.call(item) if attribute.is_a?(Proc)
|
27
|
+
attr = attribute.to_sym
|
28
|
+
return item.send(attr) if item.respond_to?(attr)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Defines the order of relevance of the per-user matches
|
32
|
+
# @param values [Array<Symbol>] the algorithms' results it should be ordered by
|
33
|
+
# * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`
|
34
|
+
def order=(values)
|
35
|
+
@order = values
|
36
|
+
end
|
37
|
+
|
38
|
+
def order
|
39
|
+
@order ||= [:words_ngrams, :dice]
|
40
|
+
end
|
41
|
+
|
42
|
+
# Defines the threshold that the per-user matches should comply with
|
43
|
+
# @param value [Float] the threshold that all of the algorithms should comply with
|
44
|
+
def threshold=(value)
|
45
|
+
@threshold = value
|
46
|
+
end
|
47
|
+
|
48
|
+
def threshold
|
49
|
+
@threshold ||= 0.15
|
50
|
+
end
|
51
|
+
|
52
|
+
# Generates a new object with same config but different base `data`.
|
53
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
54
|
+
def newFrom(data)
|
55
|
+
super(data).tap do |simil|
|
56
|
+
simil.threshold = threshold
|
57
|
+
simil.order = order
|
58
|
+
simil.attribute = attribute
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# @!endgroup
|
63
|
+
|
64
|
+
# @!group Searchers
|
65
|
+
|
66
|
+
# It gathers those that have the same `email`
|
67
|
+
# @return [Hash] where `keys` are `email`s and `values` an `Array<Person>`
|
68
|
+
def repeated_emails
|
69
|
+
init_caches
|
70
|
+
@by_email.select do |email, people|
|
71
|
+
people.count > 1
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# It returns all people with no name
|
76
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
77
|
+
def unnamed
|
78
|
+
select do |person|
|
79
|
+
person.name.to_s.strip.length < 2
|
80
|
+
end.yield_self do |results|
|
81
|
+
newFrom(results)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# It returns all people that have a name
|
86
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
87
|
+
def named
|
88
|
+
reject do |person|
|
89
|
+
person.name.to_s.strip.length < 2
|
90
|
+
end.yield_self do |results|
|
91
|
+
newFrom(results)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
# It returns all the entries with `attribute` empty
|
96
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
97
|
+
def blank_attribute
|
98
|
+
select do |person|
|
99
|
+
item_value(person).to_s.strip.length < 2
|
100
|
+
end.yield_self do |results|
|
101
|
+
newFrom(results)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# It returns all the entries with `attribute` **not** empty
|
106
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
107
|
+
def attribute_present
|
108
|
+
reject do |person|
|
109
|
+
item_value(person).to_s.strip.length < 2
|
110
|
+
end.yield_self do |results|
|
111
|
+
newFrom(results)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# @!endgroup
|
116
|
+
|
117
|
+
# @!group Analysis starters
|
118
|
+
|
119
|
+
# Analyses People based on `options`
|
120
|
+
# @param needle_read [Proc, Symbol] when the value to read from `needle` object is different to the `:read` (`attribute`).
|
121
|
+
# This allows, for example, to facet `needle.name` (needle_read) against `haystack_item.details[alt_id]` (read).
|
122
|
+
# @param keep_empty [Boolean] whether to keep people with no results (based on threshold); when `false` they are removed.
|
123
|
+
# @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
|
124
|
+
def analyse(needle_read: nil, keep_empty: false, **options)
|
125
|
+
options = { read: self.attribute }.merge(options)
|
126
|
+
total = count; i = 1
|
127
|
+
each_with_object({}) do |person, results|
|
128
|
+
needle_str = needle_read ? item_string(person, needle_read) : nil
|
129
|
+
results[person.id] = find_all_with_score(person, needle_str: needle_str, **options)
|
130
|
+
print_progress("Analysed", total, i)
|
131
|
+
i += 1
|
132
|
+
end.yield_self do |analysed|
|
133
|
+
analysed = clean_empty(analysed) unless keep_empty
|
134
|
+
#puts "... #{analysed.count} results after cleaning empty"
|
135
|
+
analysed
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# @!endgroup
|
140
|
+
|
141
|
+
# @!group Results Treatment
|
142
|
+
|
143
|
+
# Gets a new instance object of this class, with only people in results
|
144
|
+
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
145
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
146
|
+
def newSimilarity(analysed)
|
147
|
+
newFrom(people_in_results(analysed))
|
148
|
+
end
|
149
|
+
|
150
|
+
def people_in_results(analysed)
|
151
|
+
analysed.each_with_object([]) do |(id, results), people|
|
152
|
+
related = results.each_with_object([self[id]]) do |result, related|
|
153
|
+
related << result.match
|
154
|
+
end
|
155
|
+
related.each {|person| people << person unless people.include?(person)}
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
# Removes from results those that do not have similar entries
|
160
|
+
def clean_empty(analysed)
|
161
|
+
analysed.select do |id, results|
|
162
|
+
!results.empty?
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
# Helper to do some treatment of the results
|
167
|
+
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
168
|
+
# @return [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
169
|
+
def with_analysed(analysed, keep_empty: false)
|
170
|
+
analysed.each_with_object({}) do |(id, results), reanalysed|
|
171
|
+
reanalysed[id] = yield(self[id], results)
|
172
|
+
end.yield_self do |reanalysed|
|
173
|
+
reanalysed = clean_empty(reanalysed) unless keep_empty
|
174
|
+
reanalysed
|
175
|
+
end.tap {|out| "with_analysed... returns #{out.count} records"}
|
176
|
+
end
|
177
|
+
|
178
|
+
# Launches a reanalysis on `analysed` based on `options`
|
179
|
+
# @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
|
180
|
+
def rearrange(analysed, **options)
|
181
|
+
with_analysed(analysed) do |person, results|
|
182
|
+
results.relevant_results(**options)
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
# Reanalyses by using a block to treat the needle and item values
|
187
|
+
def reanalyse(analysed, msg: "Reanalysing", **options, &block)
|
188
|
+
options = { read: self.attribute }.merge(options)
|
189
|
+
total = analysed.count; i = 1
|
190
|
+
with_analysed(analysed) do |person, results|
|
191
|
+
print_progress(msg, total, i)
|
192
|
+
i += 1
|
193
|
+
recalculate_results(results, &block)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# Reanalyses by ignoring matching words between the `needle` and those found in `results`
|
198
|
+
def ignore_matching_words(analysed, **options)
|
199
|
+
prompt = "Reanalysing by ignoring matching words"
|
200
|
+
reanalyse(analysed, msg: prompt, **options) do |needle_str, item_str, needle, item|
|
201
|
+
self.class.remove_matching_words(needle_str, item_str)
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
# Reanalyses by ignoring matching words between the `needle` and those found in `results`
|
206
|
+
def ignore_matching_words_old(analysed, **options)
|
207
|
+
options = { read: self.attribute }.merge(options)
|
208
|
+
total = analysed.count; i = 1
|
209
|
+
with_analysed(analysed) do |person, results|
|
210
|
+
print_progress("Reanalysing by ignoring matching words", total, i)
|
211
|
+
i += 1
|
212
|
+
ignore_same_words_score(results, **options)
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
# @!endgroup
|
217
|
+
|
218
|
+
# @!group Reporting Helpers
|
219
|
+
|
220
|
+
# @return [String] well structured text
|
221
|
+
def report(analysed, format: :txt)
|
222
|
+
case
|
223
|
+
when format == :txt
|
224
|
+
analysed.each_with_object("") do |(id, results), out|
|
225
|
+
msg = results.results.map {|r| r.print}.join("\n ")
|
226
|
+
out << "#{self[id].identify}:\n " + msg + "\n"
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
# @note
|
232
|
+
# 1. Unless `:analysed` is provided, it launches an analysis cutting with Jaro Winkler min 0.5
|
233
|
+
# 2. It then re-sorts and cuts based on `options`
|
234
|
+
# @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
|
235
|
+
def print_analysis(**options)
|
236
|
+
analysed = options[:analysed] || results_with_false_positives.analyse(**options)
|
237
|
+
analysed.each_with_object({}) do |(id, results), out|
|
238
|
+
puts report(analysed)
|
239
|
+
end
|
240
|
+
end
|
241
|
+
# @!endgroup
|
242
|
+
|
243
|
+
protected
|
244
|
+
|
245
|
+
def on_change
|
246
|
+
remove_instance_variable(@fuzzy_match)
|
247
|
+
super
|
248
|
+
end
|
249
|
+
|
250
|
+
private
|
251
|
+
|
252
|
+
def print_progress(msg, total, num)
|
253
|
+
return unless total > 10
|
254
|
+
puts "" unless num > 1
|
255
|
+
@print_msg_len ||= 0
|
256
|
+
percent = (100 * num.to_f / total).round(1)
|
257
|
+
msg = " #{msg}: #{percent}% (#{num} of #{total})\r"
|
258
|
+
@print_msg_len = msg.length unless @print_msg_len > msg.length
|
259
|
+
print msg
|
260
|
+
$stdout.flush
|
261
|
+
if percent > 99.9
|
262
|
+
sleep(0.2)
|
263
|
+
print "#{" " * @print_msg_len}\r"
|
264
|
+
$stdout.flush
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|