eco-helpers 2.0.17 → 2.0.23

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +85 -1
  3. data/eco-helpers.gemspec +4 -1
  4. data/lib/eco-helpers.rb +1 -0
  5. data/lib/eco/api/common/base_loader.rb +9 -5
  6. data/lib/eco/api/common/loaders/parser.rb +1 -0
  7. data/lib/eco/api/common/people/default_parsers.rb +1 -0
  8. data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
  9. data/lib/eco/api/common/people/entries.rb +1 -0
  10. data/lib/eco/api/common/people/entry_factory.rb +88 -23
  11. data/lib/eco/api/common/people/person_entry.rb +1 -0
  12. data/lib/eco/api/common/people/person_parser.rb +1 -1
  13. data/lib/eco/api/common/session.rb +1 -0
  14. data/lib/eco/api/common/session/base_session.rb +2 -0
  15. data/lib/eco/api/common/session/helpers.rb +30 -0
  16. data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
  17. data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
  18. data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
  19. data/lib/eco/api/common/version_patches/exception.rb +11 -4
  20. data/lib/eco/api/microcases/with_each.rb +67 -6
  21. data/lib/eco/api/microcases/with_each_present.rb +4 -2
  22. data/lib/eco/api/microcases/with_each_starter.rb +4 -2
  23. data/lib/eco/api/organization.rb +1 -1
  24. data/lib/eco/api/organization/people.rb +94 -25
  25. data/lib/eco/api/organization/people_similarity.rb +272 -0
  26. data/lib/eco/api/organization/person_schemas.rb +5 -1
  27. data/lib/eco/api/organization/policy_groups.rb +5 -1
  28. data/lib/eco/api/organization/tag_tree.rb +33 -0
  29. data/lib/eco/api/session.rb +19 -8
  30. data/lib/eco/api/session/batch.rb +7 -5
  31. data/lib/eco/api/session/batch/job.rb +27 -8
  32. data/lib/eco/api/session/config/apis.rb +80 -14
  33. data/lib/eco/api/usecases.rb +2 -2
  34. data/lib/eco/api/usecases/base_case.rb +2 -2
  35. data/lib/eco/api/usecases/base_io.rb +17 -4
  36. data/lib/eco/api/usecases/default_cases.rb +1 -0
  37. data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +3 -3
  38. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +179 -32
  39. data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
  40. data/lib/eco/api/usecases/default_cases/to_csv_case.rb +81 -36
  41. data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +3 -4
  42. data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
  43. data/lib/eco/cli/config/default/input.rb +61 -8
  44. data/lib/eco/cli/config/default/options.rb +36 -2
  45. data/lib/eco/cli/config/default/people.rb +18 -24
  46. data/lib/eco/cli/config/default/usecases.rb +33 -2
  47. data/lib/eco/cli/config/default/workflow.rb +21 -12
  48. data/lib/eco/cli/scripting/args_helpers.rb +2 -2
  49. data/lib/eco/csv.rb +4 -2
  50. data/lib/eco/csv/table.rb +121 -21
  51. data/lib/eco/data/fuzzy_match.rb +109 -27
  52. data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
  53. data/lib/eco/data/fuzzy_match/ngrams_score.rb +19 -10
  54. data/lib/eco/data/fuzzy_match/pairing.rb +12 -19
  55. data/lib/eco/data/fuzzy_match/result.rb +22 -2
  56. data/lib/eco/data/fuzzy_match/results.rb +30 -6
  57. data/lib/eco/data/fuzzy_match/score.rb +12 -7
  58. data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
  59. data/lib/eco/version.rb +1 -1
  60. metadata +67 -3
  61. data/lib/eco/api/organization/people_analytics.rb +0 -60
@@ -11,6 +11,8 @@ module Eco
11
11
  attr_reader :api, :file_manager, :logger
12
12
  alias_method :fm, :file_manager
13
13
 
14
+ include Session::Helpers
15
+
14
16
  def initialize(e)
15
17
  raise "Expected object Eco::API::Common::Session::Environment. Given: #{e.class}" unless e.is_a?(Environment)
16
18
  self.environment = e
@@ -0,0 +1,30 @@
1
+ require_relative 'helpers/prompt_user'
2
+
3
module Eco
  module API
    module Common
      module Session
        # Mixin that bundles the session helper modules.
        #
        # Including `Helpers` in a class mixes `InstanceMethods` into its
        # instances and extends the class itself with `ClassMethods`.
        module Helpers
          # Class-level helpers (currently none).
          module ClassMethods
          end

          # Instance-level helpers.
          module InstanceMethods
            include Helpers::PromptUser
          end

          # Hook run by Ruby when `Helpers` is included in `base`.
          # @param base [Module] the class or module including `Helpers`.
          def self.included(base)
            base.send(:include, InstanceMethods)
            base.extend(ClassMethods)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'timeout'
2
module Eco
  module API
    module Common
      module Session
        module Helpers
          # Console prompt helper. The including object must respond to `config`,
          # and `config` to `run_mode_remote?`.
          module PromptUser

            # Asks `question` on the console and returns the answer.
            #
            # When `config.run_mode_remote?` is truthy nothing is printed or read:
            # `default` is used as the answer.
            #
            # @param question [String] text printed right before reading the answer.
            # @param default [Object] answer used in remote mode, on timeout and
            #   when the input stream is exhausted (EOF).
            # @param explanation [String] text printed before the question.
            # @param timeout [Numeric, nil] seconds to wait for an answer (`nil` waits forever).
            # @yield [response] optional block that post-processes the answer.
            # @return [Object] the answer, or the result of the block when one is given.
            def prompt_user(question, default:, explanation: "", timeout: nil)
              response =
                if config.run_mode_remote?
                  default
                else
                  puts explanation
                  print "#{question} "
                  line = begin
                    timeout ? Timeout.timeout(timeout) { $stdin.gets } : $stdin.gets
                  rescue Timeout::Error
                    nil
                  end
                  # `gets` returns nil on EOF; `chomp` only strips the line
                  # terminator, so input without a trailing newline is kept whole.
                  line ? line.chomp : default
                end
              return response unless block_given?
              yield(response)
            end

          end
        end
      end
    end
  end
end
@@ -18,7 +18,7 @@ module Ecoportal
18
18
  entry.to_s(:identify)
19
19
  else
20
20
  str_id = id ? "id: '#{id}'; " : ""
21
- "#{name}' (#{str_id}ext_id: '#{external_id}'; email: '#{email}')"
21
+ "'#{name}' (#{str_id}ext_id: '#{external_id}'; email: '#{email}')"
22
22
  end
23
23
  end
24
24
 
@@ -11,12 +11,15 @@ module Ecoportal
11
11
  original_doc["account"] = JSON.parse(doc["account"])
12
12
  end
13
13
 
14
# Tells whether the person has neither `details` nor `account` in the reference doc.
# @param doc [Symbol] `:original` compares against `original_doc`;
#   any other value compares against `initial_doc`.
# @return [Boolean]
def new?(doc = :initial)
  reference = doc == :original ? original_doc : initial_doc
  %w[details account].none? { |section| reference[section] }
end
17
18
 
18
# Tells whether the person has an account that the reference doc did not have.
# @param doc [Symbol] `:original` compares against `original_doc`;
#   any other value compares against `initial_doc`.
# @return [Boolean, nil] falsy when there is no account at all.
def account_added?(doc = :initial)
  baseline =
    if doc == :original
      original_doc
    else
      initial_doc
    end
  account && !baseline["account"]
end
21
24
 
22
25
  end
@@ -1,9 +1,16 @@
1
1
class ::Exception
  # Builds a multi-line description of the exception: the frame where it was
  # raised, the message with the exception class, and then the remaining
  # backtrace frames, numbered from 1.
  #
  # For `SystemStackError` only the first 30 remaining frames are listed.
  # An exception without backtrace (never raised) yields just the header.
  #
  # @return [String] the formatted message.
  def patch_full_message
    trace = backtrace || []
    # The header uses frame 0 (the raise location); the numbered list is the rest.
    rest = trace.drop(1)
    rest = rest.first(30) if is_a?(SystemStackError)

    lines = ["\n#{trace.first} \n#{message} (#{self.class})"]
    rest.each_with_index { |bt, i| lines << "#{" " * 8}#{i + 1}: from #{bt}" }
    lines.join("\n")
  rescue StandardError => e
    # Best effort: never let message formatting hide the original error,
    # but do not swallow signals / exit requests either.
    puts "Something is wrong with 'patch_full_message': #{e}"
    self.class.to_s
  end
end
9
16
 
@@ -12,16 +12,77 @@ module Eco
12
12
  # @yieldparam person [Ecoportal::API::V1::Person] the found person that matches `entry`, or a new person otherwise.
13
13
  # @return [Eco::API::Organization::People] all the people, including new and existing ones.
14
14
# Pairs every entry with the person it refers to.
#
# For each entry the person is searched in `people`; when not found, a new
# person is obtained from the session. When the search raises
# `MultipleSearchResults`, the user is prompted to resolve it, unless the
# user already chose to skip all such cases; an unresolved entry is left out.
# @yield [entry, person] called for each entry that ends up with a person.
# @return the result of `people.newFrom` with the unique people that were paired.
def with_each(entries, people, options)
  @_skip_all_multiple_results = false
  paired = []

  entries.each do |entry|
    person =
      begin
        people.find(entry, strict: micro.strict_search?(options)) || session.new_person
      rescue Eco::API::Organization::People::MultipleSearchResults => e
        unless @_skip_all_multiple_results
          msg = "\n * When searching this Entry: #{entry.to_s(:identify)}"
          _with_each_prompt_to_select_user(e.append_message(msg), entry: entry)
        end
      end
    next unless person

    person.entry = entry
    yield(entry, person) if block_given?
    paired << person
  end

  people.newFrom(paired.uniq)
end
24
35
 
36
+ private
37
+
38
# Asks the user how to resolve a search that matched several people.
#
# Options offered: a candidate index, (I)gnore this entry, ignore (A)ll entries
# with this problem, (C)reate a new person or (B)reak the script. An answer
# that is none of these triggers a new prompt for the same error.
#
# @param error [Eco::API::Organization::People::MultipleSearchResults]
# @param entry [Object, nil] the input entry being searched (only used for logging).
# @param increase_count [Boolean] `false` when re-prompting for the same error.
# @raise [RuntimeError] if `error` is not a `MultipleSearchResults`.
# @raise [Eco::API::Organization::People::MultipleSearchResults] `error` itself, on option (B).
# @return [Object, nil] the selected or newly created person; `nil` when ignored.
def _with_each_prompt_to_select_user(error, entry: nil, increase_count: true)
  unless error.is_a?(Eco::API::Organization::People::MultipleSearchResults)
    raise "Expecting Eco::API::Organization::People::MultipleSearchResults. Given: #{error.class}"
  end
  @_with_each_prompts = 0 unless instance_variable_defined?(:@_with_each_prompts)
  @_with_each_prompts += 1 if increase_count

  lines = []
  lines << "\n(#{@_with_each_prompts}) " + error.to_s + "\n"
  lines << " #index - Select the correct person by its number index among the list above."
  lines << " (I) - Just Skip/Ignore this one. I will deal with that input entry in another launch."
  lines << " (A) - Ignore all the rest of input entries with this problem."
  lines << " (C) - Create a new person."
  lines << " (B) - Just break this script. I need to change the input file :/"

  prompt_user("Type one option (#number/I/A/C/B):", explanation: lines.join("\n"), default: "I") do |res|
    res = res.to_s.strip.upcase
    # Only a plain non-negative integer counts as an index: `String#to_i`
    # would map any other text to 0 and silently select the first candidate.
    pos = res.match?(/\A\d+\z/) ? res.to_i : nil
    case
    when res.start_with?("I")
      logger.info "Ignoring entry... #{entry.to_s(:identify) if entry}"
      nil
    when res.start_with?("A")
      logger.info "All input entries with this same issue will be ignored for this launch"
      @_skip_all_multiple_results = true
      nil
    when res.start_with?("C")
      logger.info "Creating new person...#{"for entry #{entry.to_s(:identify)}" if entry}"
      session.new_person
    when res.start_with?("B")
      raise error
    when pos && pos < error.candidates.length
      error.candidate(pos).tap do |person|
        logger.info "Thanks!! You selected #{person.identify}"
        sleep(1.5)
      end
    else
      if pos
        print "#{pos} is not a number in the range. "
      else
        print "#{res} is not an option. "
      end
      puts "Please select one of the offered options..."
      sleep(1)
      _with_each_prompt_to_select_user(error, increase_count: false, entry: entry)
    end
  end
end
85
+
25
86
  end
26
87
  end
27
88
  end
@@ -15,8 +15,10 @@ module Eco
15
15
  def with_each_present(entries, people, options, log_starter: false)
16
16
  found = []
17
17
  micro.with_each(entries, people, options) do |entry, person|
18
- if person.new? && log_starter
19
- session.logger.error("This person does not exist: #{entry.to_s(:identify)}")
18
+ if person.new?
19
+ if log_starter
20
+ session.logger.error("This person does not exist: #{entry.to_s(:identify)}")
21
+ end
20
22
  next
21
23
  end
22
24
  found << person
@@ -15,8 +15,10 @@ module Eco
15
15
  def with_each_starter(entries, people, options, log_present: false)
16
16
  starters = []
17
17
  micro.with_each(entries, people, options) do |entry, person|
18
- if !person.new? && log_present
19
- session.logger.error("This person (id: '#{person.id}') already exists: #{entry.to_s(:identify)}")
18
+ if !person.new?
19
+ if log_present
20
+ session.logger.error("This person (id: '#{person.id}') already exists: #{entry.to_s(:identify)}")
21
+ end
20
22
  next
21
23
  end
22
24
  starters << person
@@ -9,7 +9,7 @@ require_relative 'organization/tag_tree'
9
9
  require_relative 'organization/presets_factory'
10
10
  require_relative 'organization/preferences'
11
11
  require_relative 'organization/people'
12
- require_relative 'organization/people_analytics'
12
+ require_relative 'organization/people_similarity'
13
13
  require_relative 'organization/person_schemas'
14
14
  require_relative 'organization/policy_groups'
15
15
  require_relative 'organization/login_providers'
@@ -2,6 +2,43 @@ module Eco
2
2
  module API
3
3
  module Organization
4
4
  class People < Eco::Language::Models::Collection
5
+ # Error class that allows to handle cases where multiple people were found for the same criterion.
6
+ # @note its main purpose to prevent the creation of duplicates or override information between different people.
7
class MultipleSearchResults < StandardError
  attr_reader :candidates, :property

  # @param msg [String] the basic message error.
  # @param candidates [Array<Person>] the people that match the same search criterion.
  # @param property [String] the property of the person model that triggered the error (base of the search criterion).
  def initialize(msg, candidates: [], property: "email")
    @candidates = candidates
    @property   = property
    super(msg + " " + candidates_summary)
  end

  # Describes each candidate with its account status and identification.
  # @param with_index [Boolean] to add an index to each candidate description.
  # @return [Array<String>] the `candidates` identified
  def identify_candidates(with_index: false)
    candidates.each_with_index.map do |person, position|
      prefix = with_index ? "#{position}. " : ""
      "#{prefix}#{account_label(person)} #{person.identify}"
    end
  end

  # @return [Person] the `candidate` in the `index` position
  def candidate(index)
    candidates[index]
  end

  private

  # Label that tells if the person is a user, a user whose account was just added, or has no account.
  def account_label(person)
    return "(no account)" unless person.account
    person.account_added? ? "(new user)" : "(user)"
  end

  # Headline followed by the indexed list of candidates.
  def candidates_summary
    headline = "The following people have the same '#{property}':"
    [headline, *identify_candidates(with_index: true)].join("\n ")
  end

end
41
+
5
42
  # build the shortcuts of Collection
6
43
  attr_presence :account, :details
7
44
  attr_collection :id, :external_id, :email, :name, :supervisor_id
@@ -78,34 +115,36 @@ module Eco
78
115
  # @!group Searchers
79
116
 
80
117
  # It searches a person using the parameters given.
118
+ # @note This is how the search function actually works:
119
+ # 1. if eP `id` is given, returns the person (if found), otherwise...
120
+ # 2. if `external_id` is given, returns the person (if found), otherwise...
121
+ # 3. if `strict` is `false` and `email` is given:
122
+ # - if there is only 1 person with that email, returns that person, otherwise...
123
+ # - if found, but there are many candidates, it raises a MultipleSearchResults error
124
+ # - if person `external_id` matches `email`, returns that person
125
+ # @raise MultipleSearchResults if there are multiple people with the same `email`
126
+ # and there's no other criteria to find the person. It only gets to this point if
127
+ # `external_id` was **not** provided and we are **not** in 'strict' search mode.
128
+ # However, it could be we were in `strict` mode and `external_id` was not provided.
81
129
  # @param id [String] the `internal id` of the person
82
130
  # @param external_id [String] the `external_id` of the person
83
131
  # @param email [String] the `email` of the person
84
- # @param strict [Boolean] if should perform a `soft` or a `strict` search. `strict` will avoid repeated email addresses.
132
+ # @param strict [Boolean] if should perform a `:soft` or a `:strict` search. `strict` will avoid repeated email addresses.
85
133
  # @return [Person, nil] the person we were searching, or `nil` if not found.
86
134
# Searches a person by `id`, then by `external_id`, then by `email`.
#
# The email search is skipped when `strict` is set and an `external_id` was given.
# Blank values (nil, empty or whitespace-only) are treated as "not given".
#
# @param id [String] the internal id of the person.
# @param external_id [String] the external id of the person.
# @param email [String] the email of the person (matched in lowercase).
# @param strict [Boolean] `true` to avoid falling back to the email search
#   when an `external_id` is given.
# @return [Object, nil] the person found, or `nil`.
def person(id: nil, external_id: nil, email: nil, strict: false)
  init_caches

  # Normalize values: blank becomes nil, so it can never be used as a cache key.
  ext_id = external_id.to_s.strip
  ext_id = nil if ext_id.empty?
  email  = email.to_s.strip.downcase
  email  = nil if email.empty?

  pers = nil
  # NOTE(review): `@by_id` is not filtered with `no_nil_key`, so a nil `id`
  # could match a person that has no id yet — confirm against `to_h`.
  pers ||= @by_id[id]&.first if id
  pers ||= @by_external_id[ext_id]&.first if ext_id
  pers ||= person_by_email(email) unless strict && ext_id
  pers
end
108
146
 
147
+ # @see Eco::API::Organization::People#person
109
148
  def find(object, strict: false)
110
149
  id = attr_value(object, "id")
111
150
  external_id = attr_value(object, "external_id")
@@ -177,8 +216,8 @@ module Eco
177
216
  # @!endgroup
178
217
 
179
218
  # @!group Helper methods
180
# Wraps the people of this collection in a similarity analyser.
# @return [Eco::API::Organization::PeopleSimilarity]
def similarity
  members = to_a
  Eco::API::Organization::PeopleSimilarity.new(members)
end
183
222
  # @!endgroup
184
223
 
@@ -190,16 +229,46 @@ module Eco
190
229
 
191
230
  private
192
231
 
232
# Finds the person that owns `email`.
#
# A single match (one user with that email and no other people, or exactly one
# non-user) is returned directly. With several matches it raises, unless
# `prevent_duplicates` is off. As a last resort the email is looked up as an
# `external_id`.
# @param email [String, nil] the normalized email; a falsy value returns `nil`.
# @param prevent_duplicates [Boolean] whether several matches should raise.
# @raise [MultipleSearchResults] when several people share the email.
# @return [Object, nil] the person found, or `nil`.
def person_by_email(email, prevent_duplicates: true)
  return nil unless email

  non_users = @by_non_users_email[email] || []
  user      = (@by_users_email[email] || []).first

  return user if user && non_users.empty?
  return non_users.first if !user && non_users.length == 1

  matches = user ? [user] + non_users : non_users
  if prevent_duplicates && !matches.empty?
    msg = "Multiple search results match the criteria."
    raise MultipleSearchResults.new(msg, candidates: matches, property: "email")
  end

  @by_external_id[email]&.first
end
252
+
193
253
# Builds the lookup indexes once (by id, external id and email).
# All indexes except `@by_id` get their `nil` key removed.
def init_caches
  return if @caches_init

  @by_id = to_h
  @by_external_id, @by_users_email, @by_non_users_email, @by_email = [
    to_h('external_id'),
    existing_users.to_h('email'),
    non_users.to_h('email'),
    to_h('email')
  ].map { |index| no_nil_key(index) }
  @caches_init = true
end
202
262
 
263
# Users whose account is not reported as added compared to the original doc.
# @return the result of `newFrom` with those users.
def existing_users
  already_users = users.select do |user|
    !user.account_added?(:original)
  end
  newFrom(already_users)
end
266
+
267
# Removes the `nil` key from `hash` (in place).
# @param hash [Hash]
# @return [Hash] the same `hash` object.
def no_nil_key(hash)
  hash.delete(nil)
  hash
end
270
+
271
+
203
272
  end
204
273
  end
205
274
  end
@@ -0,0 +1,272 @@
1
module Eco
  module API
    module Organization

      # Class to find out duplicates in the People Manager
      #
      # @attr_writer attribute [String, Proc, nil] the target attribute to be read.
      class PeopleSimilarity < Eco::API::Organization::People
        include Eco::Data::FuzzyMatch

        # @!group Config

        # @param attr [String, Symbol, Proc, nil] the target attribute to be read.
        def attribute=(attr)
          @attribute = attr
        end

        # @return [String, Symbol, Proc] the target attribute to be read (`:name` when unset).
        def attribute
          @attribute ||= :name
        end

        # Returns the target value to analyse
        # @param person [Ecoportal::API::V1::Person]
        # @return [Object, nil] the result of calling `attribute` (when it is a `Proc`)
        #   or of sending it to `person`; `nil` if `person` does not respond to it.
        def item_value(person)
          return attribute.call(person) if attribute.is_a?(Proc)
          attr = attribute.to_sym
          person.send(attr) if person.respond_to?(attr)
        end

        # Define the order or relevant of per user matches
        # @param values[Array<Symbol>] the algorithms' results it should be ordered by
        #   * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`
        def order=(values)
          @order = values
        end

        # @return [Array<Symbol>] the algorithms' results to order by (`[:words_ngrams, :dice]` when unset).
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # Define the threshold of per user matches
        # @param value [Float] the threshold that all of the algorithms should comply with
        def threshold=(value)
          @threshold = value
        end

        # @return [Float] the threshold (`0.15` when unset).
        def threshold
          @threshold ||= 0.15
        end

        # Generates a new object with same config but different base `data`.
        # @return [Eco::API::Organization::PeopleSimilarity]
        def newFrom(data)
          super(data).tap do |simil|
            simil.threshold = threshold
            simil.order     = order
            simil.attribute = attribute
          end
        end

        # @!endgroup

        # @!group Searchers

        # It gathers those that have the same `email`
        # @return [Hash] where `keys` are `email`s and `values` an `Array<Person>`
        def repeated_emails
          init_caches
          @by_email.select do |email, people|
            people.count > 1
          end
        end

        # It returns all people with no name (shorter than 2 characters once stripped)
        # @return [Eco::API::Organization::PeopleSimilarity]
        def unnamed
          select do |person|
            person.name.to_s.strip.length < 2
          end.yield_self do |results|
            newFrom(results)
          end
        end

        # It returns all people that do have a name (at least 2 characters once stripped)
        # @return [Eco::API::Organization::PeopleSimilarity]
        def named
          reject do |person|
            person.name.to_s.strip.length < 2
          end.yield_self do |results|
            newFrom(results)
          end
        end

        # It returns all the entries with `attribute` empty
        # @return [Eco::API::Organization::PeopleSimilarity]
        def blank_attribute
          select do |person|
            item_value(person).to_s.strip.length < 2
          end.yield_self do |results|
            newFrom(results)
          end
        end

        # It returns all the entries with `attribute` **not** empty
        # @return [Eco::API::Organization::PeopleSimilarity]
        def attribute_present
          reject do |person|
            item_value(person).to_s.strip.length < 2
          end.yield_self do |results|
            newFrom(results)
          end
        end

        # @!endgroup

        # @!group Analysis starters

        # Analyses People bases on `options`
        # @param needle_read [Proc, Symbol] when the value to read from `needle` object is different to the `:read` (`attribute`).
        #   This allows to for example, facet `needle.name` (needle_read) against `haystack_item.details[alt_id]` (read).
        # @param keep_empty [Boolean] to indicate if it should get rid of people with no results (based on threshold)
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def analyse(needle_read: nil, keep_empty: false, **options)
          options = { read: self.attribute }.merge(options)
          total = count; i = 1
          each_with_object({}) do |person, results|
            needle_str = needle_read ? item_string(person, needle_read) : nil
            results[person.id] = find_all_with_score(person, needle_str: needle_str, **options)
            print_progress("Analysed", total, i)
            i += 1
          end.yield_self do |analysed|
            analysed = clean_empty(analysed) unless keep_empty
            analysed
          end
        end

        # @!endgroup

        # @!group Results Treatment

        # Gets a new instance object of this class, with only people in results
        # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
        # @return [Eco::API::Organization::PeopleSimilarity]
        def newSimilarity(analysed)
          newFrom(people_in_results(analysed))
        end

        # Gathers, without repetitions, every analysed person and the matches in their results.
        # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
        # @return [Array] the people involved in `analysed`.
        def people_in_results(analysed)
          analysed.each_with_object([]) do |(id, results), people|
            related = results.each_with_object([self[id]]) do |result, group|
              group << result.match
            end
            related.each {|person| people << person unless people.include?(person)}
          end
        end

        # Removes from results those that do not have similar entries
        def clean_empty(analysed)
          analysed.select do |id, results|
            !results.empty?
          end
        end

        # Helper to do some treatment of the results
        # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
        # @yield [person, results] must return the treated results for `person`.
        # @return [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
        def with_analysed(analysed, keep_empty: false)
          analysed.each_with_object({}) do |(id, results), reanalysed|
            reanalysed[id] = yield(self[id], results)
          end.yield_self do |reanalysed|
            reanalysed = clean_empty(reanalysed) unless keep_empty
            reanalysed
          end
        end

        # Launches a reanalysis on `analysed` based on `options`
        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def rearrange(analysed, **options)
          with_analysed(analysed) do |person, results|
            results.relevant_results(**options)
          end
        end

        # Reanalyses by using a block to treat the needle and item values
        def reanalyse(analysed, msg: "Reanalysing", **options, &block)
          # NOTE(review): `options` is built but never passed on — confirm whether
          # `recalculate_results` was meant to receive it.
          options = { read: self.attribute }.merge(options)
          total = analysed.count; i = 1
          with_analysed(analysed) do |person, results|
            print_progress(msg, total, i)
            i += 1
            recalculate_results(results, &block)
          end
        end

        # Reanalyses by ignoring matching words between the `needle` and those found in `results`
        def ignore_matching_words(analysed, **options)
          prompt = "Reanalysing by ignoring matching words"
          reanalyse(analysed, msg: prompt, **options) do |needle_str, item_str, needle, item|
            self.class.remove_matching_words(needle_str, item_str)
          end
        end

        # Reanalyses by ignoring matching words between the `needle` and those found in `results`
        def ignore_matching_words_old(analysed, **options)
          options = { read: self.attribute }.merge(options)
          total = analysed.count; i = 1
          with_analysed(analysed) do |person, results|
            print_progress("Reanalysing by ignoring matching words", total, i)
            i += 1
            ignore_same_words_score(results, **options)
          end
        end

        # @!endgroup

        # @!group Reporting Helpers

        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        # @param format [Symbol] only `:txt` is supported.
        # @return [String, nil] well structured text; `nil` for any other `format`.
        def report(analysed, format: :txt)
          case format
          when :txt
            analysed.each_with_object("") do |(id, results), out|
              msg = results.results.map {|r| r.print}.join("\n ")
              out << "#{self[id].identify}:\n " + msg + "\n"
            end
          end
        end

        # Prints the report of an analysis.
        # @note Unless `:analysed` is provided, it launches an analysis with `options`.
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def print_analysis(**options)
          # NOTE(review): `results_with_false_positives` is not defined in this class;
          # presumably it comes from `Eco::Data::FuzzyMatch` — confirm.
          analysed = options[:analysed] || results_with_false_positives.analyse(**options)
          # The report already covers every entry: print it once.
          puts report(analysed)
          analysed
        end
        # @!endgroup

        protected

        # Drops the cached `@fuzzy_match` (if any) before delegating to the parent.
        def on_change
          # `remove_instance_variable` expects the variable *name* and raises if it is not defined.
          remove_instance_variable(:@fuzzy_match) if instance_variable_defined?(:@fuzzy_match)
          super
        end

        private

        # Prints a one-line progress indicator (only when there are more than 10 items).
        # @param msg [String] label of the progress line.
        # @param total [Integer] number of items to process.
        # @param num [Integer] position (1-based) of the current item.
        def print_progress(msg, total, num)
          return unless total > 10
          puts "" unless num > 1
          @print_msg_len ||= 0
          percent = (100 * num.to_f / total).round(1)
          msg = " #{msg}: #{percent}% (#{num} of #{total})\r"
          @print_msg_len = msg.length unless @print_msg_len > msg.length
          print msg
          $stdout.flush
          if percent > 99.9
            # Blank the progress line once finished.
            sleep(0.2)
            print "#{" " * @print_msg_len}\r"
            $stdout.flush
          end
        end

      end
    end
  end
end