eco-helpers 2.0.18 → 2.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +80 -1
  3. data/eco-helpers.gemspec +4 -1
  4. data/lib/eco/api/common/base_loader.rb +9 -5
  5. data/lib/eco/api/common/loaders/parser.rb +1 -0
  6. data/lib/eco/api/common/people/default_parsers.rb +1 -0
  7. data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
  8. data/lib/eco/api/common/people/entries.rb +1 -0
  9. data/lib/eco/api/common/people/entry_factory.rb +88 -23
  10. data/lib/eco/api/common/people/person_entry.rb +1 -0
  11. data/lib/eco/api/common/people/person_parser.rb +1 -1
  12. data/lib/eco/api/common/session.rb +1 -0
  13. data/lib/eco/api/common/session/base_session.rb +2 -0
  14. data/lib/eco/api/common/session/helpers.rb +30 -0
  15. data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
  16. data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
  17. data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
  18. data/lib/eco/api/common/version_patches/exception.rb +5 -2
  19. data/lib/eco/api/microcases/with_each.rb +67 -6
  20. data/lib/eco/api/microcases/with_each_present.rb +4 -2
  21. data/lib/eco/api/microcases/with_each_starter.rb +4 -2
  22. data/lib/eco/api/organization.rb +1 -1
  23. data/lib/eco/api/organization/people.rb +94 -25
  24. data/lib/eco/api/organization/people_similarity.rb +272 -0
  25. data/lib/eco/api/organization/person_schemas.rb +5 -1
  26. data/lib/eco/api/organization/policy_groups.rb +5 -1
  27. data/lib/eco/api/organization/tag_tree.rb +33 -0
  28. data/lib/eco/api/session.rb +19 -8
  29. data/lib/eco/api/session/batch.rb +7 -5
  30. data/lib/eco/api/session/batch/job.rb +34 -9
  31. data/lib/eco/api/usecases.rb +2 -2
  32. data/lib/eco/api/usecases/base_case.rb +2 -2
  33. data/lib/eco/api/usecases/base_io.rb +17 -4
  34. data/lib/eco/api/usecases/default_cases.rb +1 -0
  35. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +179 -32
  36. data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
  37. data/lib/eco/api/usecases/default_cases/to_csv_case.rb +81 -36
  38. data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +3 -4
  39. data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
  40. data/lib/eco/cli/config/default/input.rb +61 -8
  41. data/lib/eco/cli/config/default/options.rb +47 -2
  42. data/lib/eco/cli/config/default/people.rb +18 -24
  43. data/lib/eco/cli/config/default/usecases.rb +33 -2
  44. data/lib/eco/cli/config/default/workflow.rb +12 -7
  45. data/lib/eco/cli/scripting/args_helpers.rb +2 -2
  46. data/lib/eco/csv.rb +4 -2
  47. data/lib/eco/csv/table.rb +121 -21
  48. data/lib/eco/data/fuzzy_match.rb +109 -27
  49. data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
  50. data/lib/eco/data/fuzzy_match/ngrams_score.rb +19 -10
  51. data/lib/eco/data/fuzzy_match/pairing.rb +12 -19
  52. data/lib/eco/data/fuzzy_match/result.rb +22 -2
  53. data/lib/eco/data/fuzzy_match/results.rb +30 -6
  54. data/lib/eco/data/fuzzy_match/score.rb +12 -7
  55. data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
  56. data/lib/eco/version.rb +1 -1
  57. metadata +67 -3
  58. data/lib/eco/api/organization/people_analytics.rb +0 -60
@@ -0,0 +1,30 @@
1
+ require_relative 'helpers/prompt_user'
2
+
3
module Eco
  module API
    module Common
      module Session
        # Mixin that bundles the session helper modules.
        # Including it adds `InstanceMethods` to the host as instance methods
        # and `ClassMethods` to the host as class-level methods.
        module Helpers
          # Class-level helpers added to the including class (currently empty).
          module ClassMethods
          end

          # Instance-level helpers added to the including class.
          module InstanceMethods
            include Helpers::PromptUser
          end

          # Hook run by Ruby when `Helpers` is included somewhere.
          # @param base [Module] the class or module that is including `Helpers`.
          def self.included(base)
            base.include(InstanceMethods)
            base.extend(ClassMethods)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'timeout'
2
module Eco
  module API
    module Common
      module Session
        module Helpers
          # Console interaction helper. The host object must respond to `config`,
          # and that `config` must respond to `run_mode_remote?`.
          module PromptUser
            # Asks the user a question on the console and returns the answer.
            #
            # When `config.run_mode_remote?` is truthy nothing is printed or read
            # and `default` is used as the answer.
            #
            # @param question [String] the text printed right before reading the answer.
            # @param default [Object] the answer used in remote mode, when the timeout
            #   expires, or when standard input is closed (end of file).
            # @param explanation [String] text printed (with `puts`) before the question.
            # @param timeout [Numeric, nil] seconds to wait for an answer; `nil` waits forever.
            # @yield [response] optionally post-processes the answer.
            # @yieldparam response [Object] the typed answer (without line terminator) or `default`.
            # @return [Object] the answer, or the value returned by the block when one is given.
            def prompt_user(question, default:, explanation: "", timeout: nil)
              response =
                if config.run_mode_remote?
                  default
                else
                  puts explanation
                  print "#{question} "
                  line =
                    begin
                      timeout ? Timeout.timeout(timeout) { $stdin.gets } : $stdin.gets
                    rescue Timeout::Error
                      nil
                    end
                  # `gets` returns nil at end of file; `chomp` only strips a line
                  # terminator, so an answer without trailing newline is kept whole.
                  line ? line.chomp : default
                end
              return response unless block_given?
              yield(response)
            end
          end
        end
      end
    end
  end
end
@@ -18,7 +18,7 @@ module Ecoportal
18
18
  entry.to_s(:identify)
19
19
  else
20
20
  str_id = id ? "id: '#{id}'; " : ""
21
- "#{name}' (#{str_id}ext_id: '#{external_id}'; email: '#{email}')"
21
+ "'#{name}' (#{str_id}ext_id: '#{external_id}'; email: '#{email}')"
22
22
  end
23
23
  end
24
24
 
@@ -11,12 +11,15 @@ module Ecoportal
11
11
  original_doc["account"] = JSON.parse(doc["account"])
12
12
  end
13
13
 
14
- def new?
15
- !initial_doc["details"] && !initial_doc["account"]
14
+ def new?(doc = :initial)
15
+ ref_doc = (doc == :original) ? original_doc : initial_doc
16
+ !ref_doc["details"] && !ref_doc["account"]
16
17
  end
17
18
 
18
- def account_added?
19
- account && !initial_doc["account"]
19
+ # @return [Boolean] if the account has been added, compared to `doc`
20
+ def account_added?(doc = :initial)
21
+ ref_doc = (doc == :original) ? original_doc : initial_doc
22
+ account && !ref_doc["account"]
20
23
  end
21
24
 
22
25
  end
@@ -2,8 +2,11 @@ class ::Exception
2
2
  def patch_full_message
3
3
  begin
4
4
  msg = []
5
- msg << "\n#{backtrace.first} \n#{message} (#{self.class.to_s})"
6
- backtrace[1..-1].each_with_index {|bt, i| msg << "#{" "*8}#{i+1}: from #{bt}"}
5
+ tracing = backtrace ? backtrace : []
6
+ tracing = (self.class == SystemStackError) ? tracing[1..30] : tracing[1..-1]
7
+ tracing ||= []
8
+ msg << "\n#{tracing.first} \n#{message} (#{self.class.to_s})"
9
+ tracing.each_with_index {|bt, i| msg << "#{" "*8}#{i+1}: from #{bt}"}
7
10
  msg.join("\n")
8
11
  rescue Exception => e
9
12
  puts "Something is wrong with 'patch_full_message': #{e}"
@@ -12,16 +12,77 @@ module Eco
12
12
  # @yieldparam person [Ecoportal::API::V1::Person] the found person that matches `entry`, or a new person otherwise.
13
13
  # @return [Eco::API::Organization::People] all the people, including new and existing ones.
14
14
  def with_each(entries, people, options)
15
- entries.map do |entry|
16
- unless person = people.find(entry, strict: micro.strict_search?(options))
17
- person = session.new_person
15
+ @_skip_all_multiple_results = false
16
+ entries.each_with_object([]) do |entry, scoped|
17
+ begin
18
+ unless person = people.find(entry, strict: micro.strict_search?(options))
19
+ person = session.new_person
20
+ end
21
+ rescue Eco::API::Organization::People::MultipleSearchResults => e
22
+ unless @_skip_all_multiple_results
23
+ msg = "\n * When searching this Entry: #{entry.to_s(:identify)}"
24
+ person = _with_each_prompt_to_select_user(e.append_message(msg), entry: entry)
25
+ end
26
+ end
27
+
28
+ if person
29
+ person.entry = entry
30
+ yield(entry, person) if block_given?
31
+ scoped << person
18
32
  end
19
- person.entry = entry
20
- yield(entry, person) if block_given?
21
- person
22
33
  end.yield_self {|all_people| people.newFrom all_people.uniq}
23
34
  end
24
35
 
36
+ private
37
+
38
# Asks the user how to proceed when a search returned several candidates.
#
# Accepted answers (case-insensitive, surrounding spaces ignored):
#   * a non-negative index within `error.candidates` selects that person,
#   * `I` skips this entry, `A` skips this and flags all later ones to be skipped,
#   * `C` creates a new person, `B` re-raises `error`.
# Any other answer (including non-numeric text and negative numbers) prints a
# hint and asks again.
#
# @param error [Eco::API::Organization::People::MultipleSearchResults] the search error.
# @param entry [Object, nil] the input entry being searched (only used in log messages).
# @param increase_count [Boolean] whether this call counts as a new prompt (`false` on re-asks).
# @raise [RuntimeError] when `error` is not a `MultipleSearchResults`.
# @raise [Eco::API::Organization::People::MultipleSearchResults] when the user chooses `B`.
# @return [Object, nil] the selected or newly created person, or `nil` when skipped.
def _with_each_prompt_to_select_user(error, entry: nil, increase_count: true)
  unless error.is_a?(Eco::API::Organization::People::MultipleSearchResults)
    raise "Expecting Eco::API::Organization::People::MultipleSearchResults. Given: #{error.class}"
  end
  @_with_each_prompts = 0 unless instance_variable_defined?(:@_with_each_prompts)
  @_with_each_prompts += 1 if increase_count

  lines = []
  lines << "\n(#{@_with_each_prompts}) " + error.to_s + "\n"
  lines << " #index - Select the correct person by its number index among the list above."
  lines << " (I) - Just Skip/Ignore this one. I will deal with that input entry in another launch."
  lines << " (A) - Ignore all the rest of input entries with this problem."
  lines << " (C) - Create a new person."
  lines << " (B) - Just break this script. I need to change the input file :/"

  prompt_user("Type one option (#number/I/A/C/B):", explanation: lines.join("\n"), default: "I") do |res|
    res = res.to_s.strip.upcase
    # Only a string made of digits is an index: `String#to_i` would turn any
    # unrecognised text into 0 and silently pick the first candidate.
    pos = res.match?(/\A\d+\z/) ? res.to_i : nil
    case
    when res.start_with?("I")
      logger.info "Ignoring entry... #{entry.to_s(:identify) if entry}"
      nil
    when res.start_with?("A")
      logger.info "All input entries with this same issue will be ignored for this launch"
      @_skip_all_multiple_results = true
      nil
    when res.start_with?("C")
      logger.info "Creating new person...#{"for entry #{entry.to_s(:identify)}" if entry}"
      session.new_person
    when res.start_with?("B")
      raise error
    when pos && pos < error.candidates.length
      error.candidate(pos).tap do |person|
        logger.info "Thanks!! You selected #{person.identify}"
        sleep(1.5)
      end
    else
      if pos
        print "#{pos} is not a number in the range. "
      else
        print "#{res} is not an option. "
      end
      puts "Please select one of the offered options..."
      sleep(1)
      _with_each_prompt_to_select_user(error, increase_count: false, entry: entry)
    end
  end
end
85
+
25
86
  end
26
87
  end
27
88
  end
@@ -15,8 +15,10 @@ module Eco
15
15
  def with_each_present(entries, people, options, log_starter: false)
16
16
  found = []
17
17
  micro.with_each(entries, people, options) do |entry, person|
18
- if person.new? && log_starter
19
- session.logger.error("This person does not exist: #{entry.to_s(:identify)}")
18
+ if person.new?
19
+ if log_starter
20
+ session.logger.error("This person does not exist: #{entry.to_s(:identify)}")
21
+ end
20
22
  next
21
23
  end
22
24
  found << person
@@ -15,8 +15,10 @@ module Eco
15
15
  def with_each_starter(entries, people, options, log_present: false)
16
16
  starters = []
17
17
  micro.with_each(entries, people, options) do |entry, person|
18
- if !person.new? && log_present
19
- session.logger.error("This person (id: '#{person.id}') already exists: #{entry.to_s(:identify)}")
18
+ if !person.new?
19
+ if log_present
20
+ session.logger.error("This person (id: '#{person.id}') already exists: #{entry.to_s(:identify)}")
21
+ end
20
22
  next
21
23
  end
22
24
  starters << person
@@ -9,7 +9,7 @@ require_relative 'organization/tag_tree'
9
9
  require_relative 'organization/presets_factory'
10
10
  require_relative 'organization/preferences'
11
11
  require_relative 'organization/people'
12
- require_relative 'organization/people_analytics'
12
+ require_relative 'organization/people_similarity'
13
13
  require_relative 'organization/person_schemas'
14
14
  require_relative 'organization/policy_groups'
15
15
  require_relative 'organization/login_providers'
@@ -2,6 +2,43 @@ module Eco
2
2
  module API
3
3
  module Organization
4
4
  class People < Eco::Language::Models::Collection
5
# Error raised when several people match the same search criterion.
# @note it carries the matching people so that the caller can decide which one
#   (if any) is the right one, instead of creating a duplicate or overwriting
#   data of a different person.
class MultipleSearchResults < StandardError
  attr_reader :candidates, :property

  # @param msg [String] the basic error message.
  # @param candidates [Array<Person>] the people that match the same search criterion.
  # @param property [String] the person property the search was based on.
  def initialize(msg, candidates: [], property: "email")
    @candidates = candidates
    @property   = property
    super(msg + " " + candidates_summary)
  end

  # Describes every candidate in one line.
  # @param with_index [Boolean] to prefix each description with its position in `candidates`.
  # @return [Array<String>] one description per candidate.
  def identify_candidates(with_index: false)
    candidates.each_with_index.map do |person, position|
      prefix = with_index ? "#{position}. " : ""
      status =
        if !person.account
          "(no account)"
        elsif person.account_added?
          "(new user)"
        else
          "(user)"
        end
      "#{prefix}#{status} #{person.identify}"
    end
  end

  # @param index [Integer] position in `candidates`.
  # @return [Person] the candidate in the `index` position.
  def candidate(index)
    candidates[index]
  end

  private

  # @return [String] a header line followed by the indexed candidates.
  def candidates_summary
    header = "The following people have the same '#{property}':"
    ([header] + identify_candidates(with_index: true)).join("\n ")
  end
end
41
+
5
42
  # build the shortcuts of Collection
6
43
  attr_presence :account, :details
7
44
  attr_collection :id, :external_id, :email, :name, :supervisor_id
@@ -78,34 +115,36 @@ module Eco
78
115
  # @!group Searchers
79
116
 
80
117
  # It searches a person using the parameters given.
118
+ # @note This is how the search function actually works:
119
+ # 1. if eP `id` is given, returns the person (if found), otherwise...
120
+ # 2. if `external_id` is given, returns the person (if found), otherwise...
121
+ # 3. if `strict` is `false` and `email` is given:
122
+ # - if there is only 1 person with that email, returns that person, otherwise...
123
+ # - if found but, there are many candidates, it raises MultipleSearchResults error
124
+ # - if person `external_id` matches `email`, returns that person
125
+ # @raise MultipleSearchResults if there are multiple people with the same `email`
126
+ # and there's no other criteria to find the person. It only gets to this point if
127
+ # `external_id` was **not** provided and we are **not** in 'strict' search mode.
128
+ # However, it could be we were in `strict` mode and `external_id` was not provided.
81
129
  # @param id [String] the `internal id` of the person
82
130
  # @param external_id [String] the `external_id` of the person
83
131
  # @param email [String] the `email` of the person
84
- # @param strict [Boolean] if should perform a `soft` or a `strict` search. `strict` will avoid repeated email addresses.
132
+ # @param strict [Boolean] if should perform a `:soft` or a `:strict` search. `strict` will avoid repeated email addresses.
85
133
  # @return [Person, nil] the person we were searching, or `nil` if not found.
86
134
  def person(id: nil, external_id: nil, email: nil, strict: false)
87
135
  init_caches
88
- pers = @by_id[id]&.first if id
89
- pers = @by_external_id[external_id&.strip]&.first if !pers && !external_id.to_s.strip.empty?
90
-
91
- # strict prevents taking existing user for searched person with same email
92
- # specially useful if the organisation ensures all have external id (no need for email search)
93
- if !pers && (!strict || external_id.to_s.strip.empty?)
94
- # person still not found and either not strict or no external_id provided
95
- pers = @by_users_email[email&.downcase.strip]&.first if !email.to_s.strip.empty?
96
-
97
- if !pers && !strict && !email.to_s.strip.empty?
98
- candidates = @by_non_users_email[email&.downcase.strip] || []
99
- raise "Too many non-user candidates (#{candidates.length}) with email '#{email}'" if candidates.length > 1
100
- pers = candidates.first
101
- end
102
-
103
- pers = @by_external_id[email&.downcase.strip]&.first if !pers && !email.to_s.strip.empty?
104
- end
105
-
136
+ # normalize values
137
+ ext_id = !external_id.to_s.strip.empty? && external_id.strip
138
+ email = !email.to_s.strip.empty? && email.downcase.strip
139
+
140
+ pers = nil
141
+ pers ||= @by_id[id]&.first
142
+ pers ||= @by_external_id[ext_id]&.first
143
+ pers ||= person_by_email(email) unless strict && ext_id
106
144
  pers
107
145
  end
108
146
 
147
+ # @see Eco::API::Organization::People#person
109
148
  def find(object, strict: false)
110
149
  id = attr_value(object, "id")
111
150
  external_id = attr_value(object, "external_id")
@@ -177,8 +216,8 @@ module Eco
177
216
  # @!endgroup
178
217
 
179
218
  # @!group Helper methods
180
- def analytics
181
- Eco::API::Organization::PeopleAnalytics.new(self.to_a)
219
+ def similarity
220
+ Eco::API::Organization::PeopleSimilarity.new(self.to_a)
182
221
  end
183
222
  # @!endgroup
184
223
 
@@ -190,16 +229,46 @@ module Eco
190
229
 
191
230
  private
192
231
 
232
# Looks a person up by `email` using the email caches.
#
#   * a single user with that email and no non-user with it: returns the user;
#   * no user and exactly one non-user with it: returns that non-user;
#   * any other non-empty set of matches: raises, unless `prevent_duplicates` is off;
#   * otherwise falls back to the person whose `external_id` equals `email`.
#
# @param email [String, nil, false] the already normalized email; falsey skips the search.
# @param prevent_duplicates [Boolean] whether ambiguous matches should raise.
# @raise [MultipleSearchResults] when several people share `email` and `prevent_duplicates` is on.
# @return [Person, nil] the person found, if any.
def person_by_email(email, prevent_duplicates: true)
  return nil unless email

  non_users = @by_non_users_email[email] || []
  user      = (@by_users_email[email] || []).first

  return user if user && non_users.empty?
  return non_users.first if !user && non_users.length == 1

  matches = user ? [user] + non_users : non_users
  if prevent_duplicates && !matches.empty?
    raise MultipleSearchResults.new("Multiple search results match the criteria.", candidates: matches, property: "email")
  end

  @by_external_id[email]&.first
end
252
+
193
253
  def init_caches
194
254
  return if @caches_init
195
255
  @by_id = to_h
196
- @by_external_id = to_h('external_id')
197
- @by_users_email = users.to_h('email')
198
- @by_non_users_email = non_users.to_h('email')
199
- @by_email = to_h('email')
256
+ @by_external_id = no_nil_key(to_h('external_id'))
257
+ @by_users_email = no_nil_key(existing_users.to_h('email'))
258
+ @by_non_users_email = no_nil_key(non_users.to_h('email'))
259
+ @by_email = no_nil_key(to_h('email'))
200
260
  @caches_init = true
201
261
  end
202
262
 
263
# Collection of `users` for which `account_added?(:original)` is falsey.
# @return [Object] whatever `newFrom` builds from the filtered list.
def existing_users
  already_users = users.select { |user| !user.account_added?(:original) }
  newFrom(already_users)
end
266
+
267
# Removes the `nil` key (if present) from `hash`, in place.
# @param hash [Hash] the hash to clean; it is mutated.
# @return [Hash] the very same `hash` object.
def no_nil_key(hash)
  hash.delete(nil)
  hash
end
270
+
271
+
203
272
  end
204
273
  end
205
274
  end
@@ -0,0 +1,272 @@
1
+ module Eco
2
+ module API
3
+ module Organization
4
+
5
+ # Class to find out duplicates in the People Manager
6
+ #
7
+ # @attr_writer attribute [String, Proc, nil] the target attribute to be read.
8
+ class PeopleSimilarity < Eco::API::Organization::People
9
+ include Eco::Data::FuzzyMatch
10
+
11
+ attr_accessor :attribute
12
+
13
+ # @!group Config
14
# Sets the target attribute to be read from each person.
# @param attr [String, Symbol, Proc, nil] the attribute name, or a callable that reads it.
def attribute=(attr)
  @attribute = attr
end

# @return [String, Symbol, Proc] the target attribute to be read (`:name` when none was set).
def attribute
  @attribute = :name unless @attribute
  @attribute
end
22
+
23
# Returns the target value to analyse for `person`.
#
# When `attribute` is a `Proc` it is called with `person`; otherwise it is
# treated as a method name and invoked on `person` if `person` responds to it.
#
# @param person [Ecoportal::API::V1::Person] the person to read the value from.
# @return [Object, nil] the value read, or `nil` when `person` does not respond to `attribute`.
def item_value(person)
  return attribute.call(person) if attribute.is_a?(Proc)
  attr = attribute.to_sym
  person.public_send(attr) if person.respond_to?(attr)
end
30
+
31
# Defines the order of relevance of the per-user matches.
# @param values [Array<Symbol>] the algorithms' results it should be ordered by
#   * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`
def order=(values)
  @order = values
end

# @return [Array<Symbol>] the configured order (`[:words_ngrams, :dice]` when none was set).
def order
  @order = %i[words_ngrams dice] unless @order
  @order
end
41
+
42
# Defines the score threshold.
# @param value [Float] the threshold that all of the algorithms should comply with
def threshold=(value)
  @threshold = value
end

# @return [Float] the configured threshold (`0.15` when none was set).
def threshold
  @threshold = 0.15 unless @threshold
  @threshold
end
51
+
52
# Builds a new object from `data` through the parent implementation and copies
# this object's `threshold`, `order` and `attribute` settings onto it.
# @param data [Object] the base data handed to the parent `newFrom`.
# @return [Eco::API::Organization::PeopleSimilarity]
def newFrom(data)
  copy = super(data)
  copy.threshold = threshold
  copy.order     = order
  copy.attribute = attribute
  copy
end
61
+
62
+ # @!endgroup
63
+
64
+ # @!group Searchers
65
+
66
# Gathers the people that share the same `email`.
# @return [Hash] where `keys` are `email`s and `values` an `Array<Person>` with more than one element.
def repeated_emails
  init_caches
  @by_email.select { |_email, owners| owners.count > 1 }
end
74
+
75
# Selects the people whose stripped `name` is shorter than 2 characters.
# @return [Eco::API::Organization::PeopleSimilarity]
def unnamed
  nameless = select { |person| person.name.to_s.strip.length < 2 }
  newFrom(nameless)
end
84
+
85
# Keeps the people whose stripped `name` has at least 2 characters.
# @return [Eco::API::Organization::PeopleSimilarity]
def named
  with_name = reject { |person| person.name.to_s.strip.length < 2 }
  newFrom(with_name)
end
94
+
95
# Selects the people whose `item_value`, once stringified and stripped, is
# shorter than 2 characters.
# @return [Eco::API::Organization::PeopleSimilarity]
def blank_attribute
  blanks = select { |person| item_value(person).to_s.strip.length < 2 }
  newFrom(blanks)
end
104
+
105
# Keeps the people whose `item_value`, once stringified and stripped, has at
# least 2 characters (i.e. the `attribute` is **not** empty).
# @return [Eco::API::Organization::PeopleSimilarity]
def attribute_present
  filled = reject { |person| item_value(person).to_s.strip.length < 2 }
  newFrom(filled)
end
114
+
115
+ # @!endgroup
116
+
117
+ # @!group Analysis starters
118
+
119
# Analyses the people based on `options`: every person is passed to
# `find_all_with_score` and the outcome is stored under the person `id`.
# @param needle_read [Proc, Symbol] when the value to read from the `needle` object is different
#   to the `:read` one (`attribute`). It is resolved through `item_string(person, needle_read)`.
#   This allows, for example, to facet `needle.name` (needle_read) against `haystack_item.details[alt_id]` (read).
# @param keep_empty [Boolean] `true` to keep the people that got no results; by default they are
#   removed with `clean_empty`.
# @param options [Hash] forwarded to `find_all_with_score`; `:read` defaults to `attribute`.
# @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
def analyse(needle_read: nil, keep_empty: false, **options)
  opts  = { read: attribute }.merge(options)
  total = count
  done  = 0
  analysed = each_with_object({}) do |person, scores|
    needle_str = needle_read ? item_string(person, needle_read) : nil
    scores[person.id] = find_all_with_score(person, needle_str: needle_str, **opts)
    print_progress("Analysed", total, done += 1)
  end
  keep_empty ? analysed : clean_empty(analysed)
end
138
+
139
+ # @!endgroup
140
+
141
+ # @!group Results Treatment
142
+
143
# Builds a new object of this class holding only the people present in `analysed`.
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
# @return [Eco::API::Organization::PeopleSimilarity]
def newSimilarity(analysed)
  involved = people_in_results(analysed)
  newFrom(involved)
end
149
+
150
# Lists, without repetitions, every person involved in `analysed`: the person
# behind each `id` key (read with `self[id]`) followed by the `match` of each
# of its results.
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the results
#   (each result must respond to `match`).
# @return [Array] the people, in order of first appearance.
def people_in_results(analysed)
  analysed.each_with_object([]) do |(id, results), people|
    matches = results.each_with_object([]) { |result, found| found << result.match }
    ([self[id]] + matches).each do |person|
      next if people.include?(person)
      people << person
    end
  end
end
158
+
159
# Drops the entries whose results are empty.
# @param analysed [Hash] where the _values_ respond to `empty?`.
# @return [Hash] a new hash with only the entries that have results.
def clean_empty(analysed)
  analysed.reject { |_id, results| results.empty? }
end
165
+
166
# Helper to treat the results: yields each person (read with `self[id]`) and
# its results, and stores what the block returns under the same `id`.
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
# @param keep_empty [Boolean] `true` to keep entries whose new results are empty; by default
#   they are removed with `clean_empty`.
# @yield [person, results] the treatment to apply.
# @return [Hash] where the _keys_ are the people `id`s and _values_ what the block returned.
def with_analysed(analysed, keep_empty: false)
  reanalysed = analysed.each_with_object({}) do |(id, results), treated|
    treated[id] = yield(self[id], results)
  end
  keep_empty ? reanalysed : clean_empty(reanalysed)
end
177
+
178
# Launches a reanalysis on `analysed` by asking each results object for its
# `relevant_results(**options)`.
# @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
# @param options [Hash] forwarded to `relevant_results`.
# @return [Hash] what `with_analysed` returns.
def rearrange(analysed, **options)
  with_analysed(analysed) { |_person, results| results.relevant_results(**options) }
end
185
+
186
# Reanalyses by handing each results object, together with the given block, to
# `recalculate_results`, reporting progress along the way.
# @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the results.
# @param msg [String] the label shown by `print_progress`.
# @param options [Hash] extra options (`:read` defaults to `attribute`).
# @return [Hash] what `with_analysed` returns.
def reanalyse(analysed, msg: "Reanalysing", **options, &block)
  # NOTE(review): the merged options are not forwarded anywhere below — confirm intent.
  options = { read: attribute }.merge(options)
  total = analysed.count
  step  = 0
  with_analysed(analysed) do |_person, results|
    print_progress(msg, total, step += 1)
    recalculate_results(results, &block)
  end
end
196
+
197
# Reanalyses through `reanalyse`, treating each needle/item string pair with
# the class method `remove_matching_words`.
# @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the results.
# @param options [Hash] forwarded to `reanalyse`.
# @return [Hash] what `reanalyse` returns.
def ignore_matching_words(analysed, **options)
  reanalyse(analysed, msg: "Reanalysing by ignoring matching words", **options) do |needle_str, item_str, _needle, _item|
    self.class.remove_matching_words(needle_str, item_str)
  end
end
204
+
205
# Previous flavour of the reanalysis that ignores matching words: each results
# object is passed to `ignore_same_words_score` together with the options.
# @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the results.
# @param options [Hash] forwarded to `ignore_same_words_score`; `:read` defaults to `attribute`.
# @return [Hash] what `with_analysed` returns.
def ignore_matching_words_old(analysed, **options)
  opts  = { read: attribute }.merge(options)
  total = analysed.count
  step  = 0
  with_analysed(analysed) do |_person, results|
    print_progress("Reanalysing by ignoring matching words", total, step += 1)
    ignore_same_words_score(results, **opts)
  end
end
215
+
216
+ # @!endgroup
217
+
218
+ # @!group Reporting Helpers
219
+
220
# Renders `analysed` as text: for each entry, the `identify` of the person
# (read with `self[id]`) followed by the `print` of each of its results.
# @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ respond to `results`.
# @param format [Symbol] only `:txt` is supported.
# @return [String, nil] well structured text, or `nil` for any other `format`.
def report(analysed, format: :txt)
  return unless format == :txt

  analysed.each_with_object("") do |(id, results), text|
    printed = results.results.map { |result| result.print }
    text << "#{self[id].identify}:\n " + printed.join("\n ") + "\n"
  end
end
230
+
231
# Prints (once) the text `report` of an analysis to standard output.
# @note Unless `:analysed` is provided, a new analysis is launched with
#   `results_with_false_positives.analyse(**options)`.
#   NOTE(review): `results_with_false_positives` is not defined in this class body —
#   presumably provided elsewhere; confirm before relying on this path.
# @param options [Hash] may carry `:analysed` (an already computed analysis);
#   otherwise all the options are forwarded to `analyse`.
# @return [Hash] the analysis that was printed, where the _keys_ are the people `id`s
#   and the _values_ the `Eco::Data::FuzzyMatch::Results`
def print_analysis(**options)
  analysed = options[:analysed] || results_with_false_positives.analyse(**options)
  # The report already covers every entry, so it is printed a single time
  # (and not at all when there is nothing to report).
  puts report(analysed) unless analysed.empty?
  analysed
end
241
+ # @!endgroup
242
+
243
+ protected
244
+
245
# Hook for content changes: drops the `@fuzzy_match` instance variable (when
# it is set) and then delegates to the parent implementation.
# @return [Object] whatever the parent `on_change` returns.
def on_change
  # `remove_instance_variable` expects the variable *name*; it also raises
  # NameError when the variable is not defined, hence the guard.
  remove_instance_variable(:@fuzzy_match) if instance_variable_defined?(:@fuzzy_match)
  super
end
249
+
250
+ private
251
+
252
# Prints a one-line progress indicator that rewrites itself (it ends with a
# carriage return). Nothing is printed unless `total` is greater than 10.
# Once the percentage goes above 99.9 it pauses briefly and blanks the line.
# @param msg [String] the label of the task in progress.
# @param total [Integer] the total number of items.
# @param num [Integer] the number of the current item (starting at 1).
def print_progress(msg, total, num)
  return unless total > 10

  puts "" if num <= 1
  percent = (100 * num.to_f / total).round(1)
  line = " #{msg}: #{percent}% (#{num} of #{total})\r"
  # remember the longest line printed so far, so the final wipe covers it all
  @print_msg_len = [@print_msg_len || 0, line.length].max
  print line
  $stdout.flush
  return unless percent > 99.9

  sleep(0.2)
  print "#{" " * @print_msg_len}\r"
  $stdout.flush
end
267
+
268
+
269
+ end
270
+ end
271
+ end
272
+ end