eco-helpers 2.0.19 → 2.0.25

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +77 -1
  3. data/eco-helpers.gemspec +4 -1
  4. data/lib/eco/api/common/base_loader.rb +9 -5
  5. data/lib/eco/api/common/loaders/parser.rb +1 -0
  6. data/lib/eco/api/common/people/default_parsers.rb +1 -0
  7. data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
  8. data/lib/eco/api/common/people/entries.rb +1 -0
  9. data/lib/eco/api/common/people/entry_factory.rb +64 -16
  10. data/lib/eco/api/common/people/person_parser.rb +1 -1
  11. data/lib/eco/api/common/version_patches/exception.rb +5 -2
  12. data/lib/eco/api/organization/people.rb +8 -2
  13. data/lib/eco/api/organization/people_similarity.rb +171 -11
  14. data/lib/eco/api/organization/tag_tree.rb +33 -0
  15. data/lib/eco/api/session.rb +15 -7
  16. data/lib/eco/api/session/batch.rb +1 -1
  17. data/lib/eco/api/session/batch/job.rb +34 -9
  18. data/lib/eco/api/usecases.rb +2 -2
  19. data/lib/eco/api/usecases/base_case.rb +2 -2
  20. data/lib/eco/api/usecases/base_io.rb +17 -4
  21. data/lib/eco/api/usecases/default_cases.rb +1 -0
  22. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
  23. data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
  24. data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
  25. data/lib/eco/cli/config/default/input.rb +61 -8
  26. data/lib/eco/cli/config/default/options.rb +46 -2
  27. data/lib/eco/cli/config/default/people.rb +18 -24
  28. data/lib/eco/cli/config/default/usecases.rb +31 -2
  29. data/lib/eco/cli/config/default/workflow.rb +8 -6
  30. data/lib/eco/cli/scripting/args_helpers.rb +2 -2
  31. data/lib/eco/csv/table.rb +121 -21
  32. data/lib/eco/data/fuzzy_match.rb +52 -12
  33. data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
  34. data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
  35. data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
  36. data/lib/eco/data/fuzzy_match/result.rb +15 -1
  37. data/lib/eco/data/fuzzy_match/results.rb +18 -0
  38. data/lib/eco/data/fuzzy_match/score.rb +12 -7
  39. data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
  40. data/lib/eco/language/models/collection.rb +5 -2
  41. data/lib/eco/version.rb +1 -1
  42. metadata +64 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14260868c76936513a93d4d104eacebbd11e47ed05806d4102ee76196a300d2b
4
- data.tar.gz: 35784d03a18f89d2ce8bf5c4105e0eaa647dd10b4e1fee03897319d9ad838760
3
+ metadata.gz: 06a58306abadf9b27421583990eb14960f7f30368515481b16aa474de1bc1b08
4
+ data.tar.gz: 0eef93068fdb31bc6d1949f1022eac325403ac3dbb47c95b593a5b9623655773
5
5
  SHA512:
6
- metadata.gz: 514d71e93bfa4fb854d9062be03306e154a4dfd184256ab03da30e2bf4bb2a45fb305efd5e2206821e522dc7cc0bfbc69e75285e3c6292dca78f359bd166f52a
7
- data.tar.gz: c99a424905916cef61333c18bb31726e90fb9759a4a1b747b72ab123f4bc09ecc6962205a966b3ea46aa1f0a4cc3dd7f72a8c53829cc764a7993d64ad45495bf
6
+ metadata.gz: 80b0d2fc7bedb99deabae6d7d273cb4967eb0022db2e743078a82cace02d4f499fe8ad51ec02b7c5bcef549aac9fb03b0ea7ef5358fb602c65856654c7c20814
7
+ data.tar.gz: 553e1342f38c244ab57bb259b639d55ddc4a4d5d6f72bd54ed9290111636f4dffb29834f69a5b7d2707ee3d44951fa52efccf81589194f11dfa1a709309ddb77
data/CHANGELOG.md CHANGED
@@ -1,7 +1,83 @@
1
1
  # Change Log
2
2
  All notable changes to this project will be documented in this file.
3
3
 
4
- ## [2.0.19] - 2021-05-xx
4
+ ## [2.0.25] - 2021-06-xx
5
+
6
+ ### Added
7
+ - `Eco::API::UseCases::DefaultCases::HrisCase` validation error to require the `-schema-id` command line option when there are people in schemas other than the active one
8
+
9
+ ### Changed
10
+ - `Eco::API::Session::Batch::Job`
11
+ * for backwards compatibility `-include-only-excluded` should bring an options structure compatible with `-include-excluded`
12
+
13
+ ### Fixed
14
+ - `Eco::API::Session::Batch` fixed typo that would prevent `prompt_user` from working
15
+
16
+ ## [2.0.24] - 2021-06-22
17
+
18
+ ### Added
19
+ - `Eco::API::Session::Batch::Job` made **native** `-include-excluded`
20
+ * also added new option `-include-only-excluded` to be able to target only HRIS-excluded people
21
+
22
+
23
+ ## [2.0.23] - 2021-06-22
24
+
25
+ ### Added
26
+ - `Eco::API::Session::Batch::Job` **new** option (`-save-requests`) to save requests even if in `dry-run` (`-simulate`)
27
+ ### Changed
28
+ - `Eco::API::Session::Batch::Job` new people won't create updates unless they have either details or account
29
+ * because that entry is not supposed to be created unless it has an account or details
30
+
31
+ ## [2.0.22] - 2021-06-18
32
+
33
+ ### Added
34
+ - exposed `logger` in `BaseLoader` and
35
+ - support for multiple input files
36
+ * `Eco::API::Common::People::EntryFactory#entries`:
37
+ - refactored to allow multiple input files parsing
38
+ - in addition to `idx`, hash entries will also get their `source_file`
39
+ * Input callback at `lib/eco/cli/config/default/input` refactored format detection and enabled folder input
40
+ * `SCR.get_file` language extended to also mention folder (not just file)
41
+ - support for `.xls` and `.xlsx` files
42
+ * `Eco::API::Common::People::DefaultParsers::XLSParser` the Excel files **parser**
43
+ * `Eco::API::Common::People::PersonParser` added `:xls` as an accepted format
44
+ * `Eco::API::Session#fields_mapper` exposed mapper through a method to allow **headers detection**
45
+ - The external names of the fields are the column headers of the input file
46
+ * `Eco::API::UseCases::BaseIO` when arguments validation fails, it now raises a specific `MissingParameter` error
47
+
48
+ ### Changed
49
+ - dry out `BaseLoader` (only session is set as instance variable)
50
+ - removed `creek` **dependency** (it was not used anywhere in the gem)
51
+ * we just kept `roo` and `roo-xls`
52
+ - custom `Error` classes now all inherit from `StandardError` (rather than `Exception`)
53
+
54
+
55
+
56
+ ## [2.0.21] - 2021-06-04
57
+
58
+ ### Added
59
+ - `Eco::CSV::Table`, support to create the table out of an `Array<Hash>`
60
+ - This opens new methods to transform input Excel file to this data structure and unify input data structures.
61
+ - **new** use case `Eco::API::UseCases::DefaultCases::CleanUnknownTags` invokable via `clean-unknown-tags`
62
+
63
+ ### Changed
64
+ - `Eco::API::Common::People::EntryFactory` slight **refactor** to better support multiple input formats
65
+
66
+
67
+ ## [2.0.20] - 2021-05-31
68
+
69
+ ### Added
70
+ - **dependencies** to `creek`, `roo` and `roo-xls`
71
+ - **dependencies** to `hashdiff`
72
+ - `Eco::API::Session#parse_attribute` => added missing parameter `deps:`
73
+ - new option `-stdout [file]` to redirect the output to a file
74
+ - `Eco::CSV::Table`, **added** more helper methods `#group_by`, `#transform_values`, `#slice`, `#slice_columns`, `#delete_column`
75
+ - `Eco::API::Organization::TagTree` **added** more helper methods: `top?`, `tag=`, `as_json`, `dup`, `diff`
76
+
77
+ ### Fixed
78
+ - `Exception` patch: when the error is a `SystemStackError` there is no `backtrace` :/
79
+
80
+ ## [2.0.19] - 2021-05-31
5
81
 
6
82
  ### Added
7
83
  - Better error message for people searches & **offer** to select among the candidates:
data/eco-helpers.gemspec CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
14
14
  spec.homepage = "https://www.ecoportal.com"
15
15
  spec.licenses = %w[MIT]
16
16
 
17
- spec.required_ruby_version = '>= 2.4.4'
17
+ spec.required_ruby_version = '>= 2.5.0'
18
18
 
19
19
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
20
20
  f.match(%r{^(test|spec|features)/})
@@ -36,7 +36,10 @@ Gem::Specification.new do |spec|
36
36
  spec.add_dependency 'aws-sdk-ses', '>= 1.36.0', '< 2'
37
37
  spec.add_dependency 'dotenv', '>= 2.7.6', '< 2.8'
38
38
  spec.add_dependency 'net-sftp', '>= 3.0.0', '< 3.1'
39
+ spec.add_dependency 'hashdiff', '>= 1.0.1', '< 1.1'
39
40
  spec.add_dependency 'fuzzy_match', '>= 2.1.0', '< 2.2'
40
41
  spec.add_dependency 'amatch', '>= 0.4.0', '< 0.5'
41
42
  spec.add_dependency 'jaro_winkler', '>= 1.5.4', '< 1.6'
43
+ spec.add_dependency 'roo', '>= 2.8.3', '< 2.9'
44
+ spec.add_dependency 'roo-xls', '>= 1.2.0', '< 1.3'
42
45
  end
@@ -51,15 +51,19 @@ module Eco
51
51
  private
52
52
 
53
53
  def session
54
- @session ||= ASSETS.session
54
+ ASSETS.session
55
55
  end
56
56
 
57
- def micro
58
- session.micro
57
+ def config
58
+ session.config
59
59
  end
60
60
 
61
- def config
62
- @config ||= ASSETS.config
61
+ def logger
62
+ session.logger
63
+ end
64
+
65
+ def micro
66
+ session.micro
63
67
  end
64
68
 
65
69
  end
@@ -19,6 +19,7 @@ module Eco
19
19
  @attribute = value
20
20
  end
21
21
 
22
+ # TODO: it might rather merge?
22
23
  # Some parsers require dependencies to do their job.
23
24
  def dependencies(value = nil)
24
25
  @dependencies ||= {}
@@ -44,3 +44,4 @@ require_relative 'default_parsers/freemium_parser'
44
44
  require_relative 'default_parsers/policy_groups_parser'
45
45
  require_relative 'default_parsers/login_providers_parser'
46
46
  require_relative 'default_parsers/csv_parser'
47
+ require_relative 'default_parsers/xls_parser'
@@ -0,0 +1,53 @@
1
+ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Loaders::Parser
2
+ attribute :xls
3
+
4
+ attr_accessor :already_required
5
+ attr_reader :file
6
+
7
+ def parser(file, deps)
8
+ @file = file
9
+ rows.tap {|r| @file = nil}
10
+ end
11
+
12
+ def serializer(array_hash, deps)
13
+ raise "Not implemented. TODO: using axlsx or rubyXL gems. See: https://spin.atomicobject.com/2017/03/22/parsing-excel-files-ruby/"
14
+ end
15
+
16
+ private
17
+
18
+ def headers
19
+ raise "You should implement this method"
20
+ end
21
+
22
+ def sheet_name
23
+ 0
24
+ end
25
+
26
+ def workbook
27
+ require_reading_libs!
28
+ Roo::Spreadsheet.open(file)
29
+ end
30
+
31
+ def spreadheet(name_or_index = sheet_name)
32
+ workbook.sheet(name_or_index)
33
+ end
34
+
35
+ def rows(target = headers)
36
+ begin
37
+ spreadheet.parse(header_search: target)
38
+ rescue Roo::HeaderRowNotFoundError => e
39
+ missing = JSON.parse(e.message)
40
+ logger.warn("The input file is missing these headers: #{missing}")
41
+ present = target - missing
42
+ rows(present)
43
+ end
44
+ end
45
+
46
+ def require_reading_libs!
47
+ return if already_required
48
+ require 'roo'
49
+ require 'roo-xls'
50
+ already_required = true
51
+ end
52
+
53
+ end
@@ -99,6 +99,7 @@ module Eco
99
99
  newFrom to_a - discarded
100
100
  end
101
101
 
102
+ # TODO: it should rather use the people-to-csv case somehow
102
103
  # Helper to dump the entries into a CSV
103
104
  # @param filename [String] the destination file
104
105
  def export(filename)
@@ -2,7 +2,7 @@ module Eco
2
2
  module API
3
3
  module Common
4
4
  module People
5
- # TODO: EntryFactory should suppport multiple schemas itself
5
+ # TODO: EntryFactory should support multiple schemas itself (rather than being done on `Session`)
6
6
  # => currently, it's through session.entry_factory(schema: id), but this is wrong
7
7
  # => This way, Entries and PersonEntry will be able to refer to attr_map and person_parser linked to schema_id
8
8
  # => "schema_id" should be an optional column in the input file, or parsable via a custom parser to scope the schema
@@ -88,26 +88,63 @@ module Eco
88
88
  fatal("Format should be a Symbol. Given '#{format}'") if format && !format.is_a?(Symbol)
89
89
  fatal("There is no parser/serializer for format ':#{format.to_s}'") unless no_format || @person_parser.defined?(format)
90
90
 
91
- if file
92
- arr_hash = []
93
- if Eco::API::Common::Session::FileManager.file_exists?(file)
94
- encoding ||= Eco::API::Common::Session::FileManager.encoding(file)
95
- encoding = (encoding != "utf-8")? "#{encoding}|utf-8": encoding
96
- file_content = File.read(file, encoding: encoding)
97
- arr_hash = person_parser.parse(format, file_content).map.each_with_index do |entry_hash, i|
98
- j = (format == :csv)? i + 2 : i + 1
99
- entry_hash.tap {|hash| hash["idx"] = j}
100
- end
91
+ kargs = {}
92
+ kargs.merge!(content: data) unless no_data
93
+ kargs.merge!(file: file) unless no_file
94
+ kargs.merge!(format: format) unless no_format
95
+ kargs.merge!(encoding: encoding) if encoding
96
+
97
+ Entries.new(to_array_of_hashes(**kargs), klass: PersonEntry, factory: self)
98
+ end
99
+
100
+ def to_array_of_hashes(**kargs)
101
+ data = []
102
+ content, file, encoding, format = kargs.values_at(:content, :file, :encoding, :format)
103
+
104
+ # Support for multiple files
105
+ if file.is_a?(Array)
106
+ return file.each_with_object([]) do |f, out|
107
+ logger.info("Parsing file '#{f}'")
108
+ curr = to_array_of_hashes(**kargs.merge(file: f))
109
+ out.concat(curr)
110
+ end
111
+ end
112
+ # Get content only when it's not :xls
113
+ # note: even if content was provided, file takes precedence
114
+ content = get_file_content(file, format, encoding) if (format != :xls) && file
115
+
116
+ case content
117
+ when Hash
118
+ logger.error("Input data as 'Hash' not supported. Expecting 'Enumerable' or 'String'")
119
+ exit(1)
120
+ when String
121
+ to_array_of_hashes(content: person_parser.parse(format, content))
122
+ when Enumerable
123
+ sample = content.to_a.first
124
+ case sample
125
+ when Hash, Array, ::CSV::Row
126
+ Eco::CSV::Table.new(content).to_array_of_hashes
101
127
  else
102
- logger.warn("File does not exist: #{file}")
128
+ logger.error("Input content 'Array' of '#{sample.class}' is not supported.")
103
129
  end
104
-
105
- entries(data: arr_hash)
106
130
  else
107
- Entries.new(data, klass: PersonEntry, factory: self)
131
+ if file && format == :xls
132
+ person_parser.parse(format, file)
133
+ else
134
+ logger.error("Could not obtain any data out of these: #{kargs}. Given content: '#{content.class}'")
135
+ exit(1)
136
+ end
137
+ end.tap do |out_array|
138
+ start_from_two = (format == :csv) || format == :xls
139
+ out_array.each_with_index do |entry_hash, i|
140
+ entry_hash["idx"] = start_from_two ? i + 2 : i + 1
141
+ entry_hash["source_file"] = file
142
+ end
108
143
  end
144
+
109
145
  end
110
146
 
147
+
111
148
  # Helper that generates a file out of `data:`.
112
149
  # @raise Exception
113
150
  # - if you try to provide `data:` in the wrong format.
@@ -127,7 +164,7 @@ module Eco
127
164
 
128
165
  run = true
129
166
  if Eco::API::Common::Session::FileManager.file_exists?(file)
130
- prompt_user("The file '#{file}' already exists. Do you want to overwrite it? (Y/n):", default: "Y") do |response|
167
+ prompt_user("Do you want to overwrite it? (Y/n):", explanation: "The file '#{file}' already exists.", default: "Y") do |response|
131
168
  run = (response == "") || reponse.upcase.start_with?("Y")
132
169
  end
133
170
  end
@@ -150,6 +187,17 @@ module Eco
150
187
 
151
188
  private
152
189
 
190
+ def get_file_content(file, format, encoding)
191
+ unless Eco::API::Common::Session::FileManager.file_exists?(file)
192
+ logger.error("File does not exist: #{file}")
193
+ exit(1)
194
+ end
195
+ ext = File.extname(file)
196
+ encoding ||= Eco::API::Common::Session::FileManager.encoding(file)
197
+ encoding = (encoding != "utf-8")? "#{encoding}|utf-8": encoding
198
+ content = File.read(file, encoding: encoding)
199
+ end
200
+
153
201
  def fatal(msg)
154
202
  logger.fatal(msg)
155
203
  raise msg
@@ -16,7 +16,7 @@ module Eco
16
16
  CORE_ATTRS = ["id", "external_id", "email", "name", "supervisor_id", "filter_tags", "freemium"]
17
17
  ACCOUNT_ATTRS = ["policy_group_ids", "default_tag", "send_invites", "landing_page_id", "login_provider_ids"]
18
18
  TYPE = [:select, :text, :date, :number, :phone_number, :boolean, :multiple]
19
- FORMAT = [:csv, :xml, :json]
19
+ FORMAT = [:csv, :xml, :json, :xls]
20
20
 
21
21
  attr_reader :schema
22
22
  attr_reader :details_attrs, :all_model_attrs
@@ -2,8 +2,11 @@ class ::Exception
2
2
  def patch_full_message
3
3
  begin
4
4
  msg = []
5
- msg << "\n#{backtrace.first} \n#{message} (#{self.class.to_s})"
6
- backtrace[1..-1].each_with_index {|bt, i| msg << "#{" "*8}#{i+1}: from #{bt}"}
5
+ tracing = backtrace ? backtrace : []
6
+ tracing = (self.class == SystemStackError) ? tracing[1..30] : tracing[1..-1]
7
+ tracing ||= []
8
+ msg << "\n#{tracing.first} \n#{message} (#{self.class.to_s})"
9
+ tracing.each_with_index {|bt, i| msg << "#{" "*8}#{i+1}: from #{bt}"}
7
10
  msg.join("\n")
8
11
  rescue Exception => e
9
12
  puts "Something is wrong with 'patch_full_message': #{e}"
@@ -210,14 +210,20 @@ module Eco
210
210
  to_h(:supervisor_id)
211
211
  end
212
212
 
213
+ def group_by_schema
214
+ to_h do |person|
215
+ person.details && person.details.schema_id
216
+ end
217
+ end
218
+
213
219
  def to_h(attr = "id")
214
220
  super(attr || "id")
215
221
  end
216
222
  # @!endgroup
217
223
 
218
224
  # @!group Helper methods
219
- def analytics
220
- Eco::API::Organization::PeopleAnalytics.new(self.to_a)
225
+ def similarity
226
+ Eco::API::Organization::PeopleSimilarity.new(self.to_a)
221
227
  end
222
228
  # @!endgroup
223
229
 
@@ -13,7 +13,19 @@ module Eco
13
13
  # @!group Config
14
14
  # @return [String, Proc, nil] the target attribute to be read.
15
15
  def attribute=(attr)
16
- @attribute ||= "name"
16
+ @attribute = attr
17
+ end
18
+
19
+ def attribute
20
+ @attribute ||= :name
21
+ end
22
+
23
+ # Returns the target value to analyse
24
+ # @param person [Ecoportal::API::V1::Person]
25
+ def item_value(person)
26
+ return attr.call(item) if attribute.is_a?(Proc)
27
+ attr = attribute.to_sym
28
+ return item.send(attr) if item.respond_to?(attr)
17
29
  end
18
30
 
19
31
  # Define the order of relevance of per-user matches
@@ -37,6 +49,16 @@ module Eco
37
49
  @threshold ||= 0.15
38
50
  end
39
51
 
52
+ # Generates a new object with same config but different base `data`.
53
+ # @return [Eco::API::Organization::PeopleSimilarity]
54
+ def newFrom(data)
55
+ super(data).tap do |simil|
56
+ simil.threshold = threshold
57
+ simil.order = order
58
+ simil.attribute = attribute
59
+ end
60
+ end
61
+
40
62
  # @!endgroup
41
63
 
42
64
  # @!group Searchers
@@ -50,36 +72,158 @@ module Eco
50
72
  end
51
73
  end
52
74
 
75
+ # It returns all people with no name
76
+ # @return [Eco::API::Organization::PeopleSimilarity]
77
+ def unnamed
78
+ select do |person|
79
+ person.name.to_s.strip.length < 2
80
+ end.yield_self do |results|
81
+ newFrom(results)
82
+ end
83
+ end
84
+
85
+ # It returns all people with a name
86
+ # @return [Eco::API::Organization::PeopleSimilarity]
87
+ def named
88
+ reject do |person|
89
+ person.name.to_s.strip.length < 2
90
+ end.yield_self do |results|
91
+ newFrom(results)
92
+ end
93
+ end
94
+
95
+ # It returns all the entries with `attribute` empty
96
+ # @return [Eco::API::Organization::PeopleSimilarity]
97
+ def blank_attribute
98
+ select do |person|
99
+ item_value(person).to_s.strip.length < 2
100
+ end.yield_self do |results|
101
+ newFrom(results)
102
+ end
103
+ end
104
+
105
+ # It returns all the entries with `attribute` **not** empty
106
+ # @return [Eco::API::Organization::PeopleSimilarity]
107
+ def attribute_present
108
+ reject do |person|
109
+ item_value(person).to_s.strip.length < 2
110
+ end.yield_self do |results|
111
+ newFrom(results)
112
+ end
113
+ end
114
+
53
115
  # @!endgroup
54
116
 
55
- # @!group Analysers
117
+ # @!group Analysis starters
56
118
 
57
119
  # Analyses People based on `options`
120
+ # @param needle_read [Proc, Symbol] when the value to read from `needle` object is different to the `:read` (`attribute`).
121
+ # This allows, for example, faceting `needle.name` (needle_read) against `haystack_item.details[alt_id]` (read).
122
+ # @param keep_empty [Boolean] to indicate if it should get rid of people with no results (based on threshold)
58
123
  # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
59
- def analyse(**options)
124
+ def analyse(needle_read: nil, keep_empty: false, **options)
60
125
  options = { read: self.attribute }.merge(options)
126
+ total = count; i = 1
61
127
  each_with_object({}) do |person, results|
62
- results[person.id] = find_all_with_score(person, **options)
128
+ needle_str = needle_read ? item_string(person, needle_read) : nil
129
+ results[person.id] = find_all_with_score(person, needle_str: needle_str, **options)
130
+ print_progress("Analysed", total, i)
131
+ i += 1
132
+ end.yield_self do |analysed|
133
+ analysed = clean_empty(analysed) unless keep_empty
134
+ #puts "... #{analysed.count} results after cleaning empty"
135
+ analysed
136
+ end
137
+ end
138
+
139
+ # @!endgroup
140
+
141
+ # @!group Results Treatment
142
+
143
+ # Gets a new instance object of this class, with only people in results
144
+ # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
145
+ # @return [Eco::API::Organization::PeopleSimilarity]
146
+ def newSimilarity(analysed)
147
+ newFrom(people_in_results(analysed))
148
+ end
149
+
150
+ def people_in_results(analysed)
151
+ analysed.each_with_object([]) do |(id, results), people|
152
+ related = results.each_with_object([self[id]]) do |result, related|
153
+ related << result.match
154
+ end
155
+ related.each {|person| people << person unless people.include?(person)}
63
156
  end
64
157
  end
65
158
 
159
+ # Removes from results those that do not have similar entries
160
+ def clean_empty(analysed)
161
+ analysed.select do |id, results|
162
+ !results.empty?
163
+ end
164
+ end
165
+
166
+ # Helper to do some treatment of the results
167
+ # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
168
+ # @return [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
169
+ def with_analysed(analysed, keep_empty: false)
170
+ analysed.each_with_object({}) do |(id, results), reanalysed|
171
+ reanalysed[id] = yield(self[id], results)
172
+ end.yield_self do |reanalysed|
173
+ reanalysed = clean_empty(reanalysed) unless keep_empty
174
+ reanalysed
175
+ end.tap {|out| "with_analysed... returns #{out.count} records"}
176
+ end
177
+
66
178
  # Launches a reanalysis on `analysed` based on `options`
67
179
  # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
68
- def re_analyse(analysed, **options)
69
- analysed.each_with_object({}) do |(id, results), out|
70
- out[id] = results.relevant_results(**options)
180
+ def rearrange(analysed, **options)
181
+ with_analysed(analysed) do |person, results|
182
+ results.relevant_results(**options)
71
183
  end
72
184
  end
73
185
 
74
- # @!group Helpers
186
+ # Reanalyses by using a block to treat the needle and item values
187
+ def reanalyse(analysed, msg: "Reanalysing", **options, &block)
188
+ options = { read: self.attribute }.merge(options)
189
+ total = analysed.count; i = 1
190
+ with_analysed(analysed) do |person, results|
191
+ print_progress(msg, total, i)
192
+ i += 1
193
+ recalculate_results(results, &block)
194
+ end
195
+ end
196
+
197
+ # Reanalyses by ignoring matching words between the `needle` and those found in `results`
198
+ def ignore_matching_words(analysed, **options)
199
+ prompt = "Reanalysing by ignoring matching words"
200
+ reanalyse(analysed, msg: prompt, **options) do |needle_str, item_str, needle, item|
201
+ self.class.remove_matching_words(needle_str, item_str)
202
+ end
203
+ end
204
+
205
+ # Reanalyses by ignoring matching words between the `needle` and those found in `results`
206
+ def ignore_matching_words_old(analysed, **options)
207
+ options = { read: self.attribute }.merge(options)
208
+ total = analysed.count; i = 1
209
+ with_analysed(analysed) do |person, results|
210
+ print_progress("Reanalysing by ignoring matching words", total, i)
211
+ i += 1
212
+ ignore_same_words_score(results, **options)
213
+ end
214
+ end
215
+
216
+ # @!endgroup
217
+
218
+ # @!group Reporting Helpers
75
219
 
76
220
  # @return [String] well structured text
77
- def analysis(analysed, format: :txt)
221
+ def report(analysed, format: :txt)
78
222
  case
79
223
  when format == :txt
80
224
  analysed.each_with_object("") do |(id, results), out|
81
225
  msg = results.results.map {|r| r.print}.join("\n ")
82
- "'#{self[id].identify}':\n " + msg
226
+ out << "#{self[id].identify}:\n " + msg + "\n"
83
227
  end
84
228
  end
85
229
  end
@@ -91,7 +235,7 @@ module Eco
91
235
  def print_analysis(**options)
92
236
  analysed = options[:analysed] || results_with_false_positives.analyse(**options)
93
237
  analysed.each_with_object({}) do |(id, results), out|
94
- puts analysis(analysed)
238
+ puts report(analysed)
95
239
  end
96
240
  end
97
241
  # @!endgroup
@@ -105,6 +249,22 @@ module Eco
105
249
 
106
250
  private
107
251
 
252
+ def print_progress(msg, total, num)
253
+ return unless total > 10
254
+ puts "" unless num > 1
255
+ @print_msg_len ||= 0
256
+ percent = (100 * num.to_f / total).round(1)
257
+ msg = " #{msg}: #{percent}% (#{num} of #{total})\r"
258
+ @print_msg_len = msg.length unless @print_msg_len > msg.length
259
+ print msg
260
+ $stdout.flush
261
+ if percent > 99.9
262
+ sleep(0.2)
263
+ print "#{" " * @print_msg_len}\r"
264
+ $stdout.flush
265
+ end
266
+ end
267
+
108
268
 
109
269
  end
110
270
  end