eco-helpers 2.0.19 → 2.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +77 -1
  3. data/eco-helpers.gemspec +4 -1
  4. data/lib/eco/api/common/base_loader.rb +9 -5
  5. data/lib/eco/api/common/loaders/parser.rb +1 -0
  6. data/lib/eco/api/common/people/default_parsers.rb +1 -0
  7. data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
  8. data/lib/eco/api/common/people/entries.rb +1 -0
  9. data/lib/eco/api/common/people/entry_factory.rb +64 -16
  10. data/lib/eco/api/common/people/person_parser.rb +1 -1
  11. data/lib/eco/api/common/version_patches/exception.rb +5 -2
  12. data/lib/eco/api/organization/people.rb +8 -2
  13. data/lib/eco/api/organization/people_similarity.rb +171 -11
  14. data/lib/eco/api/organization/tag_tree.rb +33 -0
  15. data/lib/eco/api/session.rb +15 -7
  16. data/lib/eco/api/session/batch.rb +1 -1
  17. data/lib/eco/api/session/batch/job.rb +34 -9
  18. data/lib/eco/api/usecases.rb +2 -2
  19. data/lib/eco/api/usecases/base_case.rb +2 -2
  20. data/lib/eco/api/usecases/base_io.rb +17 -4
  21. data/lib/eco/api/usecases/default_cases.rb +1 -0
  22. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
  23. data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
  24. data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
  25. data/lib/eco/cli/config/default/input.rb +61 -8
  26. data/lib/eco/cli/config/default/options.rb +46 -2
  27. data/lib/eco/cli/config/default/people.rb +18 -24
  28. data/lib/eco/cli/config/default/usecases.rb +31 -2
  29. data/lib/eco/cli/config/default/workflow.rb +8 -6
  30. data/lib/eco/cli/scripting/args_helpers.rb +2 -2
  31. data/lib/eco/csv/table.rb +121 -21
  32. data/lib/eco/data/fuzzy_match.rb +52 -12
  33. data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
  34. data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
  35. data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
  36. data/lib/eco/data/fuzzy_match/result.rb +15 -1
  37. data/lib/eco/data/fuzzy_match/results.rb +18 -0
  38. data/lib/eco/data/fuzzy_match/score.rb +12 -7
  39. data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
  40. data/lib/eco/language/models/collection.rb +5 -2
  41. data/lib/eco/version.rb +1 -1
  42. metadata +64 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14260868c76936513a93d4d104eacebbd11e47ed05806d4102ee76196a300d2b
4
- data.tar.gz: 35784d03a18f89d2ce8bf5c4105e0eaa647dd10b4e1fee03897319d9ad838760
3
+ metadata.gz: 06a58306abadf9b27421583990eb14960f7f30368515481b16aa474de1bc1b08
4
+ data.tar.gz: 0eef93068fdb31bc6d1949f1022eac325403ac3dbb47c95b593a5b9623655773
5
5
  SHA512:
6
- metadata.gz: 514d71e93bfa4fb854d9062be03306e154a4dfd184256ab03da30e2bf4bb2a45fb305efd5e2206821e522dc7cc0bfbc69e75285e3c6292dca78f359bd166f52a
7
- data.tar.gz: c99a424905916cef61333c18bb31726e90fb9759a4a1b747b72ab123f4bc09ecc6962205a966b3ea46aa1f0a4cc3dd7f72a8c53829cc764a7993d64ad45495bf
6
+ metadata.gz: 80b0d2fc7bedb99deabae6d7d273cb4967eb0022db2e743078a82cace02d4f499fe8ad51ec02b7c5bcef549aac9fb03b0ea7ef5358fb602c65856654c7c20814
7
+ data.tar.gz: 553e1342f38c244ab57bb259b639d55ddc4a4d5d6f72bd54ed9290111636f4dffb29834f69a5b7d2707ee3d44951fa52efccf81589194f11dfa1a709309ddb77
data/CHANGELOG.md CHANGED
@@ -1,7 +1,83 @@
1
1
  # Change Log
2
2
  All notable changes to this project will be documented in this file.
3
3
 
4
- ## [2.0.19] - 2021-05-xx
4
+ ## [2.0.25] - 2021-06-xx
5
+
6
+ ### Added
7
+ - `Eco::API::UseCases::DefaultCases::HrisCase` validation error to require `-schema-id` command line when there are people in schemas other than the active one
8
+
9
+ ### Changed
10
+ - `Eco::API::Session::Batch::Job`
11
+ * for backwards compatibility `-include-only-excluded` should bring an options structure compatible with `-include-excluded`
12
+
13
+ ### Fixed
14
+ - `Eco::API::Session::Batch` fixed typo that would prevent `prompt_user` from working
15
+
16
+ ## [2.0.24] - 2021-06-22
17
+
18
+ ### Added
19
+ - `Eco::API::Session::Batch::Job` made **native** `-include-excluded`
20
+ * also added new option `-include-only-excluded` to be able to only target people HRIS excluded
21
+
22
+
23
+ ## [2.0.23] - 2021-06-22
24
+
25
+ ### Added
26
+ - `Eco::API::Session::Batch::Job` **new** option (`-save-requests`) to save requests even if in `dry-run` (`-simulate`)
27
+ ### Changed
28
+ - `Eco::API::Session::Batch::Job` new people won't create updates unless they have either details or account
29
+ * because that entry is not supposed to be created unless it has account or details
30
+
31
+ ## [2.0.22] - 2021-06-18
32
+
33
+ ### Added
34
+ - exposed `logger` in `BaseLoader`
35
+ - support for multiple input files
36
+ * `Eco::API::Common::People::EntryFactory#entries`:
37
+ - refactored to allow multiple input files parsing
38
+ - in addition to `idx`, hash entries will get their `source_file`
39
+ * Input callback at `lib/eco/cli/config/default/input` refactored format detection and enabled folder input
40
+ * `SCR.get_file` language extended to also mention folder (not just file)
41
+ - support for `.xls` and `.xlsx` files
42
+ * `Eco::API::Common::People::DefaultParsers::XLSParser` the Excel files **parser**
43
+ * `Eco::API::Common::People::PersonParser` added `:xls` as an accepted format
44
+ * `Eco::API::Session#fields_mapper` exposed mapper through a method to allow **headers detection**
45
+ - The external names of the fields are the column headers of the input file
46
+ * `Eco::API::UseCases::BaseIO` when arguments validation fails, it now raises a specific `MissingParameter` error
47
+
48
+ ### Changed
49
+ - dry out `BaseLoader` (only session is set as instance variable)
50
+ - removed `creek` **dependency** (it was not used anywhere in the gem)
51
+ * we just kept `roo` and `roo-xls`
52
+ - custom `Error` classes now all inherit from `StandardError` (rather than `Exception`)
53
+
54
+
55
+
56
+ ## [2.0.21] - 2021-06-04
57
+
58
+ ### Added
59
+ - `Eco::CSV::Table`, support to create the table out of an `Array<Hash>`
60
+ - This opens new methods to transform input Excel file to this data structure and unify input data structures.
61
+ - **new** use case `Eco::API::UseCases::DefaultCases::CleanUnknownTags` invokable via `clean-unknown-tags`
62
+
63
+ ### Changed
64
+ - `Eco::API::Common::People::EntryFactory` slight **refactor** to boost better support for multiple input formats
65
+
66
+
67
+ ## [2.0.20] - 2021-05-31
68
+
69
+ ### Added
70
+ - **dependencies** to `creek`, `roo` and `roo-xls`
71
+ - **dependencies** to `hashdiff`
72
+ - `Eco::API::Session#parse_attribute` => added missing parameter `deps:`
73
+ - new option `-stdout [file]` to redirect the output to a file
74
+ - `Eco::CSV::Table`, **added** more helper methods `#group_by`, `#transform_values`, `#slice`, `#slice_columns`, `#delete_column`
75
+ - `Eco::API::Organization::TagTree` **added** more helper methods: `top?`, `tag=`, `as_json`, `dup`, `diff`
76
+
77
+ ### Fixed
78
+ - `Exception` patch: when `SystemStackError` there is no `backtrace` :/
79
+
80
+ ## [2.0.19] - 2021-05-31
5
81
 
6
82
  ### Added
7
83
  - Better error message for people searches & **offer** to select among the candidates:
data/eco-helpers.gemspec CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
14
14
  spec.homepage = "https://www.ecoportal.com"
15
15
  spec.licenses = %w[MIT]
16
16
 
17
- spec.required_ruby_version = '>= 2.4.4'
17
+ spec.required_ruby_version = '>= 2.5.0'
18
18
 
19
19
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
20
20
  f.match(%r{^(test|spec|features)/})
@@ -36,7 +36,10 @@ Gem::Specification.new do |spec|
36
36
  spec.add_dependency 'aws-sdk-ses', '>= 1.36.0', '< 2'
37
37
  spec.add_dependency 'dotenv', '>= 2.7.6', '< 2.8'
38
38
  spec.add_dependency 'net-sftp', '>= 3.0.0', '< 3.1'
39
+ spec.add_dependency 'hashdiff', '>= 1.0.1', '< 1.1'
39
40
  spec.add_dependency 'fuzzy_match', '>= 2.1.0', '< 2.2'
40
41
  spec.add_dependency 'amatch', '>= 0.4.0', '< 0.5'
41
42
  spec.add_dependency 'jaro_winkler', '>= 1.5.4', '< 1.6'
43
+ spec.add_dependency 'roo', '>= 2.8.3', '< 2.9'
44
+ spec.add_dependency 'roo-xls', '>= 1.2.0', '< 1.3'
42
45
  end
@@ -51,15 +51,19 @@ module Eco
51
51
  private
52
52
 
53
53
  def session
54
- @session ||= ASSETS.session
54
+ ASSETS.session
55
55
  end
56
56
 
57
- def micro
58
- session.micro
57
+ def config
58
+ session.config
59
59
  end
60
60
 
61
- def config
62
- @config ||= ASSETS.config
61
+ def logger
62
+ session.logger
63
+ end
64
+
65
+ def micro
66
+ session.micro
63
67
  end
64
68
 
65
69
  end
@@ -19,6 +19,7 @@ module Eco
19
19
  @attribute = value
20
20
  end
21
21
 
22
+ # TODO: it might rather merge?
22
23
  # Some parsers require dependencies to do their job.
23
24
  def dependencies(value = nil)
24
25
  @dependencies ||= {}
@@ -44,3 +44,4 @@ require_relative 'default_parsers/freemium_parser'
44
44
  require_relative 'default_parsers/policy_groups_parser'
45
45
  require_relative 'default_parsers/login_providers_parser'
46
46
  require_relative 'default_parsers/csv_parser'
47
+ require_relative 'default_parsers/xls_parser'
@@ -0,0 +1,53 @@
1
+ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Loaders::Parser
2
+ attribute :xls
3
+
4
+ attr_accessor :already_required
5
+ attr_reader :file
6
+
7
+ def parser(file, deps)
8
+ @file = file
9
+ rows.tap {|r| @file = nil}
10
+ end
11
+
12
+ def serializer(array_hash, deps)
13
+ raise "Not implemented. TODO: using axlsx or rubyXL gems. See: https://spin.atomicobject.com/2017/03/22/parsing-excel-files-ruby/"
14
+ end
15
+
16
+ private
17
+
18
+ def headers
19
+ raise "You should implement this method"
20
+ end
21
+
22
+ def sheet_name
23
+ 0
24
+ end
25
+
26
+ def workbook
27
+ require_reading_libs!
28
+ Roo::Spreadsheet.open(file)
29
+ end
30
+
31
+ def spreadheet(name_or_index = sheet_name)
32
+ workbook.sheet(name_or_index)
33
+ end
34
+
35
+ def rows(target = headers)
36
+ begin
37
+ spreadheet.parse(header_search: target)
38
+ rescue Roo::HeaderRowNotFoundError => e
39
+ missing = JSON.parse(e.message)
40
+ logger.warn("The input file is missing these headers: #{missing}")
41
+ present = target - missing
42
+ rows(present)
43
+ end
44
+ end
45
+
46
+ def require_reading_libs!
47
+ return if already_required
48
+ require 'roo'
49
+ require 'roo-xls'
50
+ already_required = true
51
+ end
52
+
53
+ end
@@ -99,6 +99,7 @@ module Eco
99
99
  newFrom to_a - discarded
100
100
  end
101
101
 
102
+ # TODO: it should rather use the people-to-csv case somehow
102
103
  # Helper to dump the entries into a CSV
103
104
  # @param filename [String] the destination file
104
105
  def export(filename)
@@ -2,7 +2,7 @@ module Eco
2
2
  module API
3
3
  module Common
4
4
  module People
5
- # TODO: EntryFactory should suppport multiple schemas itself
5
+ # TODO: EntryFactory should support multiple schemas itself (rather than being done on `Session`)
6
6
  # => currently, it's through session.entry_factory(schema: id), but this is wrong
7
7
  # => This way, Entries and PersonEntry will be able to refer to attr_map and person_parser linked to schema_id
8
8
  # => "schema_id" should be an optional column in the input file, or parsable via a custom parser to scope the schema
@@ -88,26 +88,63 @@ module Eco
88
88
  fatal("Format should be a Symbol. Given '#{format}'") if format && !format.is_a?(Symbol)
89
89
  fatal("There is no parser/serializer for format ':#{format.to_s}'") unless no_format || @person_parser.defined?(format)
90
90
 
91
- if file
92
- arr_hash = []
93
- if Eco::API::Common::Session::FileManager.file_exists?(file)
94
- encoding ||= Eco::API::Common::Session::FileManager.encoding(file)
95
- encoding = (encoding != "utf-8")? "#{encoding}|utf-8": encoding
96
- file_content = File.read(file, encoding: encoding)
97
- arr_hash = person_parser.parse(format, file_content).map.each_with_index do |entry_hash, i|
98
- j = (format == :csv)? i + 2 : i + 1
99
- entry_hash.tap {|hash| hash["idx"] = j}
100
- end
91
+ kargs = {}
92
+ kargs.merge!(content: data) unless no_data
93
+ kargs.merge!(file: file) unless no_file
94
+ kargs.merge!(format: format) unless no_format
95
+ kargs.merge!(encoding: encoding) if encoding
96
+
97
+ Entries.new(to_array_of_hashes(**kargs), klass: PersonEntry, factory: self)
98
+ end
99
+
100
+ def to_array_of_hashes(**kargs)
101
+ data = []
102
+ content, file, encoding, format = kargs.values_at(:content, :file, :encoding, :format)
103
+
104
+ # Support for multiple file
105
+ if file.is_a?(Array)
106
+ return file.each_with_object([]) do |f, out|
107
+ logger.info("Parsing file '#{f}'")
108
+ curr = to_array_of_hashes(**kargs.merge(file: f))
109
+ out.concat(curr)
110
+ end
111
+ end
112
+ # Get content only when it's not :xls
113
+ # note: even if content was provided, file takes precedence
114
+ content = get_file_content(file, format, encoding) if (format != :xls) && file
115
+
116
+ case content
117
+ when Hash
118
+ logger.error("Input data as 'Hash' not supported. Expecting 'Enumerable' or 'String'")
119
+ exit(1)
120
+ when String
121
+ to_array_of_hashes(content: person_parser.parse(format, content))
122
+ when Enumerable
123
+ sample = content.to_a.first
124
+ case sample
125
+ when Hash, Array, ::CSV::Row
126
+ Eco::CSV::Table.new(content).to_array_of_hashes
101
127
  else
102
- logger.warn("File does not exist: #{file}")
128
+ logger.error("Input content 'Array' of '#{sample.class}' is not supported.")
103
129
  end
104
-
105
- entries(data: arr_hash)
106
130
  else
107
- Entries.new(data, klass: PersonEntry, factory: self)
131
+ if file && format == :xls
132
+ person_parser.parse(format, file)
133
+ else
134
+ logger.error("Could not obtain any data out of these: #{kargs}. Given content: '#{content.class}'")
135
+ exit(1)
136
+ end
137
+ end.tap do |out_array|
138
+ start_from_two = (format == :csv) || format == :xls
139
+ out_array.each_with_index do |entry_hash, i|
140
+ entry_hash["idx"] = start_from_two ? i + 2 : i + 1
141
+ entry_hash["source_file"] = file
142
+ end
108
143
  end
144
+
109
145
  end
110
146
 
147
+
111
148
  # Helper that generates a file out of `data:`.
112
149
  # @raise Exception
113
150
  # - if you try to provide `data:` in the wrong format.
@@ -127,7 +164,7 @@ module Eco
127
164
 
128
165
  run = true
129
166
  if Eco::API::Common::Session::FileManager.file_exists?(file)
130
- prompt_user("The file '#{file}' already exists. Do you want to overwrite it? (Y/n):", default: "Y") do |response|
167
+ prompt_user("Do you want to overwrite it? (Y/n):", explanation: "The file '#{file}' already exists.", default: "Y") do |response|
131
168
  run = (response == "") || reponse.upcase.start_with?("Y")
132
169
  end
133
170
  end
@@ -150,6 +187,17 @@ module Eco
150
187
 
151
188
  private
152
189
 
190
+ def get_file_content(file, format, encoding)
191
+ unless Eco::API::Common::Session::FileManager.file_exists?(file)
192
+ logger.error("File does not exist: #{file}")
193
+ exit(1)
194
+ end
195
+ ext = File.extname(file)
196
+ encoding ||= Eco::API::Common::Session::FileManager.encoding(file)
197
+ encoding = (encoding != "utf-8")? "#{encoding}|utf-8": encoding
198
+ content = File.read(file, encoding: encoding)
199
+ end
200
+
153
201
  def fatal(msg)
154
202
  logger.fatal(msg)
155
203
  raise msg
@@ -16,7 +16,7 @@ module Eco
16
16
  CORE_ATTRS = ["id", "external_id", "email", "name", "supervisor_id", "filter_tags", "freemium"]
17
17
  ACCOUNT_ATTRS = ["policy_group_ids", "default_tag", "send_invites", "landing_page_id", "login_provider_ids"]
18
18
  TYPE = [:select, :text, :date, :number, :phone_number, :boolean, :multiple]
19
- FORMAT = [:csv, :xml, :json]
19
+ FORMAT = [:csv, :xml, :json, :xls]
20
20
 
21
21
  attr_reader :schema
22
22
  attr_reader :details_attrs, :all_model_attrs
@@ -2,8 +2,11 @@ class ::Exception
2
2
  def patch_full_message
3
3
  begin
4
4
  msg = []
5
- msg << "\n#{backtrace.first} \n#{message} (#{self.class.to_s})"
6
- backtrace[1..-1].each_with_index {|bt, i| msg << "#{" "*8}#{i+1}: from #{bt}"}
5
+ tracing = backtrace ? backtrace : []
6
+ tracing = (self.class == SystemStackError) ? tracing[1..30] : tracing[1..-1]
7
+ tracing ||= []
8
+ msg << "\n#{tracing.first} \n#{message} (#{self.class.to_s})"
9
+ tracing.each_with_index {|bt, i| msg << "#{" "*8}#{i+1}: from #{bt}"}
7
10
  msg.join("\n")
8
11
  rescue Exception => e
9
12
  puts "Something is wrong with 'patch_full_message': #{e}"
@@ -210,14 +210,20 @@ module Eco
210
210
  to_h(:supervisor_id)
211
211
  end
212
212
 
213
+ def group_by_schema
214
+ to_h do |person|
215
+ person.details && person.details.schema_id
216
+ end
217
+ end
218
+
213
219
  def to_h(attr = "id")
214
220
  super(attr || "id")
215
221
  end
216
222
  # @!endgroup
217
223
 
218
224
  # @!group Helper methods
219
- def analytics
220
- Eco::API::Organization::PeopleAnalytics.new(self.to_a)
225
+ def similarity
226
+ Eco::API::Organization::PeopleSimilarity.new(self.to_a)
221
227
  end
222
228
  # @!endgroup
223
229
 
@@ -13,7 +13,19 @@ module Eco
13
13
  # @!group Config
14
14
  # @return [String, Proc, nil] the target attribute to be read.
15
15
  def attribute=(attr)
16
- @attribute ||= "name"
16
+ @attribute = attr
17
+ end
18
+
19
+ def attribute
20
+ @attribute ||= :name
21
+ end
22
+
23
+ # Returns the target value to analyse
24
+ # @param person [Ecoportal::API::V1::Person]
25
+ def item_value(person)
26
+ return attr.call(item) if attribute.is_a?(Proc)
27
+ attr = attribute.to_sym
28
+ return item.send(attr) if item.respond_to?(attr)
17
29
  end
18
30
 
19
31
  # Define the order or relevant of per user matches
@@ -37,6 +49,16 @@ module Eco
37
49
  @threshold ||= 0.15
38
50
  end
39
51
 
52
+ # Generates a new object with same config but different base `data`.
53
+ # @return [Eco::API::Organization::PeopleSimilarity]
54
+ def newFrom(data)
55
+ super(data).tap do |simil|
56
+ simil.threshold = threshold
57
+ simil.order = order
58
+ simil.attribute = attribute
59
+ end
60
+ end
61
+
40
62
  # @!endgroup
41
63
 
42
64
  # @!group Searchers
@@ -50,36 +72,158 @@ module Eco
50
72
  end
51
73
  end
52
74
 
75
+ # It returns all people with no name
76
+ # @return [Eco::API::Organization::PeopleSimilarity]
77
+ def unnamed
78
+ select do |person|
79
+ person.name.to_s.strip.length < 2
80
+ end.yield_self do |results|
81
+ newFrom(results)
82
+ end
83
+ end
84
+
85
+ # It returns all people with a name
86
+ # @return [Eco::API::Organization::PeopleSimilarity]
87
+ def named
88
+ reject do |person|
89
+ person.name.to_s.strip.length < 2
90
+ end.yield_self do |results|
91
+ newFrom(results)
92
+ end
93
+ end
94
+
95
+ # It returns all the entries with `attribute` empty
96
+ # @return [Eco::API::Organization::PeopleSimilarity]
97
+ def blank_attribute
98
+ select do |person|
99
+ item_value(person).to_s.strip.length < 2
100
+ end.yield_self do |results|
101
+ newFrom(results)
102
+ end
103
+ end
104
+
105
+ # It returns all the entries with `attribute` **not** empty
106
+ # @return [Eco::API::Organization::PeopleSimilarity]
107
+ def attribute_present
108
+ reject do |person|
109
+ item_value(person).to_s.strip.length < 2
110
+ end.yield_self do |results|
111
+ newFrom(results)
112
+ end
113
+ end
114
+
53
115
  # @!endgroup
54
116
 
55
- # @!group Analysers
117
+ # @!group Analysis starters
56
118
 
57
119
  # Analyses People based on `options`
120
+ # @param needle_read [Proc, Symbol] when the value to read from `needle` object is different to the `:read` (`attribute`).
121
+ # This allows to for example, facet `needle.name` (needle_read) against `haystack_item.details[alt_id]` (read).
122
+ # @param keep_empty [Boolean] to indicate if it should get rid of people with no results (based on threshold)
58
123
  # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
59
- def analyse(**options)
124
+ def analyse(needle_read: nil, keep_empty: false, **options)
60
125
  options = { read: self.attribute }.merge(options)
126
+ total = count; i = 1
61
127
  each_with_object({}) do |person, results|
62
- results[person.id] = find_all_with_score(person, **options)
128
+ needle_str = needle_read ? item_string(person, needle_read) : nil
129
+ results[person.id] = find_all_with_score(person, needle_str: needle_str, **options)
130
+ print_progress("Analysed", total, i)
131
+ i += 1
132
+ end.yield_self do |analysed|
133
+ analysed = clean_empty(analysed) unless keep_empty
134
+ #puts "... #{analysed.count} results after cleaning empty"
135
+ analysed
136
+ end
137
+ end
138
+
139
+ # @!endgroup
140
+
141
+ # @!group Results Treatment
142
+
143
+ # Gets a new instance object of this class, with only people in results
144
+ # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
145
+ # @return [Eco::API::Organization::PeopleSimilarity]
146
+ def newSimilarity(analysed)
147
+ newFrom(people_in_results(analysed))
148
+ end
149
+
150
+ def people_in_results(analysed)
151
+ analysed.each_with_object([]) do |(id, results), people|
152
+ related = results.each_with_object([self[id]]) do |result, related|
153
+ related << result.match
154
+ end
155
+ related.each {|person| people << person unless people.include?(person)}
63
156
  end
64
157
  end
65
158
 
159
+ # Removes from results those that do not have similar entries
160
+ def clean_empty(analysed)
161
+ analysed.select do |id, results|
162
+ !results.empty?
163
+ end
164
+ end
165
+
166
+ # Helper to do some treatment of the results
167
+ # @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
168
+ # @return [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
169
+ def with_analysed(analysed, keep_empty: false)
170
+ analysed.each_with_object({}) do |(id, results), reanalysed|
171
+ reanalysed[id] = yield(self[id], results)
172
+ end.yield_self do |reanalysed|
173
+ reanalysed = clean_empty(reanalysed) unless keep_empty
174
+ reanalysed
175
+ end.tap {|out| "with_analysed... returns #{out.count} records"}
176
+ end
177
+
66
178
  # Launches a reanalysis on `analysed` based on `options`
67
179
  # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
68
- def re_analyse(analysed, **options)
69
- analysed.each_with_object({}) do |(id, results), out|
70
- out[id] = results.relevant_results(**options)
180
+ def rearrange(analysed, **options)
181
+ with_analysed(analysed) do |person, results|
182
+ results.relevant_results(**options)
71
183
  end
72
184
  end
73
185
 
74
- # @!group Helpers
186
+ # Reanalyses by using a block to treat the needle and item values
187
+ def reanalyse(analysed, msg: "Reanalysing", **options, &block)
188
+ options = { read: self.attribute }.merge(options)
189
+ total = analysed.count; i = 1
190
+ with_analysed(analysed) do |person, results|
191
+ print_progress(msg, total, i)
192
+ i += 1
193
+ recalculate_results(results, &block)
194
+ end
195
+ end
196
+
197
+ # Reanalyses by ignoring matching words between the `needle` and those found in `results`
198
+ def ignore_matching_words(analysed, **options)
199
+ prompt = "Reanalysing by ignoring matching words"
200
+ reanalyse(analysed, msg: prompt, **options) do |needle_str, item_str, needle, item|
201
+ self.class.remove_matching_words(needle_str, item_str)
202
+ end
203
+ end
204
+
205
+ # Reanalyses by ignoring matching words between the `needle` and those found in `results`
206
+ def ignore_matching_words_old(analysed, **options)
207
+ options = { read: self.attribute }.merge(options)
208
+ total = analysed.count; i = 1
209
+ with_analysed(analysed) do |person, results|
210
+ print_progress("Reanalysing by ignoring matching words", total, i)
211
+ i += 1
212
+ ignore_same_words_score(results, **options)
213
+ end
214
+ end
215
+
216
+ # @!endgroup
217
+
218
+ # @!group Reporting Helpers
75
219
 
76
220
  # @return [String] well structured text
77
- def analysis(analysed, format: :txt)
221
+ def report(analysed, format: :txt)
78
222
  case
79
223
  when format == :txt
80
224
  analysed.each_with_object("") do |(id, results), out|
81
225
  msg = results.results.map {|r| r.print}.join("\n ")
82
- "'#{self[id].identify}':\n " + msg
226
+ out << "#{self[id].identify}:\n " + msg + "\n"
83
227
  end
84
228
  end
85
229
  end
@@ -91,7 +235,7 @@ module Eco
91
235
  def print_analysis(**options)
92
236
  analysed = options[:analysed] || results_with_false_positives.analyse(**options)
93
237
  analysed.each_with_object({}) do |(id, results), out|
94
- puts analysis(analysed)
238
+ puts report(analysed)
95
239
  end
96
240
  end
97
241
  # @!endgroup
@@ -105,6 +249,22 @@ module Eco
105
249
 
106
250
  private
107
251
 
252
+ def print_progress(msg, total, num)
253
+ return unless total > 10
254
+ puts "" unless num > 1
255
+ @print_msg_len ||= 0
256
+ percent = (100 * num.to_f / total).round(1)
257
+ msg = " #{msg}: #{percent}% (#{num} of #{total})\r"
258
+ @print_msg_len = msg.length unless @print_msg_len > msg.length
259
+ print msg
260
+ $stdout.flush
261
+ if percent > 99.9
262
+ sleep(0.2)
263
+ print "#{" " * @print_msg_len}\r"
264
+ $stdout.flush
265
+ end
266
+ end
267
+
108
268
 
109
269
  end
110
270
  end