eco-helpers 2.0.19 → 2.0.25
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -1
- data/eco-helpers.gemspec +4 -1
- data/lib/eco/api/common/base_loader.rb +9 -5
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +64 -16
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +8 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +15 -7
- data/lib/eco/api/session/batch.rb +1 -1
- data/lib/eco/api/session/batch/job.rb +34 -9
- data/lib/eco/api/usecases.rb +2 -2
- data/lib/eco/api/usecases/base_case.rb +2 -2
- data/lib/eco/api/usecases/base_io.rb +17 -4
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
- data/lib/eco/cli/config/default/input.rb +61 -8
- data/lib/eco/cli/config/default/options.rb +46 -2
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +8 -6
- data/lib/eco/cli/scripting/args_helpers.rb +2 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/language/models/collection.rb +5 -2
- data/lib/eco/version.rb +1 -1
- metadata +64 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06a58306abadf9b27421583990eb14960f7f30368515481b16aa474de1bc1b08
|
4
|
+
data.tar.gz: 0eef93068fdb31bc6d1949f1022eac325403ac3dbb47c95b593a5b9623655773
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80b0d2fc7bedb99deabae6d7d273cb4967eb0022db2e743078a82cace02d4f499fe8ad51ec02b7c5bcef549aac9fb03b0ea7ef5358fb602c65856654c7c20814
|
7
|
+
data.tar.gz: 553e1342f38c244ab57bb259b639d55ddc4a4d5d6f72bd54ed9290111636f4dffb29834f69a5b7d2707ee3d44951fa52efccf81589194f11dfa1a709309ddb77
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,83 @@
|
|
1
1
|
# Change Log
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
|
4
|
-
## [2.0.
|
4
|
+
## [2.0.25] - 2021-06-xx
|
5
|
+
|
6
|
+
### Added
|
7
|
+
- `Eco::API::UseCases::DefaultCases::HrisCase` validation error to require `-schema-id` command line when there are people in schemas other than the active one
|
8
|
+
|
9
|
+
### Changed
|
10
|
+
- `Eco::API::Session::Batch::Job`
|
11
|
+
* for backwards compatibility `-include-only-excluded` should bring an options structure compatible with `-include-excluded`
|
12
|
+
|
13
|
+
### Fixed
|
14
|
+
- `Eco::API::Session::Batch` fixed a typo that prevented `prompt_user` from working
|
15
|
+
|
16
|
+
## [2.0.24] - 2021-06-22
|
17
|
+
|
18
|
+
### Added
|
19
|
+
- `Eco::API::Session::Batch::Job` made **native** `-include-excluded`
|
20
|
+
* also added a new option `-include-only-excluded` to target only the people that are HRIS excluded
|
21
|
+
|
22
|
+
|
23
|
+
## [2.0.23] - 2021-06-22
|
24
|
+
|
25
|
+
### Added
|
26
|
+
- `Eco::API::Session::Batch::Job` **new** option (`-save-requests`) to save requests even if in `dry-run` (`-simulate`)
|
27
|
+
### Changed
|
28
|
+
- `Eco::API::Session::Batch::Job` new people won't create updates unless they have either details or account
|
29
|
+
* because that entry is not supposed to be created unless it has an account or details
|
30
|
+
|
31
|
+
## [2.0.22] - 2021-06-18
|
32
|
+
|
33
|
+
### Added
|
34
|
+
- exposed `logger` in `BaseLoader` and
|
35
|
+
- support for multiple input files
|
36
|
+
* `Eco::API::Common::People::EntryFactory#entries`:
|
37
|
+
- refactored to allow multiple input files parsing
|
38
|
+
- in addition to `idx`, hash entries will also get their `source_file`
|
39
|
+
* Input callback at `lib/eco/cli/config/default/input` refactored format detection and enabled folder input
|
40
|
+
* `SCR.get_file` language extended to also mention folder (not just file)
|
41
|
+
- support for `.xls` and `.xlsx` files
|
42
|
+
* `Eco::API::Common::People::DefaultParsers::XLSParser` the Excel files **parser**
|
43
|
+
* `Eco::API::Common::People::PersonParser` added `:xls` as an accepted format
|
44
|
+
* `Eco::API::Session#fields_mapper` exposed mapper through a method to allow **headers detection**
|
45
|
+
- The external names of the fields are the column headers of the input file
|
46
|
+
* `Eco::API::UseCases::BaseIO` when arguments validation fails, it now raises a specific `MissingParameter` error
|
47
|
+
|
48
|
+
### Changed
|
49
|
+
- dry out `BaseLoader` (only session is set as instance variable)
|
50
|
+
- removed `creek` **dependency** (it was not used anywhere in the gem)
|
51
|
+
* we just kept `roo` and `roo-xls`
|
52
|
+
- custom `Error` classes now all inherit from `StandardError` (rather than `Exception`)
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
## [2.0.21] - 2021-06-04
|
57
|
+
|
58
|
+
### Added
|
59
|
+
- `Eco::CSV::Table`, support to create the table out of an `Array<Hash>`
|
60
|
+
- This opens new methods to transform input Excel file to this data structure and unify input data structures.
|
61
|
+
- **new** use case `Eco::API::UseCases::DefaultCases::CleanUnknownTags` invokable via `clean-unknown-tags`
|
62
|
+
|
63
|
+
### Changed
|
64
|
+
- `Eco::API::Common::People::EntryFactory` slight **refactor** to boost better support for multiple input formats
|
65
|
+
|
66
|
+
|
67
|
+
## [2.0.20] - 2021-05-31
|
68
|
+
|
69
|
+
### Added
|
70
|
+
- **dependencies** to `creek`, `roo` and `roo-xls`
|
71
|
+
- **dependencies** to `hashdiff`
|
72
|
+
- `Eco::API::Session#parse_attribute` => added missing parameter `deps:`
|
73
|
+
- new option `-stdout [file]` to redirect the output to a file
|
74
|
+
- `Eco::CSV::Table`, **added** more helper methods `#group_by`, `#transform_values`, `#slice`, `#slice_columns`, `#delete_column`
|
75
|
+
- `Eco::API::Organization::TagTree` **added** more helper methods: `top?`, `tag=`, `as_json`, `dup`, `diff`
|
76
|
+
|
77
|
+
### Fixed
|
78
|
+
- `Exception` patch: when the error is a `SystemStackError` there is no `backtrace` :/
|
79
|
+
|
80
|
+
## [2.0.19] - 2021-05-31
|
5
81
|
|
6
82
|
### Added
|
7
83
|
- Better error message for people searches & **offer** to select among the candidates:
|
data/eco-helpers.gemspec
CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.homepage = "https://www.ecoportal.com"
|
15
15
|
spec.licenses = %w[MIT]
|
16
16
|
|
17
|
-
spec.required_ruby_version = '>= 2.
|
17
|
+
spec.required_ruby_version = '>= 2.5.0'
|
18
18
|
|
19
19
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
20
20
|
f.match(%r{^(test|spec|features)/})
|
@@ -36,7 +36,10 @@ Gem::Specification.new do |spec|
|
|
36
36
|
spec.add_dependency 'aws-sdk-ses', '>= 1.36.0', '< 2'
|
37
37
|
spec.add_dependency 'dotenv', '>= 2.7.6', '< 2.8'
|
38
38
|
spec.add_dependency 'net-sftp', '>= 3.0.0', '< 3.1'
|
39
|
+
spec.add_dependency 'hashdiff', '>= 1.0.1', '< 1.1'
|
39
40
|
spec.add_dependency 'fuzzy_match', '>= 2.1.0', '< 2.2'
|
40
41
|
spec.add_dependency 'amatch', '>= 0.4.0', '< 0.5'
|
41
42
|
spec.add_dependency 'jaro_winkler', '>= 1.5.4', '< 1.6'
|
43
|
+
spec.add_dependency 'roo', '>= 2.8.3', '< 2.9'
|
44
|
+
spec.add_dependency 'roo-xls', '>= 1.2.0', '< 1.3'
|
42
45
|
end
|
@@ -51,15 +51,19 @@ module Eco
|
|
51
51
|
private
|
52
52
|
|
53
53
|
def session
|
54
|
-
|
54
|
+
ASSETS.session
|
55
55
|
end
|
56
56
|
|
57
|
-
def
|
58
|
-
session.
|
57
|
+
def config
|
58
|
+
session.config
|
59
59
|
end
|
60
60
|
|
61
|
-
def
|
62
|
-
|
61
|
+
def logger
|
62
|
+
session.logger
|
63
|
+
end
|
64
|
+
|
65
|
+
def micro
|
66
|
+
session.micro
|
63
67
|
end
|
64
68
|
|
65
69
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Loaders::Parser
|
2
|
+
attribute :xls
|
3
|
+
|
4
|
+
attr_accessor :already_required
|
5
|
+
attr_reader :file
|
6
|
+
|
7
|
+
def parser(file, deps)
|
8
|
+
@file = file
|
9
|
+
rows.tap {|r| @file = nil}
|
10
|
+
end
|
11
|
+
|
12
|
+
def serializer(array_hash, deps)
|
13
|
+
raise "Not implemented. TODO: using axlsx or rubyXL gems. See: https://spin.atomicobject.com/2017/03/22/parsing-excel-files-ruby/"
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def headers
|
19
|
+
raise "You should implement this method"
|
20
|
+
end
|
21
|
+
|
22
|
+
def sheet_name
|
23
|
+
0
|
24
|
+
end
|
25
|
+
|
26
|
+
def workbook
|
27
|
+
require_reading_libs!
|
28
|
+
Roo::Spreadsheet.open(file)
|
29
|
+
end
|
30
|
+
|
31
|
+
def spreadheet(name_or_index = sheet_name)
|
32
|
+
workbook.sheet(name_or_index)
|
33
|
+
end
|
34
|
+
|
35
|
+
def rows(target = headers)
|
36
|
+
begin
|
37
|
+
spreadheet.parse(header_search: target)
|
38
|
+
rescue Roo::HeaderRowNotFoundError => e
|
39
|
+
missing = JSON.parse(e.message)
|
40
|
+
logger.warn("The input file is missing these headers: #{missing}")
|
41
|
+
present = target - missing
|
42
|
+
rows(present)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def require_reading_libs!
|
47
|
+
return if already_required
|
48
|
+
require 'roo'
|
49
|
+
require 'roo-xls'
|
50
|
+
already_required = true
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
@@ -2,7 +2,7 @@ module Eco
|
|
2
2
|
module API
|
3
3
|
module Common
|
4
4
|
module People
|
5
|
-
# TODO: EntryFactory should suppport multiple schemas itself
|
5
|
+
# TODO: EntryFactory should support multiple schemas itself (rather than being done on `Session`)
|
6
6
|
# => currently, it's through session.entry_factory(schema: id), but this is wrong
|
7
7
|
# => This way, Entries and PersonEntry will be able to refer to attr_map and person_parser linked to schema_id
|
8
8
|
# => "schema_id" should be an optional column in the input file, or parsable via a custom parser to scope the schema
|
@@ -88,26 +88,63 @@ module Eco
|
|
88
88
|
fatal("Format should be a Symbol. Given '#{format}'") if format && !format.is_a?(Symbol)
|
89
89
|
fatal("There is no parser/serializer for format ':#{format.to_s}'") unless no_format || @person_parser.defined?(format)
|
90
90
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
91
|
+
kargs = {}
|
92
|
+
kargs.merge!(content: data) unless no_data
|
93
|
+
kargs.merge!(file: file) unless no_file
|
94
|
+
kargs.merge!(format: format) unless no_format
|
95
|
+
kargs.merge!(encoding: encoding) if encoding
|
96
|
+
|
97
|
+
Entries.new(to_array_of_hashes(**kargs), klass: PersonEntry, factory: self)
|
98
|
+
end
|
99
|
+
|
100
|
+
def to_array_of_hashes(**kargs)
|
101
|
+
data = []
|
102
|
+
content, file, encoding, format = kargs.values_at(:content, :file, :encoding, :format)
|
103
|
+
|
104
|
+
# Support for multiple file
|
105
|
+
if file.is_a?(Array)
|
106
|
+
return file.each_with_object([]) do |f, out|
|
107
|
+
logger.info("Parsing file '#{f}'")
|
108
|
+
curr = to_array_of_hashes(**kargs.merge(file: f))
|
109
|
+
out.concat(curr)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
# Get content only when it's not :xls
|
113
|
+
# note: even if content was provided, file takes precedence
|
114
|
+
content = get_file_content(file, format, encoding) if (format != :xls) && file
|
115
|
+
|
116
|
+
case content
|
117
|
+
when Hash
|
118
|
+
logger.error("Input data as 'Hash' not supported. Expecting 'Enumerable' or 'String'")
|
119
|
+
exit(1)
|
120
|
+
when String
|
121
|
+
to_array_of_hashes(content: person_parser.parse(format, content))
|
122
|
+
when Enumerable
|
123
|
+
sample = content.to_a.first
|
124
|
+
case sample
|
125
|
+
when Hash, Array, ::CSV::Row
|
126
|
+
Eco::CSV::Table.new(content).to_array_of_hashes
|
101
127
|
else
|
102
|
-
logger.
|
128
|
+
logger.error("Input content 'Array' of '#{sample.class}' is not supported.")
|
103
129
|
end
|
104
|
-
|
105
|
-
entries(data: arr_hash)
|
106
130
|
else
|
107
|
-
|
131
|
+
if file && format == :xls
|
132
|
+
person_parser.parse(format, file)
|
133
|
+
else
|
134
|
+
logger.error("Could not obtain any data out of these: #{kargs}. Given content: '#{content.class}'")
|
135
|
+
exit(1)
|
136
|
+
end
|
137
|
+
end.tap do |out_array|
|
138
|
+
start_from_two = (format == :csv) || format == :xls
|
139
|
+
out_array.each_with_index do |entry_hash, i|
|
140
|
+
entry_hash["idx"] = start_from_two ? i + 2 : i + 1
|
141
|
+
entry_hash["source_file"] = file
|
142
|
+
end
|
108
143
|
end
|
144
|
+
|
109
145
|
end
|
110
146
|
|
147
|
+
|
111
148
|
# Helper that generates a file out of `data:`.
|
112
149
|
# @raise Exception
|
113
150
|
# - if you try to provide `data:` in the wrong format.
|
@@ -127,7 +164,7 @@ module Eco
|
|
127
164
|
|
128
165
|
run = true
|
129
166
|
if Eco::API::Common::Session::FileManager.file_exists?(file)
|
130
|
-
prompt_user("
|
167
|
+
prompt_user("Do you want to overwrite it? (Y/n):", explanation: "The file '#{file}' already exists.", default: "Y") do |response|
|
131
168
|
run = (response == "") || reponse.upcase.start_with?("Y")
|
132
169
|
end
|
133
170
|
end
|
@@ -150,6 +187,17 @@ module Eco
|
|
150
187
|
|
151
188
|
private
|
152
189
|
|
190
|
+
def get_file_content(file, format, encoding)
|
191
|
+
unless Eco::API::Common::Session::FileManager.file_exists?(file)
|
192
|
+
logger.error("File does not exist: #{file}")
|
193
|
+
exit(1)
|
194
|
+
end
|
195
|
+
ext = File.extname(file)
|
196
|
+
encoding ||= Eco::API::Common::Session::FileManager.encoding(file)
|
197
|
+
encoding = (encoding != "utf-8")? "#{encoding}|utf-8": encoding
|
198
|
+
content = File.read(file, encoding: encoding)
|
199
|
+
end
|
200
|
+
|
153
201
|
def fatal(msg)
|
154
202
|
logger.fatal(msg)
|
155
203
|
raise msg
|
@@ -16,7 +16,7 @@ module Eco
|
|
16
16
|
CORE_ATTRS = ["id", "external_id", "email", "name", "supervisor_id", "filter_tags", "freemium"]
|
17
17
|
ACCOUNT_ATTRS = ["policy_group_ids", "default_tag", "send_invites", "landing_page_id", "login_provider_ids"]
|
18
18
|
TYPE = [:select, :text, :date, :number, :phone_number, :boolean, :multiple]
|
19
|
-
FORMAT = [:csv, :xml, :json]
|
19
|
+
FORMAT = [:csv, :xml, :json, :xls]
|
20
20
|
|
21
21
|
attr_reader :schema
|
22
22
|
attr_reader :details_attrs, :all_model_attrs
|
@@ -2,8 +2,11 @@ class ::Exception
|
|
2
2
|
def patch_full_message
|
3
3
|
begin
|
4
4
|
msg = []
|
5
|
-
|
6
|
-
|
5
|
+
tracing = backtrace ? backtrace : []
|
6
|
+
tracing = (self.class == SystemStackError) ? tracing[1..30] : tracing[1..-1]
|
7
|
+
tracing ||= []
|
8
|
+
msg << "\n#{tracing.first} \n#{message} (#{self.class.to_s})"
|
9
|
+
tracing.each_with_index {|bt, i| msg << "#{" "*8}#{i+1}: from #{bt}"}
|
7
10
|
msg.join("\n")
|
8
11
|
rescue Exception => e
|
9
12
|
puts "Something is wrong with 'patch_full_message': #{e}"
|
@@ -210,14 +210,20 @@ module Eco
|
|
210
210
|
to_h(:supervisor_id)
|
211
211
|
end
|
212
212
|
|
213
|
+
def group_by_schema
|
214
|
+
to_h do |person|
|
215
|
+
person.details && person.details.schema_id
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
213
219
|
def to_h(attr = "id")
|
214
220
|
super(attr || "id")
|
215
221
|
end
|
216
222
|
# @!endgroup
|
217
223
|
|
218
224
|
# @!group Helper methods
|
219
|
-
def
|
220
|
-
Eco::API::Organization::
|
225
|
+
def similarity
|
226
|
+
Eco::API::Organization::PeopleSimilarity.new(self.to_a)
|
221
227
|
end
|
222
228
|
# @!endgroup
|
223
229
|
|
@@ -13,7 +13,19 @@ module Eco
|
|
13
13
|
# @!group Config
|
14
14
|
# @return [String, Proc, nil] the target attribute to be read.
|
15
15
|
def attribute=(attr)
|
16
|
-
@attribute
|
16
|
+
@attribute = attr
|
17
|
+
end
|
18
|
+
|
19
|
+
def attribute
|
20
|
+
@attribute ||= :name
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns the target value to analyse
|
24
|
+
# @param person [Ecoportal::API::V1::Person]
|
25
|
+
def item_value(person)
|
26
|
+
return attr.call(item) if attribute.is_a?(Proc)
|
27
|
+
attr = attribute.to_sym
|
28
|
+
return item.send(attr) if item.respond_to?(attr)
|
17
29
|
end
|
18
30
|
|
19
31
|
# Define the order or relevant of per user matches
|
@@ -37,6 +49,16 @@ module Eco
|
|
37
49
|
@threshold ||= 0.15
|
38
50
|
end
|
39
51
|
|
52
|
+
# Generates a new object with same config but different base `data`.
|
53
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
54
|
+
def newFrom(data)
|
55
|
+
super(data).tap do |simil|
|
56
|
+
simil.threshold = threshold
|
57
|
+
simil.order = order
|
58
|
+
simil.attribute = attribute
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
40
62
|
# @!endgroup
|
41
63
|
|
42
64
|
# @!group Searchers
|
@@ -50,36 +72,158 @@ module Eco
|
|
50
72
|
end
|
51
73
|
end
|
52
74
|
|
75
|
+
# It returns all people with no name
|
76
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
77
|
+
def unnamed
|
78
|
+
select do |person|
|
79
|
+
person.name.to_s.strip.length < 2
|
80
|
+
end.yield_self do |results|
|
81
|
+
newFrom(results)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# It returns all people with a name
|
86
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
87
|
+
def named
|
88
|
+
reject do |person|
|
89
|
+
person.name.to_s.strip.length < 2
|
90
|
+
end.yield_self do |results|
|
91
|
+
newFrom(results)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
# It returns all the entries with `attribute` empty
|
96
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
97
|
+
def blank_attribute
|
98
|
+
select do |person|
|
99
|
+
item_value(person).to_s.strip.length < 2
|
100
|
+
end.yield_self do |results|
|
101
|
+
newFrom(results)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# It returns all the entries with `attribute` **not** empty
|
106
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
107
|
+
def attribute_present
|
108
|
+
reject do |person|
|
109
|
+
item_value(person).to_s.strip.length < 2
|
110
|
+
end.yield_self do |results|
|
111
|
+
newFrom(results)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
53
115
|
# @!endgroup
|
54
116
|
|
55
|
-
# @!group
|
117
|
+
# @!group Analysis starters
|
56
118
|
|
57
119
|
# Analyses People based on `options`
|
120
|
+
# @param needle_read [Proc, Symbol] when the value to read from `needle` object is different to the `:read` (`attribute`).
|
121
|
+
# This allows, for example, faceting `needle.name` (needle_read) against `haystack_item.details[alt_id]` (read).
|
122
|
+
# @param keep_empty [Boolean] to indicate if it should get rid of people with no results (based on threshold)
|
58
123
|
# @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
|
59
|
-
def analyse(**options)
|
124
|
+
def analyse(needle_read: nil, keep_empty: false, **options)
|
60
125
|
options = { read: self.attribute }.merge(options)
|
126
|
+
total = count; i = 1
|
61
127
|
each_with_object({}) do |person, results|
|
62
|
-
|
128
|
+
needle_str = needle_read ? item_string(person, needle_read) : nil
|
129
|
+
results[person.id] = find_all_with_score(person, needle_str: needle_str, **options)
|
130
|
+
print_progress("Analysed", total, i)
|
131
|
+
i += 1
|
132
|
+
end.yield_self do |analysed|
|
133
|
+
analysed = clean_empty(analysed) unless keep_empty
|
134
|
+
#puts "... #{analysed.count} results after cleaning empty"
|
135
|
+
analysed
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# @!endgroup
|
140
|
+
|
141
|
+
# @!group Results Treatment
|
142
|
+
|
143
|
+
# Gets a new instance object of this class, with only people in results
|
144
|
+
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
145
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
146
|
+
def newSimilarity(analysed)
|
147
|
+
newFrom(people_in_results(analysed))
|
148
|
+
end
|
149
|
+
|
150
|
+
def people_in_results(analysed)
|
151
|
+
analysed.each_with_object([]) do |(id, results), people|
|
152
|
+
related = results.each_with_object([self[id]]) do |result, related|
|
153
|
+
related << result.match
|
154
|
+
end
|
155
|
+
related.each {|person| people << person unless people.include?(person)}
|
63
156
|
end
|
64
157
|
end
|
65
158
|
|
159
|
+
# Removes from results those that do not have similar entries
|
160
|
+
def clean_empty(analysed)
|
161
|
+
analysed.select do |id, results|
|
162
|
+
!results.empty?
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
# Helper to do some treatment of the results
|
167
|
+
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
168
|
+
# @return [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
169
|
+
def with_analysed(analysed, keep_empty: false)
|
170
|
+
analysed.each_with_object({}) do |(id, results), reanalysed|
|
171
|
+
reanalysed[id] = yield(self[id], results)
|
172
|
+
end.yield_self do |reanalysed|
|
173
|
+
reanalysed = clean_empty(reanalysed) unless keep_empty
|
174
|
+
reanalysed
|
175
|
+
end.tap {|out| "with_analysed... returns #{out.count} records"}
|
176
|
+
end
|
177
|
+
|
66
178
|
# Launches a reanalysis on `analysed` based on `options`
|
67
179
|
# @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
|
68
|
-
def
|
69
|
-
analysed
|
70
|
-
|
180
|
+
def rearrange(analysed, **options)
|
181
|
+
with_analysed(analysed) do |person, results|
|
182
|
+
results.relevant_results(**options)
|
71
183
|
end
|
72
184
|
end
|
73
185
|
|
74
|
-
#
|
186
|
+
# Reanalyses by using a block to treat the needle and item values
|
187
|
+
def reanalyse(analysed, msg: "Reanalysing", **options, &block)
|
188
|
+
options = { read: self.attribute }.merge(options)
|
189
|
+
total = analysed.count; i = 1
|
190
|
+
with_analysed(analysed) do |person, results|
|
191
|
+
print_progress(msg, total, i)
|
192
|
+
i += 1
|
193
|
+
recalculate_results(results, &block)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# Reanalyses by ignoring matching words between the `needle` and those found in `results`
|
198
|
+
def ignore_matching_words(analysed, **options)
|
199
|
+
prompt = "Reanalysing by ignoring matching words"
|
200
|
+
reanalyse(analysed, msg: prompt, **options) do |needle_str, item_str, needle, item|
|
201
|
+
self.class.remove_matching_words(needle_str, item_str)
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
# Reanalyses by ignoring matching words between the `needle` and those found in `results`
|
206
|
+
def ignore_matching_words_old(analysed, **options)
|
207
|
+
options = { read: self.attribute }.merge(options)
|
208
|
+
total = analysed.count; i = 1
|
209
|
+
with_analysed(analysed) do |person, results|
|
210
|
+
print_progress("Reanalysing by ignoring matching words", total, i)
|
211
|
+
i += 1
|
212
|
+
ignore_same_words_score(results, **options)
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
# @!endgroup
|
217
|
+
|
218
|
+
# @!group Reporting Helpers
|
75
219
|
|
76
220
|
# @return [String] well structured text
|
77
|
-
def
|
221
|
+
def report(analysed, format: :txt)
|
78
222
|
case
|
79
223
|
when format == :txt
|
80
224
|
analysed.each_with_object("") do |(id, results), out|
|
81
225
|
msg = results.results.map {|r| r.print}.join("\n ")
|
82
|
-
"
|
226
|
+
out << "#{self[id].identify}:\n " + msg + "\n"
|
83
227
|
end
|
84
228
|
end
|
85
229
|
end
|
@@ -91,7 +235,7 @@ module Eco
|
|
91
235
|
def print_analysis(**options)
|
92
236
|
analysed = options[:analysed] || results_with_false_positives.analyse(**options)
|
93
237
|
analysed.each_with_object({}) do |(id, results), out|
|
94
|
-
puts
|
238
|
+
puts report(analysed)
|
95
239
|
end
|
96
240
|
end
|
97
241
|
# @!endgroup
|
@@ -105,6 +249,22 @@ module Eco
|
|
105
249
|
|
106
250
|
private
|
107
251
|
|
252
|
+
def print_progress(msg, total, num)
|
253
|
+
return unless total > 10
|
254
|
+
puts "" unless num > 1
|
255
|
+
@print_msg_len ||= 0
|
256
|
+
percent = (100 * num.to_f / total).round(1)
|
257
|
+
msg = " #{msg}: #{percent}% (#{num} of #{total})\r"
|
258
|
+
@print_msg_len = msg.length unless @print_msg_len > msg.length
|
259
|
+
print msg
|
260
|
+
$stdout.flush
|
261
|
+
if percent > 99.9
|
262
|
+
sleep(0.2)
|
263
|
+
print "#{" " * @print_msg_len}\r"
|
264
|
+
$stdout.flush
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
108
268
|
|
109
269
|
end
|
110
270
|
end
|