eco-helpers 2.0.19 → 2.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -1
- data/eco-helpers.gemspec +4 -1
- data/lib/eco/api/common/base_loader.rb +9 -5
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +64 -16
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +8 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +15 -7
- data/lib/eco/api/session/batch.rb +1 -1
- data/lib/eco/api/session/batch/job.rb +34 -9
- data/lib/eco/api/usecases.rb +2 -2
- data/lib/eco/api/usecases/base_case.rb +2 -2
- data/lib/eco/api/usecases/base_io.rb +17 -4
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
- data/lib/eco/cli/config/default/input.rb +61 -8
- data/lib/eco/cli/config/default/options.rb +46 -2
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +8 -6
- data/lib/eco/cli/scripting/args_helpers.rb +2 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/language/models/collection.rb +5 -2
- data/lib/eco/version.rb +1 -1
- metadata +64 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 06a58306abadf9b27421583990eb14960f7f30368515481b16aa474de1bc1b08
|
4
|
+
data.tar.gz: 0eef93068fdb31bc6d1949f1022eac325403ac3dbb47c95b593a5b9623655773
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80b0d2fc7bedb99deabae6d7d273cb4967eb0022db2e743078a82cace02d4f499fe8ad51ec02b7c5bcef549aac9fb03b0ea7ef5358fb602c65856654c7c20814
|
7
|
+
data.tar.gz: 553e1342f38c244ab57bb259b639d55ddc4a4d5d6f72bd54ed9290111636f4dffb29834f69a5b7d2707ee3d44951fa52efccf81589194f11dfa1a709309ddb77
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,83 @@
|
|
1
1
|
# Change Log
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
|
4
|
-
## [2.0.
|
4
|
+
## [2.0.25] - 2021-06-xx
|
5
|
+
|
6
|
+
### Added
|
7
|
+
- `Eco::API::UseCases::DefaultCases::HrisCase` validation error to require `-schema-id` command line when there are people in schemas other than the active one
|
8
|
+
|
9
|
+
### Changed
|
10
|
+
- `Eco::API::Session::Batch::Job`
|
11
|
+
* for backwards compatibility `-include-only-excluded` should bring an options structure compatible with `-include-excluded`
|
12
|
+
|
13
|
+
### Fixed
|
14
|
+
- `Eco::API::Session::Batch` fixed typo that would prevent `prompt_user` from working
|
15
|
+
|
16
|
+
## [2.0.24] - 2021-06-22
|
17
|
+
|
18
|
+
### Added
|
19
|
+
- `Eco::API::Session::Batch::Job` made **native** `-include-excluded`
|
20
|
+
* also added new option `-include-only-excluded` to be able to target only HRIS-excluded people
|
21
|
+
|
22
|
+
|
23
|
+
## [2.0.23] - 2021-06-22
|
24
|
+
|
25
|
+
### Added
|
26
|
+
- `Eco::API::Session::Batch::Job` **new** option (`-save-requests`) to save requests even if in `dry-run` (`-simulate`)
|
27
|
+
### Changed
|
28
|
+
- `Eco::API::Session::Batch::Job` new people won't create updates unless they have either details or account
|
29
|
+
* because that entry is not supposed to be created unless it has account or details
|
30
|
+
|
31
|
+
## [2.0.22] - 2021-06-18
|
32
|
+
|
33
|
+
### Added
|
34
|
+
- exposed `logger` in `BaseLoader`
|
35
|
+
- support for multiple input files
|
36
|
+
* `Eco::API::Common::People::EntryFactory#entries`:
|
37
|
+
- refactored to allow multiple input files parsing
|
38
|
+
- in addition to `idx`, hash entries will also get their `source_file`
|
39
|
+
* Input callback at `lib/eco/cli/config/default/input` refactored format detection and enabled folder input
|
40
|
+
* `SCR.get_file` language extended to also mention folder (not just file)
|
41
|
+
- support for `.xls` and `.xlsx` files
|
42
|
+
* `Eco::API::Common::People::DefaultParsers::XLSParser` the Excel files **parser**
|
43
|
+
* `Eco::API::Common::People::PersonParser` added `:xls` as an accepted format
|
44
|
+
* `Eco::API::Session#fields_mapper` exposed mapper through a method to allow **headers detection**
|
45
|
+
- The external names of the fields are the column headers of the input file
|
46
|
+
* `Eco::API::UseCases::BaseIO` when arguments validation fails, it now raises a specific `MissingParameter` error
|
47
|
+
|
48
|
+
### Changed
|
49
|
+
- dry out `BaseLoader` (only session is set as instance variable)
|
50
|
+
- removed `creek` **dependency** (it was not used anywhere in the gem)
|
51
|
+
* we just kept `roo` and `roo-xls`
|
52
|
+
- custom `Error` classes now all inherit from `StandardError` (rather than `Exception`)
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
## [2.0.21] - 2021-06-04
|
57
|
+
|
58
|
+
### Added
|
59
|
+
- `Eco::CSV::Table`, support to create the table out of an `Array<Hash>`
|
60
|
+
- This opens new methods to transform input Excel file to this data structure and unify input data structures.
|
61
|
+
- **new** use case `Eco::API::UseCases::DefaultCases::CleanUnknownTags` invokable via `clean-unknown-tags`
|
62
|
+
|
63
|
+
### Changed
|
64
|
+
- `Eco::API::Common::People::EntryFactory` slight **refactor** to boost better support for multiple input formats
|
65
|
+
|
66
|
+
|
67
|
+
## [2.0.20] - 2021-05-31
|
68
|
+
|
69
|
+
### Added
|
70
|
+
- **dependencies** to `creek`, `roo` and `roo-xls`
|
71
|
+
- **dependencies** to `hashdiff`
|
72
|
+
- `Eco::API::Session#parse_attribute` => added missing parameter `deps:`
|
73
|
+
- new option `-stdout [file]` to redirect the output to a file
|
74
|
+
- `Eco::CSV::Table`, **added** more helper methods `#group_by`, `#transform_values`, `#slice`, `#slice_columns`, `#delete_column`
|
75
|
+
- `Eco::API::Organization::TagTree` **added** more helper methods: `top?`, `tag=`, `as_json`, `dup`, `diff`
|
76
|
+
|
77
|
+
### Fixed
|
78
|
+
- `Exception` patch: when the error is a `SystemStackError` there is no `backtrace` :/
|
79
|
+
|
80
|
+
## [2.0.19] - 2021-05-31
|
5
81
|
|
6
82
|
### Added
|
7
83
|
- Better error message for people searches & **offer** to select among the candidates:
|
data/eco-helpers.gemspec
CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.homepage = "https://www.ecoportal.com"
|
15
15
|
spec.licenses = %w[MIT]
|
16
16
|
|
17
|
-
spec.required_ruby_version = '>= 2.
|
17
|
+
spec.required_ruby_version = '>= 2.5.0'
|
18
18
|
|
19
19
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
20
20
|
f.match(%r{^(test|spec|features)/})
|
@@ -36,7 +36,10 @@ Gem::Specification.new do |spec|
|
|
36
36
|
spec.add_dependency 'aws-sdk-ses', '>= 1.36.0', '< 2'
|
37
37
|
spec.add_dependency 'dotenv', '>= 2.7.6', '< 2.8'
|
38
38
|
spec.add_dependency 'net-sftp', '>= 3.0.0', '< 3.1'
|
39
|
+
spec.add_dependency 'hashdiff', '>= 1.0.1', '< 1.1'
|
39
40
|
spec.add_dependency 'fuzzy_match', '>= 2.1.0', '< 2.2'
|
40
41
|
spec.add_dependency 'amatch', '>= 0.4.0', '< 0.5'
|
41
42
|
spec.add_dependency 'jaro_winkler', '>= 1.5.4', '< 1.6'
|
43
|
+
spec.add_dependency 'roo', '>= 2.8.3', '< 2.9'
|
44
|
+
spec.add_dependency 'roo-xls', '>= 1.2.0', '< 1.3'
|
42
45
|
end
|
@@ -51,15 +51,19 @@ module Eco
|
|
51
51
|
private
|
52
52
|
|
53
53
|
def session
|
54
|
-
|
54
|
+
ASSETS.session
|
55
55
|
end
|
56
56
|
|
57
|
-
def
|
58
|
-
session.
|
57
|
+
def config
|
58
|
+
session.config
|
59
59
|
end
|
60
60
|
|
61
|
-
def
|
62
|
-
|
61
|
+
def logger
|
62
|
+
session.logger
|
63
|
+
end
|
64
|
+
|
65
|
+
def micro
|
66
|
+
session.micro
|
63
67
|
end
|
64
68
|
|
65
69
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Loaders::Parser
|
2
|
+
attribute :xls
|
3
|
+
|
4
|
+
attr_accessor :already_required
|
5
|
+
attr_reader :file
|
6
|
+
|
7
|
+
def parser(file, deps)
|
8
|
+
@file = file
|
9
|
+
rows.tap {|r| @file = nil}
|
10
|
+
end
|
11
|
+
|
12
|
+
def serializer(array_hash, deps)
|
13
|
+
raise "Not implemented. TODO: using axlsx or rubyXL gems. See: https://spin.atomicobject.com/2017/03/22/parsing-excel-files-ruby/"
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def headers
|
19
|
+
raise "You should implement this method"
|
20
|
+
end
|
21
|
+
|
22
|
+
def sheet_name
|
23
|
+
0
|
24
|
+
end
|
25
|
+
|
26
|
+
def workbook
|
27
|
+
require_reading_libs!
|
28
|
+
Roo::Spreadsheet.open(file)
|
29
|
+
end
|
30
|
+
|
31
|
+
def spreadheet(name_or_index = sheet_name)
|
32
|
+
workbook.sheet(name_or_index)
|
33
|
+
end
|
34
|
+
|
35
|
+
def rows(target = headers)
|
36
|
+
begin
|
37
|
+
spreadheet.parse(header_search: target)
|
38
|
+
rescue Roo::HeaderRowNotFoundError => e
|
39
|
+
missing = JSON.parse(e.message)
|
40
|
+
logger.warn("The input file is missing these headers: #{missing}")
|
41
|
+
present = target - missing
|
42
|
+
rows(present)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def require_reading_libs!
|
47
|
+
return if already_required
|
48
|
+
require 'roo'
|
49
|
+
require 'roo-xls'
|
50
|
+
already_required = true
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
@@ -2,7 +2,7 @@ module Eco
|
|
2
2
|
module API
|
3
3
|
module Common
|
4
4
|
module People
|
5
|
-
# TODO: EntryFactory should suppport multiple schemas itself
|
5
|
+
# TODO: EntryFactory should support multiple schemas itself (rather than being done on `Session`)
|
6
6
|
# => currently, it's through session.entry_factory(schema: id), but this is wrong
|
7
7
|
# => This way, Entries and PersonEntry will be able to refer to attr_map and person_parser linked to schema_id
|
8
8
|
# => "schema_id" should be an optional column in the input file, or parsable via a custom parser to scope the schema
|
@@ -88,26 +88,63 @@ module Eco
|
|
88
88
|
fatal("Format should be a Symbol. Given '#{format}'") if format && !format.is_a?(Symbol)
|
89
89
|
fatal("There is no parser/serializer for format ':#{format.to_s}'") unless no_format || @person_parser.defined?(format)
|
90
90
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
91
|
+
kargs = {}
|
92
|
+
kargs.merge!(content: data) unless no_data
|
93
|
+
kargs.merge!(file: file) unless no_file
|
94
|
+
kargs.merge!(format: format) unless no_format
|
95
|
+
kargs.merge!(encoding: encoding) if encoding
|
96
|
+
|
97
|
+
Entries.new(to_array_of_hashes(**kargs), klass: PersonEntry, factory: self)
|
98
|
+
end
|
99
|
+
|
100
|
+
def to_array_of_hashes(**kargs)
|
101
|
+
data = []
|
102
|
+
content, file, encoding, format = kargs.values_at(:content, :file, :encoding, :format)
|
103
|
+
|
104
|
+
# Support for multiple files
|
105
|
+
if file.is_a?(Array)
|
106
|
+
return file.each_with_object([]) do |f, out|
|
107
|
+
logger.info("Parsing file '#{f}'")
|
108
|
+
curr = to_array_of_hashes(**kargs.merge(file: f))
|
109
|
+
out.concat(curr)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
# Get content only when it's not :xls
|
113
|
+
# note: even if content was provided, file takes precedence
|
114
|
+
content = get_file_content(file, format, encoding) if (format != :xls) && file
|
115
|
+
|
116
|
+
case content
|
117
|
+
when Hash
|
118
|
+
logger.error("Input data as 'Hash' not supported. Expecting 'Enumerable' or 'String'")
|
119
|
+
exit(1)
|
120
|
+
when String
|
121
|
+
to_array_of_hashes(content: person_parser.parse(format, content))
|
122
|
+
when Enumerable
|
123
|
+
sample = content.to_a.first
|
124
|
+
case sample
|
125
|
+
when Hash, Array, ::CSV::Row
|
126
|
+
Eco::CSV::Table.new(content).to_array_of_hashes
|
101
127
|
else
|
102
|
-
logger.
|
128
|
+
logger.error("Input content 'Array' of '#{sample.class}' is not supported.")
|
103
129
|
end
|
104
|
-
|
105
|
-
entries(data: arr_hash)
|
106
130
|
else
|
107
|
-
|
131
|
+
if file && format == :xls
|
132
|
+
person_parser.parse(format, file)
|
133
|
+
else
|
134
|
+
logger.error("Could not obtain any data out of these: #{kargs}. Given content: '#{content.class}'")
|
135
|
+
exit(1)
|
136
|
+
end
|
137
|
+
end.tap do |out_array|
|
138
|
+
start_from_two = (format == :csv) || format == :xls
|
139
|
+
out_array.each_with_index do |entry_hash, i|
|
140
|
+
entry_hash["idx"] = start_from_two ? i + 2 : i + 1
|
141
|
+
entry_hash["source_file"] = file
|
142
|
+
end
|
108
143
|
end
|
144
|
+
|
109
145
|
end
|
110
146
|
|
147
|
+
|
111
148
|
# Helper that generates a file out of `data:`.
|
112
149
|
# @raise Exception
|
113
150
|
# - if you try to provide `data:` in the wrong format.
|
@@ -127,7 +164,7 @@ module Eco
|
|
127
164
|
|
128
165
|
run = true
|
129
166
|
if Eco::API::Common::Session::FileManager.file_exists?(file)
|
130
|
-
prompt_user("
|
167
|
+
prompt_user("Do you want to overwrite it? (Y/n):", explanation: "The file '#{file}' already exists.", default: "Y") do |response|
|
131
168
|
run = (response == "") || reponse.upcase.start_with?("Y")
|
132
169
|
end
|
133
170
|
end
|
@@ -150,6 +187,17 @@ module Eco
|
|
150
187
|
|
151
188
|
private
|
152
189
|
|
190
|
+
def get_file_content(file, format, encoding)
|
191
|
+
unless Eco::API::Common::Session::FileManager.file_exists?(file)
|
192
|
+
logger.error("File does not exist: #{file}")
|
193
|
+
exit(1)
|
194
|
+
end
|
195
|
+
ext = File.extname(file)
|
196
|
+
encoding ||= Eco::API::Common::Session::FileManager.encoding(file)
|
197
|
+
encoding = (encoding != "utf-8")? "#{encoding}|utf-8": encoding
|
198
|
+
content = File.read(file, encoding: encoding)
|
199
|
+
end
|
200
|
+
|
153
201
|
def fatal(msg)
|
154
202
|
logger.fatal(msg)
|
155
203
|
raise msg
|
@@ -16,7 +16,7 @@ module Eco
|
|
16
16
|
CORE_ATTRS = ["id", "external_id", "email", "name", "supervisor_id", "filter_tags", "freemium"]
|
17
17
|
ACCOUNT_ATTRS = ["policy_group_ids", "default_tag", "send_invites", "landing_page_id", "login_provider_ids"]
|
18
18
|
TYPE = [:select, :text, :date, :number, :phone_number, :boolean, :multiple]
|
19
|
-
FORMAT = [:csv, :xml, :json]
|
19
|
+
FORMAT = [:csv, :xml, :json, :xls]
|
20
20
|
|
21
21
|
attr_reader :schema
|
22
22
|
attr_reader :details_attrs, :all_model_attrs
|
@@ -2,8 +2,11 @@ class ::Exception
|
|
2
2
|
def patch_full_message
|
3
3
|
begin
|
4
4
|
msg = []
|
5
|
-
|
6
|
-
|
5
|
+
tracing = backtrace ? backtrace : []
|
6
|
+
tracing = (self.class == SystemStackError) ? tracing[1..30] : tracing[1..-1]
|
7
|
+
tracing ||= []
|
8
|
+
msg << "\n#{tracing.first} \n#{message} (#{self.class.to_s})"
|
9
|
+
tracing.each_with_index {|bt, i| msg << "#{" "*8}#{i+1}: from #{bt}"}
|
7
10
|
msg.join("\n")
|
8
11
|
rescue Exception => e
|
9
12
|
puts "Something is wrong with 'patch_full_message': #{e}"
|
@@ -210,14 +210,20 @@ module Eco
|
|
210
210
|
to_h(:supervisor_id)
|
211
211
|
end
|
212
212
|
|
213
|
+
def group_by_schema
|
214
|
+
to_h do |person|
|
215
|
+
person.details && person.details.schema_id
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
213
219
|
def to_h(attr = "id")
|
214
220
|
super(attr || "id")
|
215
221
|
end
|
216
222
|
# @!endgroup
|
217
223
|
|
218
224
|
# @!group Helper methods
|
219
|
-
def
|
220
|
-
Eco::API::Organization::
|
225
|
+
def similarity
|
226
|
+
Eco::API::Organization::PeopleSimilarity.new(self.to_a)
|
221
227
|
end
|
222
228
|
# @!endgroup
|
223
229
|
|
@@ -13,7 +13,19 @@ module Eco
|
|
13
13
|
# @!group Config
|
14
14
|
# @return [String, Proc, nil] the target attribute to be read.
|
15
15
|
def attribute=(attr)
|
16
|
-
@attribute
|
16
|
+
@attribute = attr
|
17
|
+
end
|
18
|
+
|
19
|
+
def attribute
|
20
|
+
@attribute ||= :name
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns the target value to analyse
|
24
|
+
# @param person [Ecoportal::API::V1::Person]
|
25
|
+
def item_value(person)
|
26
|
+
return attr.call(item) if attribute.is_a?(Proc)
|
27
|
+
attr = attribute.to_sym
|
28
|
+
return item.send(attr) if item.respond_to?(attr)
|
17
29
|
end
|
18
30
|
|
19
31
|
# Define the order or relevant of per user matches
|
@@ -37,6 +49,16 @@ module Eco
|
|
37
49
|
@threshold ||= 0.15
|
38
50
|
end
|
39
51
|
|
52
|
+
# Generates a new object with same config but different base `data`.
|
53
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
54
|
+
def newFrom(data)
|
55
|
+
super(data).tap do |simil|
|
56
|
+
simil.threshold = threshold
|
57
|
+
simil.order = order
|
58
|
+
simil.attribute = attribute
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
40
62
|
# @!endgroup
|
41
63
|
|
42
64
|
# @!group Searchers
|
@@ -50,36 +72,158 @@ module Eco
|
|
50
72
|
end
|
51
73
|
end
|
52
74
|
|
75
|
+
# It returns all people with no name
|
76
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
77
|
+
def unnamed
|
78
|
+
select do |person|
|
79
|
+
person.name.to_s.strip.length < 2
|
80
|
+
end.yield_self do |results|
|
81
|
+
newFrom(results)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# It returns all people with a name
|
86
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
87
|
+
def named
|
88
|
+
reject do |person|
|
89
|
+
person.name.to_s.strip.length < 2
|
90
|
+
end.yield_self do |results|
|
91
|
+
newFrom(results)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
# It returns all the entries with `attribute` empty
|
96
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
97
|
+
def blank_attribute
|
98
|
+
select do |person|
|
99
|
+
item_value(person).to_s.strip.length < 2
|
100
|
+
end.yield_self do |results|
|
101
|
+
newFrom(results)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# It returns all the entries with `attribute` **not** empty
|
106
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
107
|
+
def attribute_present
|
108
|
+
reject do |person|
|
109
|
+
item_value(person).to_s.strip.length < 2
|
110
|
+
end.yield_self do |results|
|
111
|
+
newFrom(results)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
53
115
|
# @!endgroup
|
54
116
|
|
55
|
-
# @!group
|
117
|
+
# @!group Analysis starters
|
56
118
|
|
57
119
|
# Analyses People based on `options`
|
120
|
+
# @param needle_read [Proc, Symbol] when the value to read from `needle` object is different to the `:read` (`attribute`).
|
121
|
+
# This allows, for example, to facet `needle.name` (needle_read) against `haystack_item.details[alt_id]` (read).
|
122
|
+
# @param keep_empty [Boolean] to indicate if it should get rid of people with no results (based on threshold)
|
58
123
|
# @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
|
59
|
-
def analyse(**options)
|
124
|
+
def analyse(needle_read: nil, keep_empty: false, **options)
|
60
125
|
options = { read: self.attribute }.merge(options)
|
126
|
+
total = count; i = 1
|
61
127
|
each_with_object({}) do |person, results|
|
62
|
-
|
128
|
+
needle_str = needle_read ? item_string(person, needle_read) : nil
|
129
|
+
results[person.id] = find_all_with_score(person, needle_str: needle_str, **options)
|
130
|
+
print_progress("Analysed", total, i)
|
131
|
+
i += 1
|
132
|
+
end.yield_self do |analysed|
|
133
|
+
analysed = clean_empty(analysed) unless keep_empty
|
134
|
+
#puts "... #{analysed.count} results after cleaning empty"
|
135
|
+
analysed
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# @!endgroup
|
140
|
+
|
141
|
+
# @!group Results Treatment
|
142
|
+
|
143
|
+
# Gets a new instance object of this class, with only people in results
|
144
|
+
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
145
|
+
# @return [Eco::API::Organization::PeopleSimilarity]
|
146
|
+
def newSimilarity(analysed)
|
147
|
+
newFrom(people_in_results(analysed))
|
148
|
+
end
|
149
|
+
|
150
|
+
def people_in_results(analysed)
|
151
|
+
analysed.each_with_object([]) do |(id, results), people|
|
152
|
+
related = results.each_with_object([self[id]]) do |result, related|
|
153
|
+
related << result.match
|
154
|
+
end
|
155
|
+
related.each {|person| people << person unless people.include?(person)}
|
63
156
|
end
|
64
157
|
end
|
65
158
|
|
159
|
+
# Removes from results those that do not have similar entries
|
160
|
+
def clean_empty(analysed)
|
161
|
+
analysed.select do |id, results|
|
162
|
+
!results.empty?
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
# Helper to do some treatment of the results
|
167
|
+
# @param analysed [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
168
|
+
# @return [Hash] where the _keys_ are the people `id`s and _values_ the `Eco::Data::FuzzyMatch::Results`
|
169
|
+
def with_analysed(analysed, keep_empty: false)
|
170
|
+
analysed.each_with_object({}) do |(id, results), reanalysed|
|
171
|
+
reanalysed[id] = yield(self[id], results)
|
172
|
+
end.yield_self do |reanalysed|
|
173
|
+
reanalysed = clean_empty(reanalysed) unless keep_empty
|
174
|
+
reanalysed
|
175
|
+
end.tap {|out| "with_analysed... returns #{out.count} records"}
|
176
|
+
end
|
177
|
+
|
66
178
|
# Launches a reanalysis on `analysed` based on `options`
|
67
179
|
# @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
|
68
|
-
def
|
69
|
-
analysed
|
70
|
-
|
180
|
+
def rearrange(analysed, **options)
|
181
|
+
with_analysed(analysed) do |person, results|
|
182
|
+
results.relevant_results(**options)
|
71
183
|
end
|
72
184
|
end
|
73
185
|
|
74
|
-
#
|
186
|
+
# Reanalyses by using a block to treat the needle and item values
|
187
|
+
def reanalyse(analysed, msg: "Reanalysing", **options, &block)
|
188
|
+
options = { read: self.attribute }.merge(options)
|
189
|
+
total = analysed.count; i = 1
|
190
|
+
with_analysed(analysed) do |person, results|
|
191
|
+
print_progress(msg, total, i)
|
192
|
+
i += 1
|
193
|
+
recalculate_results(results, &block)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# Reanalyses by ignoring matching words between the `needle` and those found in `results`
|
198
|
+
def ignore_matching_words(analysed, **options)
|
199
|
+
prompt = "Reanalysing by ignoring matching words"
|
200
|
+
reanalyse(analysed, msg: prompt, **options) do |needle_str, item_str, needle, item|
|
201
|
+
self.class.remove_matching_words(needle_str, item_str)
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
# Reanalyses by ignoring matching words between the `needle` and those found in `results`
|
206
|
+
def ignore_matching_words_old(analysed, **options)
|
207
|
+
options = { read: self.attribute }.merge(options)
|
208
|
+
total = analysed.count; i = 1
|
209
|
+
with_analysed(analysed) do |person, results|
|
210
|
+
print_progress("Reanalysing by ignoring matching words", total, i)
|
211
|
+
i += 1
|
212
|
+
ignore_same_words_score(results, **options)
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
# @!endgroup
|
217
|
+
|
218
|
+
# @!group Reporting Helpers
|
75
219
|
|
76
220
|
# @return [String] well structured text
|
77
|
-
def
|
221
|
+
def report(analysed, format: :txt)
|
78
222
|
case
|
79
223
|
when format == :txt
|
80
224
|
analysed.each_with_object("") do |(id, results), out|
|
81
225
|
msg = results.results.map {|r| r.print}.join("\n ")
|
82
|
-
"
|
226
|
+
out << "#{self[id].identify}:\n " + msg + "\n"
|
83
227
|
end
|
84
228
|
end
|
85
229
|
end
|
@@ -91,7 +235,7 @@ module Eco
|
|
91
235
|
def print_analysis(**options)
|
92
236
|
analysed = options[:analysed] || results_with_false_positives.analyse(**options)
|
93
237
|
analysed.each_with_object({}) do |(id, results), out|
|
94
|
-
puts
|
238
|
+
puts report(analysed)
|
95
239
|
end
|
96
240
|
end
|
97
241
|
# @!endgroup
|
@@ -105,6 +249,22 @@ module Eco
|
|
105
249
|
|
106
250
|
private
|
107
251
|
|
252
|
+
def print_progress(msg, total, num)
|
253
|
+
return unless total > 10
|
254
|
+
puts "" unless num > 1
|
255
|
+
@print_msg_len ||= 0
|
256
|
+
percent = (100 * num.to_f / total).round(1)
|
257
|
+
msg = " #{msg}: #{percent}% (#{num} of #{total})\r"
|
258
|
+
@print_msg_len = msg.length unless @print_msg_len > msg.length
|
259
|
+
print msg
|
260
|
+
$stdout.flush
|
261
|
+
if percent > 99.9
|
262
|
+
sleep(0.2)
|
263
|
+
print "#{" " * @print_msg_len}\r"
|
264
|
+
$stdout.flush
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
108
268
|
|
109
269
|
end
|
110
270
|
end
|