eco-helpers 2.0.19 → 2.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/eco-helpers.gemspec +5 -1
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +49 -15
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +2 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +4 -2
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/cli/config/default/options.rb +29 -1
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +7 -5
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +83 -2
data/lib/eco/api/session.rb
CHANGED
@@ -106,11 +106,13 @@ module Eco
|
|
106
106
|
# @param attr [String, Symbol] type (`Symbol`) or attribute (`String`) to target a specific parser.
|
107
107
|
# @param source [Any] source value to be parsed.
|
108
108
|
# @param phase [Symbol] the phase when this parser should be active.
|
109
|
-
|
109
|
+
# @param deps [Hash] dependencies forwarded to the parser via its `deps:` keyword.
|
110
|
+
# @return [Object] the parsed attribute.
|
111
|
+
def parse_attribute(attr, source, phase = :internal, deps: {})
|
110
112
|
unless parsers = entry_factory.person_parser
|
111
113
|
raise "There are no parsers defined"
|
112
114
|
end
|
113
|
-
parsers.parse(attr, source, phase)
|
115
|
+
parsers.parse(attr, source, phase, deps: deps)
|
114
116
|
end
|
115
117
|
|
116
118
|
# @see Eco::API::Common::People::EntryFactory#export
|
@@ -13,6 +13,7 @@ require_relative 'default_cases/abstract_policygroup_abilities_case.rb'
|
|
13
13
|
require_relative 'default_cases/analyse_people_case'
|
14
14
|
require_relative 'default_cases/append_usergroups_case'
|
15
15
|
require_relative 'default_cases/change_email_case'
|
16
|
+
require_relative 'default_cases/clean_unknown_tags_case'
|
16
17
|
require_relative 'default_cases/codes_to_tags_case'
|
17
18
|
require_relative 'default_cases/create_case'
|
18
19
|
require_relative 'default_cases/create_details_case'
|
@@ -5,41 +5,158 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
|
|
5
5
|
attr_reader :session, :people, :options
|
6
6
|
|
7
7
|
def main(people, session, options, usecase)
|
8
|
+
options[:end_get] = false
|
8
9
|
@session = session; @options = options; @people = people
|
9
10
|
|
10
|
-
|
11
|
+
case
|
12
|
+
when case_options[:identify_duplicates]
|
13
|
+
identify_duplicates
|
14
|
+
when case_options[:identify_unnamed]
|
15
|
+
identify_unnamed
|
16
|
+
else
|
17
|
+
session.logger.info("No analysis operation was specified")
|
18
|
+
end.tap do |people_involved|
|
19
|
+
if people_involved
|
20
|
+
to_csv(people_involved) if to_csv?
|
21
|
+
create_people_backup(people_involved) if results_people_backup?
|
22
|
+
end
|
23
|
+
end
|
11
24
|
end
|
12
25
|
|
13
26
|
private
|
14
27
|
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
analytics.re_analyse(analysed, threshold: 0.5, order: [:average])
|
28
|
+
def identify_unnamed
|
29
|
+
similarity_analytics.unnamed.tap do |unnamed|
|
30
|
+
if unnamed.empty?
|
31
|
+
session.logger.info("There were no people with no name!!")
|
32
|
+
end
|
33
|
+
end
|
22
34
|
end
|
23
35
|
|
24
|
-
def
|
25
|
-
|
36
|
+
def identify_duplicates
|
37
|
+
analysed = similarity_screening
|
38
|
+
if case_options[:ignore_matching_words]
|
39
|
+
puts "Fine tune results by ignoring matching words..."
|
40
|
+
analysed = strict_similarity(analysed)
|
41
|
+
end
|
42
|
+
|
43
|
+
similarity_analytics.newSimilarity(analysed).tap do |related_people|
|
44
|
+
if related_people.empty?
|
45
|
+
session.logger.info("There were no possible duplicates identified!!")
|
46
|
+
else
|
47
|
+
report = similarity_analytics.report(analysed, format: :txt)
|
48
|
+
save!(report)
|
49
|
+
end
|
50
|
+
end
|
26
51
|
end
|
27
52
|
|
28
|
-
def
|
29
|
-
|
53
|
+
def strict_similarity(analysed)
|
54
|
+
similarity_analytics.ignore_matching_words(analysed, **{
|
55
|
+
threshold: 0.5,
|
56
|
+
order: [:ngrams]
|
57
|
+
})
|
30
58
|
end
|
31
59
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
60
|
+
def similarity_screening
|
61
|
+
similarity_analytics.attribute = field_similarity
|
62
|
+
options = {
|
63
|
+
threshold: 0.4,
|
64
|
+
order: [:average, :dice]
|
65
|
+
}.tap do |opts|
|
66
|
+
opts.merge!(needle_read: facet_field_proc) if facet_field?
|
67
|
+
opts.merge!(unique_words: true) if unique_words?
|
68
|
+
end
|
69
|
+
analysed = similarity_analytics.analyse(**options)
|
70
|
+
puts "Got #{analysed.count} results after basic screening with #{options}"
|
71
|
+
|
72
|
+
return analysed if case_options[:only_screening]
|
73
|
+
options = {threshold: 0.5, order: [:average]}
|
74
|
+
puts "Going to rearrange results... with #{options}"
|
75
|
+
similarity_analytics.rearrange(analysed, **options).tap do |analysed|
|
76
|
+
puts "... got #{analysed.count} results after rearranging"
|
36
77
|
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def similarity_analytics
|
81
|
+
@analytics ||= people.similarity
|
82
|
+
end
|
83
|
+
|
84
|
+
def create_people_backup(cut = people, file = results_people_backup)
|
85
|
+
session.file_manager.save_json(cut, file)
|
86
|
+
end
|
87
|
+
|
88
|
+
def to_csv(data = people, file = csv_file)
|
89
|
+
opts = {}
|
90
|
+
opts.deep_merge!(export: {file: {name: file, format: :csv}})
|
91
|
+
opts.deep_merge!(export: {options: {nice_header: true}})
|
92
|
+
opts.deep_merge!(export: {options: {internal_names: true}})
|
93
|
+
#opts.deep_merge!(export: {options: {split_schemas: true}})
|
94
|
+
session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
|
95
|
+
end
|
96
|
+
|
97
|
+
def unique_words?
|
98
|
+
case_options[:unique_words]
|
99
|
+
end
|
100
|
+
|
101
|
+
def field_similarity
|
102
|
+
return :name unless use_field?
|
103
|
+
use_field_proc
|
104
|
+
end
|
105
|
+
|
106
|
+
def use_field_proc
|
107
|
+
proc_value_access(use_field)
|
108
|
+
end
|
109
|
+
|
110
|
+
def facet_field_proc
|
111
|
+
proc_value_access(facet_field)
|
112
|
+
end
|
113
|
+
|
114
|
+
def use_field
|
115
|
+
case_options.dig(:use_field)
|
116
|
+
end
|
37
117
|
|
38
|
-
|
118
|
+
def use_field?
|
119
|
+
!!use_field
|
120
|
+
end
|
121
|
+
|
122
|
+
def facet_field
|
123
|
+
case_options.dig(:facet_field)
|
124
|
+
end
|
125
|
+
|
126
|
+
def facet_field?
|
127
|
+
!!facet_field
|
128
|
+
end
|
129
|
+
|
130
|
+
def csv_file
|
131
|
+
case_options.dig(:csv_file)
|
132
|
+
end
|
133
|
+
|
134
|
+
def to_csv?
|
135
|
+
!!csv_file
|
136
|
+
end
|
137
|
+
|
138
|
+
def results_people_backup
|
139
|
+
case_options.dig(:backup_people)
|
140
|
+
end
|
141
|
+
|
142
|
+
def results_people_backup?
|
143
|
+
!!results_people_backup
|
144
|
+
end
|
145
|
+
|
146
|
+
def case_options
|
147
|
+
options.dig(:usecase, :analyse_people) || {}
|
148
|
+
end
|
39
149
|
|
40
|
-
|
150
|
+
def output_file
|
151
|
+
@output_file ||= options.dig(:output, :file) || "analytics.txt"
|
152
|
+
end
|
153
|
+
|
154
|
+
def save!(data)
|
155
|
+
ext = File.extname(output_file).downcase.delete(".")
|
156
|
+
session.logger.info("Generating file '#{output_file}'")
|
157
|
+
File.open(output_file, "w") do |fd|
|
41
158
|
if ext == "txt"
|
42
|
-
fd <<
|
159
|
+
fd << data
|
43
160
|
elsif ext == "html"
|
44
161
|
puts "html is still not supported"
|
45
162
|
exit(1)
|
@@ -50,4 +167,57 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
|
|
50
167
|
end
|
51
168
|
end
|
52
169
|
|
170
|
+
# Builds a Proc from a command line expression; sub-expressions separated by " AND " are each resolved on the person and joined with a space
|
171
|
+
# => e.g. details[first-name] AND details[surname]
|
172
|
+
def proc_value_access(expression)
|
173
|
+
#return expression.to_sym if expression.start_with?(":")
|
174
|
+
subexpressions = expression.split(" AND ")
|
175
|
+
Proc.new do |person|
|
176
|
+
values = subexpressions.map {|exp| attribute_access(person, exp)}
|
177
|
+
values.compact.join(" ")
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
# Resolves a dot-separated command line expression against a person; the resulting value must be a String or nil
|
182
|
+
# => e.g. person.details[first-name]
|
183
|
+
def attribute_access(person, expression)
|
184
|
+
parts = expression.split(".")
|
185
|
+
parts_to_value(person, parts).tap do |value|
|
186
|
+
unless value.is_a?(String) || !value
|
187
|
+
raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def parts_to_value(obj, parts)
|
193
|
+
parts.reduce(obj) do |object, part|
|
194
|
+
get_attr(object, part)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
def get_attr(obj, part)
|
199
|
+
case
|
200
|
+
when !obj
|
201
|
+
nil
|
202
|
+
when part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
|
203
|
+
obj.send(part.to_sym)
|
204
|
+
when part.start_with?(":")
|
205
|
+
get_attr(obj, part[1..-1])
|
206
|
+
when part.start_with?("details[")
|
207
|
+
if (obj.respond_to?(:details)) && details = obj.details
|
208
|
+
if match = part.match(/details\[(?<field>.*)\]/)
|
209
|
+
details[match[:field]]
|
210
|
+
else
|
211
|
+
raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
|
212
|
+
end
|
213
|
+
end
|
214
|
+
when part.start_with?("account")
|
215
|
+
obj.account if obj.respond_to?(:account)
|
216
|
+
when part.start_with?("person")
|
217
|
+
obj
|
218
|
+
else
|
219
|
+
raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
53
223
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
class Eco::API::UseCases::DefaultCases::CleanUnknownTags < Eco::API::Common::Loaders::UseCase
|
2
|
+
name "clean-unknown-tags"
|
3
|
+
type :transform
|
4
|
+
|
5
|
+
REGISTER_TAGS = [
|
6
|
+
"EVENT", "INJURY", "RISK", "CONTRACTOR", "PERMIT",
|
7
|
+
"AUDIT", "JSEA",
|
8
|
+
"TRAINING", "INDUCTION",
|
9
|
+
"MEETING", "PPE", "CHEMICAL",
|
10
|
+
"PLANT", "ASSET",
|
11
|
+
"POLICY", "IDEA", "REPORTS"
|
12
|
+
]
|
13
|
+
|
14
|
+
attr_reader :session, :options
|
15
|
+
|
16
|
+
def main(people, session, options, usecase)
|
17
|
+
@session = session; @options = options
|
18
|
+
|
19
|
+
update = session.new_job("main", "update", :update, usecase)
|
20
|
+
people.each do |person|
|
21
|
+
unknown_tags = person.filter_tags.select {|tag| !tag?(tag)}
|
22
|
+
person.filter_tags -= unknown_tags
|
23
|
+
update.add(person)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def tag?(value)
|
30
|
+
tagtree.tag?(value) || REGISTER_TAGS.any? {|reg| value == reg}
|
31
|
+
end
|
32
|
+
|
33
|
+
def tagtree
|
34
|
+
@tagtree ||= ASSETS.config.tagtree
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
@@ -18,6 +18,12 @@ ASSETS.cli.config do |cnf|
|
|
18
18
|
exit
|
19
19
|
end
|
20
20
|
|
21
|
+
desc = "Redirect Standard Ouput to file"
|
22
|
+
options_set.add("-stdout", desc) do |options, session|
|
23
|
+
file = SCR.get_arg("-stdout", with_param: true) || "output.txt"
|
24
|
+
STDOUT.reopen(file, "w+")
|
25
|
+
end
|
26
|
+
|
21
27
|
desc = "Fix the current session to work with this schema"
|
22
28
|
options_set.add("-schema-id", desc) do |options, session|
|
23
29
|
sch_name = SCR.get_arg("-schema-id", with_param: true)
|
@@ -41,6 +47,18 @@ ASSETS.cli.config do |cnf|
|
|
41
47
|
options.deep_merge!(input: {entries_from: true})
|
42
48
|
end
|
43
49
|
|
50
|
+
desc = "Used to only get the people from the input file. It will also include their current and new supervisors."
|
51
|
+
options_set.add("-get-partial", desc) do |options, session|
|
52
|
+
options.deep_merge!(people: {
|
53
|
+
get: {from: :remote, type: :partial}
|
54
|
+
})
|
55
|
+
end
|
56
|
+
|
57
|
+
desc = "Do not load any people for this run."
|
58
|
+
options_set.add("-no-people", desc) do |options, session|
|
59
|
+
options.deep_merge!(people: {get: false})
|
60
|
+
end
|
61
|
+
|
44
62
|
desc = "Locally cache all the people manager by retrieving from the server"
|
45
63
|
options_set.add("-get-people", desc) do |options, session|
|
46
64
|
options.deep_merge!(people: {
|
@@ -48,7 +66,17 @@ ASSETS.cli.config do |cnf|
|
|
48
66
|
})
|
49
67
|
end
|
50
68
|
|
51
|
-
|
69
|
+
desc = "Used to specify the cache file of people to be used. "
|
70
|
+
desc += "It is useful to use as people reference those stored in cached file diffrent to the last one."
|
71
|
+
options_set.add("-people-from-backup", desc) do |options, session|
|
72
|
+
file = SCR.get_file("-people-from-backup", required: true, should_exist: true)
|
73
|
+
options.deep_merge!(people: {
|
74
|
+
get: {from: :local, type: :file, file: file}
|
75
|
+
})
|
76
|
+
end
|
77
|
+
|
78
|
+
desc = "Runs in dry-run (no requests sent to server)"
|
79
|
+
options_set.add(["-dry-run", "-simulate"], desc) do |options, session|
|
52
80
|
options[:dry_run] = true
|
53
81
|
options[:simulate] = true
|
54
82
|
session.config.dry_run!
|
@@ -1,29 +1,23 @@
|
|
1
1
|
ASSETS.cli.config do |cnf|
|
2
2
|
cnf.people do |input, session, options|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
people
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
options.deep_merge!(people: {
|
14
|
-
get: {from: :local, type: :backup}
|
15
|
-
})
|
16
|
-
people = JSON.parse(File.read(file))
|
17
|
-
people = Eco::API::Organization::People.new(people)
|
18
|
-
elsif SCR.get_arg("-get-partial")
|
19
|
-
unless input && input.is_a?(Enumerable)
|
3
|
+
get = options.dig(:people, :get) || {}
|
4
|
+
case
|
5
|
+
when get == false
|
6
|
+
Eco::API::Organization::People.new([])
|
7
|
+
when (get[:from] == :remote) && get[:type] == :full
|
8
|
+
# -get-people
|
9
|
+
session.micro.people_cache
|
10
|
+
when (get[:from] == :remote) && get[:type] == :partial
|
11
|
+
# -get-partial
|
12
|
+
unless (input && input.is_a?(Enumerable))
|
20
13
|
raise "To use -get-partial (partial updates), you need to use -entries-from"
|
21
14
|
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
people =
|
15
|
+
session.micro.people_search(input, options: options)
|
16
|
+
when (get[:from] == :local) && get[:type] == :file
|
17
|
+
# -people-from-backup
|
18
|
+
session.micro.people_load(get[:file], modifier: :file)
|
19
|
+
#people = JSON.parse(File.read(get[:file]))
|
20
|
+
#Eco::API::Organization::People.new(people)
|
27
21
|
else
|
28
22
|
options.deep_merge!(people: {
|
29
23
|
get: {from: :local, type: :full}
|
@@ -33,9 +27,9 @@ ASSETS.cli.config do |cnf|
|
|
33
27
|
options.deep_merge!(people: {
|
34
28
|
get: {from: :remote, type: :full}
|
35
29
|
})
|
36
|
-
people = session.micro.people_cache
|
30
|
+
people = session.micro.people_cache
|
37
31
|
end
|
32
|
+
people
|
38
33
|
end
|
39
|
-
people
|
40
34
|
end
|
41
35
|
end
|
@@ -26,11 +26,36 @@ ASSETS.cli.config do |cnf|
|
|
26
26
|
end
|
27
27
|
|
28
28
|
desc = "Provides a set of tools to analyse a set of people (i.e. detect duplicates)"
|
29
|
-
cases.add("-analyse-people", :export, desc, case_name: "
|
29
|
+
cases.add("-analyse-people", :export, desc, case_name: "analyse-people") do |people, session, options|
|
30
30
|
options.deep_merge!(output: {file: "people_analysis.txt"}) unless options.dig(:output, :file)
|
31
|
-
|
31
|
+
#unless options.dig(:usecase, :analyse_people, :use_field)
|
32
|
+
# options.deep_merge!(usecase: {analyse_people: {use_field: :name}})
|
33
|
+
#end
|
34
|
+
end.add_option("-to", "Specify the output file.") do |options|
|
32
35
|
file = SCR.get_file("-to", required: true, should_exist: false)
|
33
36
|
options.deep_merge!(output: {file: file})
|
37
|
+
end.add_option("-identify-duplicates", "Generates a list of people with possible duplicates.") do |options|
|
38
|
+
options.deep_merge!(usecase: {analyse_people: {identify_duplicates: true}})
|
39
|
+
end.add_option("-use-field", "Works with -identify-duplicates. Sets field to be used in the comparison.") do |options|
|
40
|
+
expression = SCR.get_arg("-use-field", with_param: true)
|
41
|
+
options.deep_merge!(usecase: {analyse_people: {use_field: expression}})
|
42
|
+
end.add_option("-facet-field", "Works with -identify-duplicates. Adds an additional layer of comparison.") do |options|
|
43
|
+
expression = SCR.get_arg("-facet-field", with_param: true)
|
44
|
+
options.deep_merge!(usecase: {analyse_people: {facet_field: expression}})
|
45
|
+
end.add_option("-only-screening", "Works with -identify-duplicates. Skips the rearrangement stage.") do |options|
|
46
|
+
options.deep_merge!(usecase: {analyse_people: {only_screening: true}})
|
47
|
+
end.add_option("-ignore-matching-words", "Works with -identify-duplicates. Re-adjust scores ignoring matching words.") do |options|
|
48
|
+
options.deep_merge!(usecase: {analyse_people: {ignore_matching_words: true}})
|
49
|
+
end.add_option("-unique-words", "Works with -identify-duplicates. Re-adjust the comparing strings to do not have repeated words.") do |options|
|
50
|
+
options.deep_merge!(usecase: {analyse_people: {unique_words: true}})
|
51
|
+
end.add_option("-identify-unnamed", "Identifies all people with no names.") do |options|
|
52
|
+
options.deep_merge!(usecase: {analyse_people: {identify_unnamed: true}})
|
53
|
+
end.add_option("-backup-people-results", "Generates a json file with all the people involved in the final results of the analysis.") do |options|
|
54
|
+
file = SCR.get_file("-backup-people-results", required: true, should_exist: false)
|
55
|
+
options.deep_merge!(usecase: {analyse_people: {backup_people: File.expand_path(file)}})
|
56
|
+
end.add_option("-to-csv", "Genarates a CSV file with all people of the final results.") do |options|
|
57
|
+
file = SCR.get_file("-to-csv", required: true, should_exist: false) || "Results.csv"
|
58
|
+
options.deep_merge!(usecase: {analyse_people: {csv_file: File.expand_path(file)}})
|
34
59
|
end
|
35
60
|
|
36
61
|
desc = "It exports to a CSV the (filtered) people"
|
@@ -62,6 +87,10 @@ ASSETS.cli.config do |cnf|
|
|
62
87
|
options.deep_merge!(other: {file: {codes_column: col_codes}})
|
63
88
|
end
|
64
89
|
|
90
|
+
desc = "Cleans from filter_tags those tags that are not present in the tagtree (as per tagtree.json file)."
|
91
|
+
desc += " It will preserve standard register tags of most common registers (i.e. EVENT, RISK)."
|
92
|
+
cases.add("-clean-unknown-tags", :transform, desc, case_name: "clean-unknown-tags")
|
93
|
+
|
65
94
|
desc = "Removes the landing page or sets it to -page-id"
|
66
95
|
cases.add("-reset-landing-page", :transform, desc, case_name: "reset-landing-page")
|
67
96
|
.add_option("-page-id", "Target landing page to set to the users") do |options|
|