eco-helpers 2.0.19 → 2.0.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/eco-helpers.gemspec +5 -1
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +49 -15
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +2 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +4 -2
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/cli/config/default/options.rb +29 -1
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +7 -5
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +83 -2
data/lib/eco/api/session.rb
CHANGED
@@ -106,11 +106,13 @@ module Eco
|
|
106
106
|
# @param attr [String] type (`Symbol`) or attribute (`String`) to target a specific parser.
|
107
107
|
# @param source [Any] source value to be parsed.
|
108
108
|
# @param phase [Symbol] the phase when this parser should be active.
|
109
|
-
|
109
|
+
# @param deps [Hash] extra values handed over, untouched, to the parser through its `deps:` keyword.
# @return [Object] the parsed attribute.
# @raise [RuntimeError] when the entry factory exposes no person parser.
def parse_attribute(attr, source, phase = :internal, deps: {})
  parsers = entry_factory.person_parser
  raise "There are no parsers defined" unless parsers

  parsers.parse(attr, source, phase, deps: deps)
end
|
115
117
|
|
116
118
|
# @see Eco::API::Common::People::EntryFactory#export
|
@@ -13,6 +13,7 @@ require_relative 'default_cases/abstract_policygroup_abilities_case.rb'
|
|
13
13
|
require_relative 'default_cases/analyse_people_case'
|
14
14
|
require_relative 'default_cases/append_usergroups_case'
|
15
15
|
require_relative 'default_cases/change_email_case'
|
16
|
+
require_relative 'default_cases/clean_unknown_tags_case'
|
16
17
|
require_relative 'default_cases/codes_to_tags_case'
|
17
18
|
require_relative 'default_cases/create_case'
|
18
19
|
require_relative 'default_cases/create_details_case'
|
@@ -5,41 +5,158 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
|
|
5
5
|
attr_reader :session, :people, :options
|
6
6
|
|
7
7
|
# Entry point of the use case. Runs the analysis selected through the case
# options and, when that analysis yields people, exports them to CSV and/or
# backs them up (each only if requested through the case options).
#
# @param people [Object] the people to analyse (kept in `@people`).
# @param session [Object] the current session (kept in `@session`); its logger is used for messages.
# @param options [Hash] run options (kept in `@options`); `:end_get` is forced to `false`.
# @param usecase [Object] the use case being launched (not used here).
# @return [Object, nil] the people involved in the results, or `nil` when no analysis was specified.
def main(people, session, options, usecase)
  options[:end_get] = false
  @session = session; @options = options; @people = people

  people_involved =
    if case_options[:identify_duplicates]
      identify_duplicates
    elsif case_options[:identify_unnamed]
      identify_unnamed
    else
      session.logger.info("No analysis operation was specified")
      # Do not leak the logger's return value (truthy for stdlib-like loggers)
      # as if it was a set of people to export/backup.
      nil
    end

  if people_involved
    to_csv(people_involved) if to_csv?
    create_people_backup(people_involved) if results_people_backup?
  end
  people_involved
end
|
12
25
|
|
13
26
|
private
|
14
27
|
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
analytics.re_analyse(analysed, threshold: 0.5, order: [:average])
|
28
|
+
# Asks the similarity analytics for the unnamed people and logs an
# informational message when that result is empty.
# @return [Object] whatever `similarity_analytics.unnamed` returned.
def identify_unnamed
  nameless = similarity_analytics.unnamed
  session.logger.info("There were no people with no name!!") if nameless.empty?
  nameless
end
|
23
35
|
|
24
|
-
def
|
25
|
-
|
36
|
+
# Screens the people for possible duplicates. The screening results are
# optionally fine-tuned (case option `:ignore_matching_words`); when related
# people are found, a text report is generated and saved.
# @return [Object] the result of `similarity_analytics.newSimilarity`.
def identify_duplicates
  results = similarity_screening
  if case_options[:ignore_matching_words]
    puts "Fine tune results by ignoring matching words..."
    results = strict_similarity(results)
  end

  related_people = similarity_analytics.newSimilarity(results)
  if related_people.empty?
    session.logger.info("There were no possible duplicates identified!!")
  else
    save!(similarity_analytics.report(results, format: :txt))
  end
  related_people
end
|
27
52
|
|
28
|
-
def
|
29
|
-
|
53
|
+
# Re-scores the given results through `ignore_matching_words`, with a
# threshold of 0.5 and ordering by `:ngrams`.
# @param analysed [Object] results of a previous screening.
# @return [Object] whatever `similarity_analytics.ignore_matching_words` returns.
def strict_similarity(analysed)
  settings = {threshold: 0.5, order: [:ngrams]}
  similarity_analytics.ignore_matching_words(analysed, **settings)
end
|
31
59
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
60
|
+
# Runs the basic similarity screening and, unless the case option
# `:only_screening` is set, rearranges the results afterwards.
# Progress is printed to standard output.
#
# Local names are kept distinct from the `options` reader and from each
# other, so nothing gets shadowed.
# @return [Object] the screening results, or the rearranged results.
def similarity_screening
  similarity_analytics.attribute = field_similarity

  screening_opts = {threshold: 0.4, order: [:average, :dice]}
  screening_opts[:needle_read]  = facet_field_proc if facet_field?
  screening_opts[:unique_words] = true if unique_words?

  analysed = similarity_analytics.analyse(**screening_opts)
  puts "Got #{analysed.count} results after basic screening with #{screening_opts}"

  return analysed if case_options[:only_screening]

  rearrange_opts = {threshold: 0.5, order: [:average]}
  puts "Going to rearrange results... with #{rearrange_opts}"
  rearranged = similarity_analytics.rearrange(analysed, **rearrange_opts)
  puts "... got #{rearranged.count} results after rearranging"
  rearranged
end
|
79
|
+
|
80
|
+
# Memoized access to `people.similarity`.
# @return [Object] the similarity helper obtained from `people`.
def similarity_analytics
  return @analytics if @analytics

  @analytics = people.similarity
end
|
83
|
+
|
84
|
+
# Saves the given people as json through the session's file manager.
# @param cut [Object] the people to save (defaults to `people`).
# @param file [String] target file (defaults to `results_people_backup`).
def create_people_backup(cut = people, file = results_people_backup)
  manager = session.file_manager
  manager.save_json(cut, file)
end
|
87
|
+
|
88
|
+
# Launches the "to-csv" export case for the given people.
# The export options built here get (shallowly) merged with the `:export`
# entry of the run options, if there is one.
# @param data [Object] the people to export (defaults to `people`).
# @param file [String] name of the target csv file (defaults to `csv_file`).
def to_csv(data = people, file = csv_file)
  export_defaults = {}
  export_defaults.deep_merge!(export: {
    file:    {name: file, format: :csv},
    options: {nice_header: true, internal_names: true}
  })
  #export_defaults.deep_merge!(export: {options: {split_schemas: true}})
  case_opts = export_defaults.merge(options.slice(:export))
  session.process_case("to-csv", type: :export, people: data, options: case_opts)
end
|
96
|
+
|
97
|
+
# @return [Object] the value of the `:unique_words` case option (`nil` when not set).
def unique_words?
  case_options.dig(:unique_words)
end
|
100
|
+
|
101
|
+
# What the similarity analysis should compare on.
# @return [Symbol, Proc] `:name` unless a field expression was given, in which
#   case the reader built out of that expression.
def field_similarity
  use_field? ? use_field_proc : :name
end
|
105
|
+
|
106
|
+
# @return [Proc] reader built by `proc_value_access` out of the `use_field` expression.
def use_field_proc
  expression = use_field
  proc_value_access(expression)
end
|
109
|
+
|
110
|
+
# @return [Proc] reader built by `proc_value_access` out of the `facet_field` expression.
def facet_field_proc
  expression = facet_field
  proc_value_access(expression)
end
|
113
|
+
|
114
|
+
# @return [Object, nil] the `:use_field` case option.
def use_field
  case_options[:use_field]
end
|
37
117
|
|
38
|
-
|
118
|
+
# @return [Boolean] `true` when the `:use_field` case option is set.
def use_field?
  use_field ? true : false
end
|
121
|
+
|
122
|
+
# @return [Object, nil] the `:facet_field` case option.
def facet_field
  case_options[:facet_field]
end
|
125
|
+
|
126
|
+
# @return [Boolean] `true` when the `:facet_field` case option is set.
def facet_field?
  facet_field ? true : false
end
|
129
|
+
|
130
|
+
# @return [Object, nil] the `:csv_file` case option.
def csv_file
  case_options[:csv_file]
end
|
133
|
+
|
134
|
+
# @return [Boolean] `true` when a csv file was specified in the case options.
def to_csv?
  csv_file ? true : false
end
|
137
|
+
|
138
|
+
# @return [Object, nil] the `:backup_people` case option.
def results_people_backup
  case_options[:backup_people]
end
|
141
|
+
|
142
|
+
# @return [Boolean] `true` when a people backup file was specified in the case options.
def results_people_backup?
  results_people_backup ? true : false
end
|
145
|
+
|
146
|
+
# Options that are specific to this use case.
# @return [Hash] the `[:usecase][:analyse_people]` entry of the run options,
#   or an empty Hash when there is none.
def case_options
  settings = options.dig(:usecase, :analyse_people)
  settings || {}
end
|
39
149
|
|
40
|
-
|
150
|
+
# Memoized name of the output file.
# @return [String] the `[:output][:file]` run option, or "analytics.txt" when not set.
def output_file
  return @output_file if @output_file

  @output_file = options.dig(:output, :file) || "analytics.txt"
end
|
153
|
+
|
154
|
+
def save!(data)
|
155
|
+
ext = File.extname(output_file).downcase.delete(".")
|
156
|
+
session.logger.info("Generating file '#{output_file}'")
|
157
|
+
File.open(output_file, "w") do |fd|
|
41
158
|
if ext == "txt"
|
42
|
-
fd <<
|
159
|
+
fd << data
|
43
160
|
elsif ext == "html"
|
44
161
|
puts "html is still not supported"
|
45
162
|
exit(1)
|
@@ -50,4 +167,57 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
|
|
50
167
|
end
|
51
168
|
end
|
52
169
|
|
170
|
+
# Turns a command line expression into a reader of a person's values.
# Sub-expressions are separated by " AND "; their non-nil values are joined
# with a single space.
# => i.e. details[first-name] AND details[surname]
# @param expression [String] the expression to split.
# @return [Proc] receives a person and returns the joined String.
def proc_value_access(expression)
  #return expression.to_sym if expression.start_with?(":")
  chunks = expression.split(" AND ")
  proc do |person|
    chunks.map { |chunk| attribute_access(person, chunk) }.compact.join(" ")
  end
end
|
180
|
+
|
181
|
+
# Resolves a dot-separated expression against a person.
# => i.e. person.details[first-name]
# @param person [Object] the object to start the resolution from.
# @param expression [String] dot-separated parts.
# @return [String, nil, false] the resolved value.
# @raise [RuntimeError] when the resolved value is truthy but not a String.
def attribute_access(person, expression)
  parts = expression.split(".")
  value = parts_to_value(person, parts)
  if value && !value.is_a?(String)
    raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
  end
  value
end
|
191
|
+
|
192
|
+
# Walks through the parts, feeding each intermediate result to `get_attr`.
# @param obj [Object] the starting object.
# @param parts [Array] the successive parts to resolve.
# @return [Object] the last value obtained (`obj` itself when there are no parts).
def parts_to_value(obj, parts)
  current = obj
  parts.each { |part| current = get_attr(current, part) }
  current
end
|
197
|
+
|
198
|
+
# Resolves a single part of an expression against an object.
#
# Resolution order:
#   1. a falsy `obj` resolves to `nil`;
#   2. a Symbol part, or a part `obj` publicly responds to, is called on `obj`
#      (via `public_send`, so private methods are never reachable);
#   3. a leading ":" is dropped and the rest is resolved again;
#   4. "details[field]" reads `field` from `obj.details` (`nil` when there are no details);
#   5. a part starting with "account" reads `obj.account` when available;
#   6. a part starting with "person" resolves to `obj` itself.
#
# @param obj [Object, nil] the object to read from.
# @param part [String, Symbol] the part to resolve.
# @return [Object, nil] the resolved value.
# @raise [RuntimeError] when the part cannot be recognized, or a details part is malformed.
def get_attr(obj, part)
  return nil unless obj
  return obj.public_send(part.to_sym) if part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
  return get_attr(obj, part[1..-1]) if part.start_with?(":")

  if part.start_with?("details[")
    details = obj.respond_to?(:details) && obj.details
    return nil unless details

    match = part.match(/details\[(?<field>.*)\]/)
    raise "Review your -use-field expression. It should read: person.details[target-alt_id]" unless match

    details[match[:field]]
  elsif part.start_with?("account")
    obj.account if obj.respond_to?(:account)
  elsif part.start_with?("person")
    obj
  else
    raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
  end
end
|
222
|
+
|
53
223
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Use case that removes, from the `filter_tags` of each person, those tags
# that are neither recognized by the tagtree nor listed in `REGISTER_TAGS`.
class Eco::API::UseCases::DefaultCases::CleanUnknownTags < Eco::API::Common::Loaders::UseCase
  name "clean-unknown-tags"
  type :transform

  # Tags that are always preserved, even when the tagtree does not know them.
  REGISTER_TAGS = [
    "EVENT", "INJURY", "RISK", "CONTRACTOR", "PERMIT",
    "AUDIT", "JSEA",
    "TRAINING", "INDUCTION",
    "MEETING", "PPE", "CHEMICAL",
    "PLANT", "ASSET",
    "POLICY", "IDEA", "REPORTS"
  ].freeze

  attr_reader :session, :options

  # @param people [Enumerable] the people whose `filter_tags` should be cleaned.
  # @param session [Object] the current session; used to create the update job.
  # @param options [Hash] the run options (kept in `@options`).
  # @param usecase [Object] the use case being launched; handed over to the job.
  def main(people, session, options, usecase)
    @session = session; @options = options

    update = session.new_job("main", "update", :update, usecase)
    people.each do |person|
      unknown_tags = person.filter_tags.reject {|tag| tag?(tag)}
      person.filter_tags -= unknown_tags
      # Every person is queued, whether or not any tag was removed.
      update.add(person)
    end
  end

  private

  # @return [Boolean] whether the tagtree knows the tag or it is one of `REGISTER_TAGS`.
  def tag?(value)
    tagtree.tag?(value) || REGISTER_TAGS.include?(value)
  end

  # Memoized access to the tagtree of the global configuration.
  def tagtree
    @tagtree ||= ASSETS.config.tagtree
  end

end
|
@@ -18,6 +18,12 @@ ASSETS.cli.config do |cnf|
|
|
18
18
|
exit
|
19
19
|
end
|
20
20
|
|
21
|
+
# Option that reopens STDOUT onto a file: the one given after `-stdout`,
# or "output.txt" when no file name is given.
desc = "Redirect Standard Output to file"
options_set.add("-stdout", desc) do |options, session|
  file = SCR.get_arg("-stdout", with_param: true) || "output.txt"
  STDOUT.reopen(file, "w+")
end
|
26
|
+
|
21
27
|
desc = "Fix the current session to work with this schema"
|
22
28
|
options_set.add("-schema-id", desc) do |options, session|
|
23
29
|
sch_name = SCR.get_arg("-schema-id", with_param: true)
|
@@ -41,6 +47,18 @@ ASSETS.cli.config do |cnf|
|
|
41
47
|
options.deep_merge!(input: {entries_from: true})
|
42
48
|
end
|
43
49
|
|
50
|
+
# Option that flags the run to get people remotely and partially.
desc = "Used to only get the people from the input file. It will also include their current and new supervisors."
options_set.add("-get-partial", desc) do |options, session|
  partial_remote = {from: :remote, type: :partial}
  options.deep_merge!(people: {get: partial_remote})
end
|
56
|
+
|
57
|
+
# Option that sets `[:people][:get]` to `false` in the run options.
desc = "Do not load any people for this run."
options_set.add("-no-people", desc) do |options, session|
  skip_people = {get: false}
  options.deep_merge!(people: skip_people)
end
|
61
|
+
|
44
62
|
desc = "Locally cache all the people manager by retrieving from the server"
|
45
63
|
options_set.add("-get-people", desc) do |options, session|
|
46
64
|
options.deep_merge!(people: {
|
@@ -48,7 +66,17 @@ ASSETS.cli.config do |cnf|
|
|
48
66
|
})
|
49
67
|
end
|
50
68
|
|
51
|
-
|
69
|
+
# Option that flags the run to load people from a specific local file
# (the file must exist).
desc = "Used to specify the cache file of people to be used. " \
       "It is useful to use as people reference those stored in cached file different to the last one."
options_set.add("-people-from-backup", desc) do |options, session|
  file = SCR.get_file("-people-from-backup", required: true, should_exist: true)
  options.deep_merge!(people: {
    get: {from: :local, type: :file, file: file}
  })
end
|
77
|
+
|
78
|
+
desc = "Runs in dry-run (no requests sent to server)"
|
79
|
+
options_set.add(["-dry-run", "-simulate"], desc) do |options, session|
|
52
80
|
options[:dry_run] = true
|
53
81
|
options[:simulate] = true
|
54
82
|
session.config.dry_run!
|
@@ -1,29 +1,23 @@
|
|
1
1
|
ASSETS.cli.config do |cnf|
|
2
2
|
cnf.people do |input, session, options|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
people
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
options.deep_merge!(people: {
|
14
|
-
get: {from: :local, type: :backup}
|
15
|
-
})
|
16
|
-
people = JSON.parse(File.read(file))
|
17
|
-
people = Eco::API::Organization::People.new(people)
|
18
|
-
elsif SCR.get_arg("-get-partial")
|
19
|
-
unless input && input.is_a?(Enumerable)
|
3
|
+
get = options.dig(:people, :get) || {}
|
4
|
+
case
|
5
|
+
when get == false
|
6
|
+
Eco::API::Organization::People.new([])
|
7
|
+
when (get[:from] == :remote) && get[:type] == :full
|
8
|
+
# -get-people
|
9
|
+
session.micro.people_cache
|
10
|
+
when (get[:from] == :remote) && get[:type] == :partial
|
11
|
+
# -get-partial
|
12
|
+
unless (input && input.is_a?(Enumerable))
|
20
13
|
raise "To use -get-partial (partial updates), you need to use -entries-from"
|
21
14
|
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
people =
|
15
|
+
session.micro.people_search(input, options: options)
|
16
|
+
when (get[:from] == :local) && get[:type] == :file
|
17
|
+
# -people-from-backup
|
18
|
+
session.micro.people_load(get[:file], modifier: :file)
|
19
|
+
#people = JSON.parse(File.read(get[:file]))
|
20
|
+
#Eco::API::Organization::People.new(people)
|
27
21
|
else
|
28
22
|
options.deep_merge!(people: {
|
29
23
|
get: {from: :local, type: :full}
|
@@ -33,9 +27,9 @@ ASSETS.cli.config do |cnf|
|
|
33
27
|
options.deep_merge!(people: {
|
34
28
|
get: {from: :remote, type: :full}
|
35
29
|
})
|
36
|
-
people = session.micro.people_cache
|
30
|
+
people = session.micro.people_cache
|
37
31
|
end
|
32
|
+
people
|
38
33
|
end
|
39
|
-
people
|
40
34
|
end
|
41
35
|
end
|
@@ -26,11 +26,36 @@ ASSETS.cli.config do |cnf|
|
|
26
26
|
end
|
27
27
|
|
28
28
|
# Registers the "analyse-people" export case together with its options.
# Each option only records its setting in the run options, under
# `[:usecase][:analyse_people]` (or `[:output][:file]` for `-to`).
desc = "Provides a set of tools to analyse a set of people (i.e. detect duplicates)"
cases.add("-analyse-people", :export, desc, case_name: "analyse-people") do |people, session, options|
  # Default output file, unless one was already specified.
  options.deep_merge!(output: {file: "people_analysis.txt"}) unless options.dig(:output, :file)
  #unless options.dig(:usecase, :analyse_people, :use_field)
  #  options.deep_merge!(usecase: {analyse_people: {use_field: :name}})
  #end
end.add_option("-to", "Specify the output file.") do |options|
  file = SCR.get_file("-to", required: true, should_exist: false)
  options.deep_merge!(output: {file: file})
end.add_option("-identify-duplicates", "Generates a list of people with possible duplicates.") do |options|
  options.deep_merge!(usecase: {analyse_people: {identify_duplicates: true}})
end.add_option("-use-field", "Works with -identify-duplicates. Sets field to be used in the comparison.") do |options|
  expression = SCR.get_arg("-use-field", with_param: true)
  options.deep_merge!(usecase: {analyse_people: {use_field: expression}})
end.add_option("-facet-field", "Works with -identify-duplicates. Adds an additional layer of comparison.") do |options|
  expression = SCR.get_arg("-facet-field", with_param: true)
  options.deep_merge!(usecase: {analyse_people: {facet_field: expression}})
end.add_option("-only-screening", "Works with -identify-duplicates. Skips the rearrangement stage.") do |options|
  options.deep_merge!(usecase: {analyse_people: {only_screening: true}})
end.add_option("-ignore-matching-words", "Works with -identify-duplicates. Re-adjust scores ignoring matching words.") do |options|
  options.deep_merge!(usecase: {analyse_people: {ignore_matching_words: true}})
end.add_option("-unique-words", "Works with -identify-duplicates. Re-adjust the comparing strings to do not have repeated words.") do |options|
  options.deep_merge!(usecase: {analyse_people: {unique_words: true}})
end.add_option("-identify-unnamed", "Identifies all people with no names.") do |options|
  options.deep_merge!(usecase: {analyse_people: {identify_unnamed: true}})
end.add_option("-backup-people-results", "Generates a json file with all the people involved in the final results of the analysis.") do |options|
  file = SCR.get_file("-backup-people-results", required: true, should_exist: false)
  options.deep_merge!(usecase: {analyse_people: {backup_people: File.expand_path(file)}})
end.add_option("-to-csv", "Generates a CSV file with all people of the final results.") do |options|
  file = SCR.get_file("-to-csv", required: true, should_exist: false) || "Results.csv"
  options.deep_merge!(usecase: {analyse_people: {csv_file: File.expand_path(file)}})
end
|
35
60
|
|
36
61
|
desc = "It exports to a CSV the (filtered) people"
|
@@ -62,6 +87,10 @@ ASSETS.cli.config do |cnf|
|
|
62
87
|
options.deep_merge!(other: {file: {codes_column: col_codes}})
|
63
88
|
end
|
64
89
|
|
90
|
+
# Registers the "clean-unknown-tags" transform case.
desc = [
  "Cleans from filter_tags those tags that are not present in the tagtree (as per tagtree.json file).",
  " It will preserve standard register tags of most common registers (i.e. EVENT, RISK)."
].join
cases.add("-clean-unknown-tags", :transform, desc, case_name: "clean-unknown-tags")
|
93
|
+
|
65
94
|
desc = "Removes the landing page or sets it to -page-id"
|
66
95
|
cases.add("-reset-landing-page", :transform, desc, case_name: "reset-landing-page")
|
67
96
|
.add_option("-page-id", "Target landing page to set to the users") do |options|
|