eco-helpers 2.0.19 → 2.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,11 +106,13 @@ module Eco
106
106
  # @param attr [String, Symbol] type (`Symbol`) or attribute (`String`) to target a specific parser.
107
107
  # @param source [Any] source value to be parsed.
108
108
  # @param phase [Symbol] the phase when this parser should be active.
109
- def parse_attribute(attr, source, phase = :internal)
109
+ # @param deps [Hash] forwarded unchanged to the parser as its `deps:` keyword argument (defaults to `{}`).
110
+ # @return [Object] the parsed attribute.
111
+ def parse_attribute(attr, source, phase = :internal, deps: {})
110
112
  unless parsers = entry_factory.person_parser
111
113
  raise "There are no parsers defined"
112
114
  end
113
- parsers.parse(attr, source, phase)
115
+ parsers.parse(attr, source, phase, deps: deps)
114
116
  end
115
117
 
116
118
  # @see Eco::API::Common::People::EntryFactory#export
@@ -13,6 +13,7 @@ require_relative 'default_cases/abstract_policygroup_abilities_case.rb'
13
13
  require_relative 'default_cases/analyse_people_case'
14
14
  require_relative 'default_cases/append_usergroups_case'
15
15
  require_relative 'default_cases/change_email_case'
16
+ require_relative 'default_cases/clean_unknown_tags_case'
16
17
  require_relative 'default_cases/codes_to_tags_case'
17
18
  require_relative 'default_cases/create_case'
18
19
  require_relative 'default_cases/create_details_case'
@@ -5,41 +5,158 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
5
5
  attr_reader :session, :people, :options
6
6
 
7
7
  def main(people, session, options, usecase)
8
+ options[:end_get] = false
8
9
  @session = session; @options = options; @people = people
9
10
 
10
- save!(analysis)
11
+ case
12
+ when case_options[:identify_duplicates]
13
+ identify_duplicates
14
+ when case_options[:identify_unnamed]
15
+ identify_unnamed
16
+ else
17
+ session.logger.info("No analysis operation was specified")
18
+ end.tap do |people_involved|
19
+ if people_involved
20
+ to_csv(people_involved) if to_csv?
21
+ create_people_backup(people_involved) if results_people_backup?
22
+ end
23
+ end
11
24
  end
12
25
 
13
26
  private
14
27
 
15
- def analysis
16
- analytics.attribute = :name
17
- analysed = analytics.analyse({
18
- threshold: 0.20,
19
- order: [:average, :dice]
20
- })
21
- analytics.re_analyse(analysed, threshold: 0.5, order: [:average])
28
+ def identify_unnamed
29
+ similarity_analytics.unnamed.tap do |unnamed|
30
+ if unnamed.empty?
31
+ session.logger.info("There were no people with no name!!")
32
+ end
33
+ end
22
34
  end
23
35
 
24
- def analytics
25
- @analytics ||= people.analytics
36
+ def identify_duplicates
37
+ analysed = similarity_screening
38
+ if case_options[:ignore_matching_words]
39
+ puts "Fine tune results by ignoring matching words..."
40
+ analysed = strict_similarity(analysed)
41
+ end
42
+
43
+ similarity_analytics.newSimilarity(analysed).tap do |related_people|
44
+ if related_people.empty?
45
+ session.logger.info("There were no possible duplicates identified!!")
46
+ else
47
+ report = similarity_analytics.report(analysed, format: :txt)
48
+ save!(report)
49
+ end
50
+ end
26
51
  end
27
52
 
28
- def file
29
- @file ||= options.dig(:output, :file) || "analytics.txt"
53
+ def strict_similarity(analysed)
54
+ similarity_analytics.ignore_matching_words(analysed, **{
55
+ threshold: 0.5,
56
+ order: [:ngrams]
57
+ })
30
58
  end
31
59
 
32
- def save!(analysed)
33
- if analysed.empty?
34
- session.logger.info("There were results identified!!")
35
- return
60
+ def similarity_screening
61
+ similarity_analytics.attribute = field_similarity
62
+ options = {
63
+ threshold: 0.4,
64
+ order: [:average, :dice]
65
+ }.tap do |opts|
66
+ opts.merge!(needle_read: facet_field_proc) if facet_field?
67
+ opts.merge!(unique_words: true) if unique_words?
68
+ end
69
+ analysed = similarity_analytics.analyse(**options)
70
+ puts "Got #{analysed.count} results after basic screening with #{options}"
71
+
72
+ return analysed if case_options[:only_screening]
73
+ options = {threshold: 0.5, order: [:average]}
74
+ puts "Going to rearrange results... with #{options}"
75
+ similarity_analytics.rearrange(analysed, **options).tap do |analysed|
76
+ puts "... got #{analysed.count} results after rearranging"
36
77
  end
78
+ end
79
+
80
+ def similarity_analytics
81
+ @analytics ||= people.similarity
82
+ end
83
+
84
+ def create_people_backup(cut = people, file = results_people_backup)
85
+ session.file_manager.save_json(cut, file)
86
+ end
87
+
88
+ def to_csv(data = people, file = csv_file)
89
+ opts = {}
90
+ opts.deep_merge!(export: {file: {name: file, format: :csv}})
91
+ opts.deep_merge!(export: {options: {nice_header: true}})
92
+ opts.deep_merge!(export: {options: {internal_names: true}})
93
+ #opts.deep_merge!(export: {options: {split_schemas: true}})
94
+ session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
95
+ end
96
+
97
+ def unique_words?
98
+ case_options[:unique_words]
99
+ end
100
+
101
+ def field_similarity
102
+ return :name unless use_field?
103
+ use_field_proc
104
+ end
105
+
106
+ def use_field_proc
107
+ proc_value_access(use_field)
108
+ end
109
+
110
+ def facet_field_proc
111
+ proc_value_access(facet_field)
112
+ end
113
+
114
+ def use_field
115
+ case_options.dig(:use_field)
116
+ end
37
117
 
38
- ext = File.extname(file).downcase.delete(".")
118
+ def use_field?
119
+ !!use_field
120
+ end
121
+
122
+ def facet_field
123
+ case_options.dig(:facet_field)
124
+ end
125
+
126
+ def facet_field?
127
+ !!facet_field
128
+ end
129
+
130
+ def csv_file
131
+ case_options.dig(:csv_file)
132
+ end
133
+
134
+ def to_csv?
135
+ !!csv_file
136
+ end
137
+
138
+ def results_people_backup
139
+ case_options.dig(:backup_people)
140
+ end
141
+
142
+ def results_people_backup?
143
+ !!results_people_backup
144
+ end
145
+
146
+ def case_options
147
+ options.dig(:usecase, :analyse_people) || {}
148
+ end
39
149
 
40
- File.open(file, "w") do |fd|
150
+ def output_file
151
+ @output_file ||= options.dig(:output, :file) || "analytics.txt"
152
+ end
153
+
154
+ def save!(data)
155
+ ext = File.extname(output_file).downcase.delete(".")
156
+ session.logger.info("Generating file '#{output_file}'")
157
+ File.open(output_file, "w") do |fd|
41
158
  if ext == "txt"
42
- fd << analytics.analysis(analysed, format: :txt)
159
+ fd << data
43
160
  elsif ext == "html"
44
161
  puts "html is still not supported"
45
162
  exit(1)
@@ -50,4 +167,57 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
50
167
  end
51
168
  end
52
169
 
170
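+ # Builds a Proc from a command line expression: the expression is split on " AND ", each part is resolved against the person, and the non-nil values are joined with a space.
171
+ # => i.e. details[first-name] AND details[surname]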
172
+ def proc_value_access(expression)
173
+ #return expression.to_sym if expression.start_with?(":")
174
+ subexpressions = expression.split(" AND ")
175
+ Proc.new do |person|
176
+ values = subexpressions.map {|exp| attribute_access(person, exp)}
177
+ values.compact.join(" ")
178
+ end
179
+ end
180
+
181
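+ # Resolves a dot-separated command line expression against a person, part by part; raises if the resolved value is neither a String nor nil/false.
182
+ # => i.e. person.details[first-name]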
183
+ def attribute_access(person, expression)
184
+ parts = expression.split(".")
185
+ parts_to_value(person, parts).tap do |value|
186
+ unless value.is_a?(String) || !value
187
+ raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
188
+ end
189
+ end
190
+ end
191
+
192
+ def parts_to_value(obj, parts)
193
+ parts.reduce(obj) do |object, part|
194
+ get_attr(object, part)
195
+ end
196
+ end
197
+
198
+ def get_attr(obj, part)
199
+ case
200
+ when !obj
201
+ nil
202
+ when part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
203
+ obj.send(part.to_sym)
204
+ when part.start_with?(":")
205
+ get_attr(obj, part[1..-1])
206
+ when part.start_with?("details[")
207
+ if (obj.respond_to?(:details)) && details = obj.details
208
+ if match = part.match(/details\[(?<field>.*)\]/)
209
+ details[match[:field]]
210
+ else
211
+ raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
212
+ end
213
+ end
214
+ when part.start_with?("account")
215
+ obj.account if obj.respond_to?(:account)
216
+ when part.start_with?("person")
217
+ obj
218
+ else
219
+ raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
220
+ end
221
+ end
222
+
53
223
  end
@@ -0,0 +1,37 @@
1
+ class Eco::API::UseCases::DefaultCases::CleanUnknownTags < Eco::API::Common::Loaders::UseCase
2
+ name "clean-unknown-tags"
3
+ type :transform
4
+
5
+ REGISTER_TAGS = [
6
+ "EVENT", "INJURY", "RISK", "CONTRACTOR", "PERMIT",
7
+ "AUDIT", "JSEA",
8
+ "TRAINING", "INDUCTION",
9
+ "MEETING", "PPE", "CHEMICAL",
10
+ "PLANT", "ASSET",
11
+ "POLICY", "IDEA", "REPORTS"
12
+ ]
13
+
14
+ attr_reader :session, :options
15
+
16
+ def main(people, session, options, usecase)
17
+ @session = session; @options = options
18
+
19
+ update = session.new_job("main", "update", :update, usecase)
20
+ people.each do |person|
21
+ unknown_tags = person.filter_tags.select {|tag| !tag?(tag)}
22
+ person.filter_tags -= unknown_tags
23
+ update.add(person)
24
+ end
25
+ end
26
+
27
+ private
28
+
29
+ def tag?(value)
30
+ tagtree.tag?(value) || REGISTER_TAGS.any? {|reg| value == reg}
31
+ end
32
+
33
+ def tagtree
34
+ @tagtree ||= ASSETS.config.tagtree
35
+ end
36
+
37
+ end
@@ -18,6 +18,12 @@ ASSETS.cli.config do |cnf|
18
18
  exit
19
19
  end
20
20
 
21
+ desc = "Redirect Standard Ouput to file"
22
+ options_set.add("-stdout", desc) do |options, session|
23
+ file = SCR.get_arg("-stdout", with_param: true) || "output.txt"
24
+ STDOUT.reopen(file, "w+")
25
+ end
26
+
21
27
  desc = "Fix the current session to work with this schema"
22
28
  options_set.add("-schema-id", desc) do |options, session|
23
29
  sch_name = SCR.get_arg("-schema-id", with_param: true)
@@ -41,6 +47,18 @@ ASSETS.cli.config do |cnf|
41
47
  options.deep_merge!(input: {entries_from: true})
42
48
  end
43
49
 
50
+ desc = "Used to only get the people from the input file. It will also include their current and new supervisors."
51
+ options_set.add("-get-partial", desc) do |options, session|
52
+ options.deep_merge!(people: {
53
+ get: {from: :remote, type: :partial}
54
+ })
55
+ end
56
+
57
+ desc = "Do not load any people for this run."
58
+ options_set.add("-no-people", desc) do |options, session|
59
+ options.deep_merge!(people: {get: false})
60
+ end
61
+
44
62
  desc = "Locally cache all the people manager by retrieving from the server"
45
63
  options_set.add("-get-people", desc) do |options, session|
46
64
  options.deep_merge!(people: {
@@ -48,7 +66,17 @@ ASSETS.cli.config do |cnf|
48
66
  })
49
67
  end
50
68
 
51
- options_set.add(["-dry-run", "-simulate"], "Runs in dry-run (no requests sent to server)") do |options, session|
69
+ desc = "Used to specify the cache file of people to be used. "
70
+ desc += "It is useful to use as people reference those stored in cached file diffrent to the last one."
71
+ options_set.add("-people-from-backup", desc) do |options, session|
72
+ file = SCR.get_file("-people-from-backup", required: true, should_exist: true)
73
+ options.deep_merge!(people: {
74
+ get: {from: :local, type: :file, file: file}
75
+ })
76
+ end
77
+
78
+ desc = "Runs in dry-run (no requests sent to server)"
79
+ options_set.add(["-dry-run", "-simulate"], desc) do |options, session|
52
80
  options[:dry_run] = true
53
81
  options[:simulate] = true
54
82
  session.config.dry_run!
@@ -1,29 +1,23 @@
1
1
  ASSETS.cli.config do |cnf|
2
2
  cnf.people do |input, session, options|
3
- people = Eco::API::Organization::People.new([])
4
- if SCR.get_arg("-get-people")
5
- options.deep_merge!(people: {
6
- get: {from: :remote, type: :full}
7
- })
8
- people = session.micro.people_cache
9
- elsif SCR.get_arg("-no-people")
10
- people = Eco::API::Organization::People.new([])
11
- elsif SCR.get_arg("-people-from-backup")
12
- file = SCR.get_arg("-people-from-backup", with_param: true)
13
- options.deep_merge!(people: {
14
- get: {from: :local, type: :backup}
15
- })
16
- people = JSON.parse(File.read(file))
17
- people = Eco::API::Organization::People.new(people)
18
- elsif SCR.get_arg("-get-partial")
19
- unless input && input.is_a?(Enumerable)
3
+ get = options.dig(:people, :get) || {}
4
+ case
5
+ when get == false
6
+ Eco::API::Organization::People.new([])
7
+ when (get[:from] == :remote) && get[:type] == :full
8
+ # -get-people
9
+ session.micro.people_cache
10
+ when (get[:from] == :remote) && get[:type] == :partial
11
+ # -get-partial
12
+ unless (input && input.is_a?(Enumerable))
20
13
  raise "To use -get-partial (partial updates), you need to use -entries-from"
21
14
  end
22
- options.deep_merge!(people: {
23
- get: {from: :remote, type: :partial}
24
- })
25
-
26
- people = session.micro.people_search(input, options: options)
15
+ session.micro.people_search(input, options: options)
16
+ when (get[:from] == :local) && get[:type] == :file
17
+ # -people-from-backup
18
+ session.micro.people_load(get[:file], modifier: :file)
19
+ #people = JSON.parse(File.read(get[:file]))
20
+ #Eco::API::Organization::People.new(people)
27
21
  else
28
22
  options.deep_merge!(people: {
29
23
  get: {from: :local, type: :full}
@@ -33,9 +27,9 @@ ASSETS.cli.config do |cnf|
33
27
  options.deep_merge!(people: {
34
28
  get: {from: :remote, type: :full}
35
29
  })
36
- people = session.micro.people_cache if people.empty?
30
+ people = session.micro.people_cache
37
31
  end
32
+ people
38
33
  end
39
- people
40
34
  end
41
35
  end
@@ -26,11 +26,36 @@ ASSETS.cli.config do |cnf|
26
26
  end
27
27
 
28
28
  desc = "Provides a set of tools to analyse a set of people (i.e. detect duplicates)"
29
- cases.add("-analyse-people", :export, desc, case_name: "-analyse-people") do |people, session, options|
29
+ cases.add("-analyse-people", :export, desc, case_name: "analyse-people") do |people, session, options|
30
30
  options.deep_merge!(output: {file: "people_analysis.txt"}) unless options.dig(:output, :file)
31
- end.add_option("-to", "Specify the output file") do |options|
31
+ #unless options.dig(:usecase, :analyse_people, :use_field)
32
+ # options.deep_merge!(usecase: {analyse_people: {use_field: :name}})
33
+ #end
34
+ end.add_option("-to", "Specify the output file.") do |options|
32
35
  file = SCR.get_file("-to", required: true, should_exist: false)
33
36
  options.deep_merge!(output: {file: file})
37
+ end.add_option("-identify-duplicates", "Generates a list of people with possible duplicates.") do |options|
38
+ options.deep_merge!(usecase: {analyse_people: {identify_duplicates: true}})
39
+ end.add_option("-use-field", "Works with -identify-duplicates. Sets field to be used in the comparison.") do |options|
40
+ expression = SCR.get_arg("-use-field", with_param: true)
41
+ options.deep_merge!(usecase: {analyse_people: {use_field: expression}})
42
+ end.add_option("-facet-field", "Works with -identify-duplicates. Adds an additional layer of comparison.") do |options|
43
+ expression = SCR.get_arg("-facet-field", with_param: true)
44
+ options.deep_merge!(usecase: {analyse_people: {facet_field: expression}})
45
+ end.add_option("-only-screening", "Works with -identify-duplicates. Skips the rearrangement stage.") do |options|
46
+ options.deep_merge!(usecase: {analyse_people: {only_screening: true}})
47
+ end.add_option("-ignore-matching-words", "Works with -identify-duplicates. Re-adjust scores ignoring matching words.") do |options|
48
+ options.deep_merge!(usecase: {analyse_people: {ignore_matching_words: true}})
49
+ end.add_option("-unique-words", "Works with -identify-duplicates. Re-adjust the comparing strings to do not have repeated words.") do |options|
50
+ options.deep_merge!(usecase: {analyse_people: {unique_words: true}})
51
+ end.add_option("-identify-unnamed", "Identifies all people with no names.") do |options|
52
+ options.deep_merge!(usecase: {analyse_people: {identify_unnamed: true}})
53
+ end.add_option("-backup-people-results", "Generates a json file with all the people involved in the final results of the analysis.") do |options|
54
+ file = SCR.get_file("-backup-people-results", required: true, should_exist: false)
55
+ options.deep_merge!(usecase: {analyse_people: {backup_people: File.expand_path(file)}})
56
+ end.add_option("-to-csv", "Genarates a CSV file with all people of the final results.") do |options|
57
+ file = SCR.get_file("-to-csv", required: true, should_exist: false) || "Results.csv"
58
+ options.deep_merge!(usecase: {analyse_people: {csv_file: File.expand_path(file)}})
34
59
  end
35
60
 
36
61
  desc = "It exports to a CSV the (filtered) people"
@@ -62,6 +87,10 @@ ASSETS.cli.config do |cnf|
62
87
  options.deep_merge!(other: {file: {codes_column: col_codes}})
63
88
  end
64
89
 
90
+ desc = "Cleans from filter_tags those tags that are not present in the tagtree (as per tagtree.json file)."
91
+ desc += " It will preserve standard register tags of most common registers (i.e. EVENT, RISK)."
92
+ cases.add("-clean-unknown-tags", :transform, desc, case_name: "clean-unknown-tags")
93
+
65
94
  desc = "Removes the landing page or sets it to -page-id"
66
95
  cases.add("-reset-landing-page", :transform, desc, case_name: "reset-landing-page")
67
96
  .add_option("-page-id", "Target landing page to set to the users") do |options|