eco-helpers 2.0.19 → 2.0.21

Sign up to get free protection for your applications and to get access to all the features.
@@ -106,11 +106,13 @@ module Eco
106
106
  # @param attr [String, Symbol] type (`Symbol`) or attribute (`String`) to target a specific parser.
107
107
  # @param source [Any] source value to be parsed.
108
108
  # @param phase [Symbol] the phase when this parser should be active.
109
- def parse_attribute(attr, source, phase = :internal)
109
+ # @param deps [Hash] extra data forwarded to the parser as its `deps:` keyword argument.
110
+ # @return [Object] the parsed attribute.
111
+ def parse_attribute(attr, source, phase = :internal, deps: {})
110
112
  unless parsers = entry_factory.person_parser
111
113
  raise "There are no parsers defined"
112
114
  end
113
- parsers.parse(attr, source, phase)
115
+ parsers.parse(attr, source, phase, deps: deps)
114
116
  end
115
117
 
116
118
  # @see Eco::API::Common::People::EntryFactory#export
@@ -13,6 +13,7 @@ require_relative 'default_cases/abstract_policygroup_abilities_case.rb'
13
13
  require_relative 'default_cases/analyse_people_case'
14
14
  require_relative 'default_cases/append_usergroups_case'
15
15
  require_relative 'default_cases/change_email_case'
16
+ require_relative 'default_cases/clean_unknown_tags_case'
16
17
  require_relative 'default_cases/codes_to_tags_case'
17
18
  require_relative 'default_cases/create_case'
18
19
  require_relative 'default_cases/create_details_case'
@@ -5,41 +5,158 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
5
5
  attr_reader :session, :people, :options
6
6
 
7
7
  def main(people, session, options, usecase)
8
+ options[:end_get] = false
8
9
  @session = session; @options = options; @people = people
9
10
 
10
- save!(analysis)
11
+ case
12
+ when case_options[:identify_duplicates]
13
+ identify_duplicates
14
+ when case_options[:identify_unnamed]
15
+ identify_unnamed
16
+ else
17
+ session.logger.info("No analysis operation was specified")
18
+ end.tap do |people_involved|
19
+ if people_involved
20
+ to_csv(people_involved) if to_csv?
21
+ create_people_backup(people_involved) if results_people_backup?
22
+ end
23
+ end
11
24
  end
12
25
 
13
26
  private
14
27
 
15
- def analysis
16
- analytics.attribute = :name
17
- analysed = analytics.analyse({
18
- threshold: 0.20,
19
- order: [:average, :dice]
20
- })
21
- analytics.re_analyse(analysed, threshold: 0.5, order: [:average])
28
+ def identify_unnamed
29
+ similarity_analytics.unnamed.tap do |unnamed|
30
+ if unnamed.empty?
31
+ session.logger.info("There were no people with no name!!")
32
+ end
33
+ end
22
34
  end
23
35
 
24
- def analytics
25
- @analytics ||= people.analytics
36
+ def identify_duplicates
37
+ analysed = similarity_screening
38
+ if case_options[:ignore_matching_words]
39
+ puts "Fine tune results by ignoring matching words..."
40
+ analysed = strict_similarity(analysed)
41
+ end
42
+
43
+ similarity_analytics.newSimilarity(analysed).tap do |related_people|
44
+ if related_people.empty?
45
+ session.logger.info("There were no possible duplicates identified!!")
46
+ else
47
+ report = similarity_analytics.report(analysed, format: :txt)
48
+ save!(report)
49
+ end
50
+ end
26
51
  end
27
52
 
28
- def file
29
- @file ||= options.dig(:output, :file) || "analytics.txt"
53
+ def strict_similarity(analysed)
54
+ similarity_analytics.ignore_matching_words(analysed, **{
55
+ threshold: 0.5,
56
+ order: [:ngrams]
57
+ })
30
58
  end
31
59
 
32
- def save!(analysed)
33
- if analysed.empty?
34
- session.logger.info("There were results identified!!")
35
- return
60
+ def similarity_screening
61
+ similarity_analytics.attribute = field_similarity
62
+ options = {
63
+ threshold: 0.4,
64
+ order: [:average, :dice]
65
+ }.tap do |opts|
66
+ opts.merge!(needle_read: facet_field_proc) if facet_field?
67
+ opts.merge!(unique_words: true) if unique_words?
68
+ end
69
+ analysed = similarity_analytics.analyse(**options)
70
+ puts "Got #{analysed.count} results after basic screening with #{options}"
71
+
72
+ return analysed if case_options[:only_screening]
73
+ options = {threshold: 0.5, order: [:average]}
74
+ puts "Going to rearrange results... with #{options}"
75
+ similarity_analytics.rearrange(analysed, **options).tap do |analysed|
76
+ puts "... got #{analysed.count} results after rearranging"
36
77
  end
78
+ end
79
+
80
+ def similarity_analytics
81
+ @analytics ||= people.similarity
82
+ end
83
+
84
+ def create_people_backup(cut = people, file = results_people_backup)
85
+ session.file_manager.save_json(cut, file)
86
+ end
87
+
88
+ def to_csv(data = people, file = csv_file)
89
+ opts = {}
90
+ opts.deep_merge!(export: {file: {name: file, format: :csv}})
91
+ opts.deep_merge!(export: {options: {nice_header: true}})
92
+ opts.deep_merge!(export: {options: {internal_names: true}})
93
+ #opts.deep_merge!(export: {options: {split_schemas: true}})
94
+ session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
95
+ end
96
+
97
+ def unique_words?
98
+ case_options[:unique_words]
99
+ end
100
+
101
+ def field_similarity
102
+ return :name unless use_field?
103
+ use_field_proc
104
+ end
105
+
106
+ def use_field_proc
107
+ proc_value_access(use_field)
108
+ end
109
+
110
+ def facet_field_proc
111
+ proc_value_access(facet_field)
112
+ end
113
+
114
+ def use_field
115
+ case_options.dig(:use_field)
116
+ end
37
117
 
38
- ext = File.extname(file).downcase.delete(".")
118
+ def use_field?
119
+ !!use_field
120
+ end
121
+
122
+ def facet_field
123
+ case_options.dig(:facet_field)
124
+ end
125
+
126
+ def facet_field?
127
+ !!facet_field
128
+ end
129
+
130
+ def csv_file
131
+ case_options.dig(:csv_file)
132
+ end
133
+
134
+ def to_csv?
135
+ !!csv_file
136
+ end
137
+
138
+ def results_people_backup
139
+ case_options.dig(:backup_people)
140
+ end
141
+
142
+ def results_people_backup?
143
+ !!results_people_backup
144
+ end
145
+
146
+ def case_options
147
+ options.dig(:usecase, :analyse_people) || {}
148
+ end
39
149
 
40
- File.open(file, "w") do |fd|
150
+ def output_file
151
+ @output_file ||= options.dig(:output, :file) || "analytics.txt"
152
+ end
153
+
154
+ def save!(data)
155
+ ext = File.extname(output_file).downcase.delete(".")
156
+ session.logger.info("Generating file '#{output_file}'")
157
+ File.open(output_file, "w") do |fd|
41
158
  if ext == "txt"
42
- fd << analytics.analysis(analysed, format: :txt)
159
+ fd << data
43
160
  elsif ext == "html"
44
161
  puts "html is still not supported"
45
162
  exit(1)
@@ -50,4 +167,57 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
50
167
  end
51
168
  end
52
169
 
170
+ # Builds a proc from a command line expression made of one or more parts joined by " AND ".
171
+ # => e.g. details[first-name] AND details[surname]
172
+ def proc_value_access(expression)
173
+ #return expression.to_sym if expression.start_with?(":")
174
+ subexpressions = expression.split(" AND ")
175
+ Proc.new do |person|
176
+ values = subexpressions.map {|exp| attribute_access(person, exp)}
177
+ values.compact.join(" ")
178
+ end
179
+ end
180
+
181
+ # Resolves a single dot-separated command line expression against a person (expects a String or no value).
182
+ # => e.g. person.details[first-name]
183
+ def attribute_access(person, expression)
184
+ parts = expression.split(".")
185
+ parts_to_value(person, parts).tap do |value|
186
+ unless value.is_a?(String) || !value
187
+ raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
188
+ end
189
+ end
190
+ end
191
+
192
+ def parts_to_value(obj, parts)
193
+ parts.reduce(obj) do |object, part|
194
+ get_attr(object, part)
195
+ end
196
+ end
197
+
198
+ def get_attr(obj, part)
199
+ case
200
+ when !obj
201
+ nil
202
+ when part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
203
+ obj.send(part.to_sym)
204
+ when part.start_with?(":")
205
+ get_attr(obj, part[1..-1])
206
+ when part.start_with?("details[")
207
+ if (obj.respond_to?(:details)) && details = obj.details
208
+ if match = part.match(/details\[(?<field>.*)\]/)
209
+ details[match[:field]]
210
+ else
211
+ raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
212
+ end
213
+ end
214
+ when part.start_with?("account")
215
+ obj.account if obj.respond_to?(:account)
216
+ when part.start_with?("person")
217
+ obj
218
+ else
219
+ raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
220
+ end
221
+ end
222
+
53
223
  end
@@ -0,0 +1,37 @@
1
+ class Eco::API::UseCases::DefaultCases::CleanUnknownTags < Eco::API::Common::Loaders::UseCase
2
+ name "clean-unknown-tags"
3
+ type :transform
4
+
5
+ REGISTER_TAGS = [
6
+ "EVENT", "INJURY", "RISK", "CONTRACTOR", "PERMIT",
7
+ "AUDIT", "JSEA",
8
+ "TRAINING", "INDUCTION",
9
+ "MEETING", "PPE", "CHEMICAL",
10
+ "PLANT", "ASSET",
11
+ "POLICY", "IDEA", "REPORTS"
12
+ ]
13
+
14
+ attr_reader :session, :options
15
+
16
+ def main(people, session, options, usecase)
17
+ @session = session; @options = options
18
+
19
+ update = session.new_job("main", "update", :update, usecase)
20
+ people.each do |person|
21
+ unknown_tags = person.filter_tags.select {|tag| !tag?(tag)}
22
+ person.filter_tags -= unknown_tags
23
+ update.add(person)
24
+ end
25
+ end
26
+
27
+ private
28
+
29
+ def tag?(value)
30
+ tagtree.tag?(value) || REGISTER_TAGS.any? {|reg| value == reg}
31
+ end
32
+
33
+ def tagtree
34
+ @tagtree ||= ASSETS.config.tagtree
35
+ end
36
+
37
+ end
@@ -18,6 +18,12 @@ ASSETS.cli.config do |cnf|
18
18
  exit
19
19
  end
20
20
 
21
+ desc = "Redirect Standard Ouput to file"
22
+ options_set.add("-stdout", desc) do |options, session|
23
+ file = SCR.get_arg("-stdout", with_param: true) || "output.txt"
24
+ STDOUT.reopen(file, "w+")
25
+ end
26
+
21
27
  desc = "Fix the current session to work with this schema"
22
28
  options_set.add("-schema-id", desc) do |options, session|
23
29
  sch_name = SCR.get_arg("-schema-id", with_param: true)
@@ -41,6 +47,18 @@ ASSETS.cli.config do |cnf|
41
47
  options.deep_merge!(input: {entries_from: true})
42
48
  end
43
49
 
50
+ desc = "Used to only get the people from the input file. It will also include their current and new supervisors."
51
+ options_set.add("-get-partial", desc) do |options, session|
52
+ options.deep_merge!(people: {
53
+ get: {from: :remote, type: :partial}
54
+ })
55
+ end
56
+
57
+ desc = "Do not load any people for this run."
58
+ options_set.add("-no-people", desc) do |options, session|
59
+ options.deep_merge!(people: {get: false})
60
+ end
61
+
44
62
  desc = "Locally cache all the people manager by retrieving from the server"
45
63
  options_set.add("-get-people", desc) do |options, session|
46
64
  options.deep_merge!(people: {
@@ -48,7 +66,17 @@ ASSETS.cli.config do |cnf|
48
66
  })
49
67
  end
50
68
 
51
- options_set.add(["-dry-run", "-simulate"], "Runs in dry-run (no requests sent to server)") do |options, session|
69
+ desc = "Used to specify the cache file of people to be used. "
70
+ desc += "It is useful to use as people reference those stored in cached file diffrent to the last one."
71
+ options_set.add("-people-from-backup", desc) do |options, session|
72
+ file = SCR.get_file("-people-from-backup", required: true, should_exist: true)
73
+ options.deep_merge!(people: {
74
+ get: {from: :local, type: :file, file: file}
75
+ })
76
+ end
77
+
78
+ desc = "Runs in dry-run (no requests sent to server)"
79
+ options_set.add(["-dry-run", "-simulate"], desc) do |options, session|
52
80
  options[:dry_run] = true
53
81
  options[:simulate] = true
54
82
  session.config.dry_run!
@@ -1,29 +1,23 @@
1
1
  ASSETS.cli.config do |cnf|
2
2
  cnf.people do |input, session, options|
3
- people = Eco::API::Organization::People.new([])
4
- if SCR.get_arg("-get-people")
5
- options.deep_merge!(people: {
6
- get: {from: :remote, type: :full}
7
- })
8
- people = session.micro.people_cache
9
- elsif SCR.get_arg("-no-people")
10
- people = Eco::API::Organization::People.new([])
11
- elsif SCR.get_arg("-people-from-backup")
12
- file = SCR.get_arg("-people-from-backup", with_param: true)
13
- options.deep_merge!(people: {
14
- get: {from: :local, type: :backup}
15
- })
16
- people = JSON.parse(File.read(file))
17
- people = Eco::API::Organization::People.new(people)
18
- elsif SCR.get_arg("-get-partial")
19
- unless input && input.is_a?(Enumerable)
3
+ get = options.dig(:people, :get) || {}
4
+ case
5
+ when get == false
6
+ Eco::API::Organization::People.new([])
7
+ when (get[:from] == :remote) && get[:type] == :full
8
+ # -get-people
9
+ session.micro.people_cache
10
+ when (get[:from] == :remote) && get[:type] == :partial
11
+ # -get-partial
12
+ unless (input && input.is_a?(Enumerable))
20
13
  raise "To use -get-partial (partial updates), you need to use -entries-from"
21
14
  end
22
- options.deep_merge!(people: {
23
- get: {from: :remote, type: :partial}
24
- })
25
-
26
- people = session.micro.people_search(input, options: options)
15
+ session.micro.people_search(input, options: options)
16
+ when (get[:from] == :local) && get[:type] == :file
17
+ # -people-from-backup
18
+ session.micro.people_load(get[:file], modifier: :file)
19
+ #people = JSON.parse(File.read(get[:file]))
20
+ #Eco::API::Organization::People.new(people)
27
21
  else
28
22
  options.deep_merge!(people: {
29
23
  get: {from: :local, type: :full}
@@ -33,9 +27,9 @@ ASSETS.cli.config do |cnf|
33
27
  options.deep_merge!(people: {
34
28
  get: {from: :remote, type: :full}
35
29
  })
36
- people = session.micro.people_cache if people.empty?
30
+ people = session.micro.people_cache
37
31
  end
32
+ people
38
33
  end
39
- people
40
34
  end
41
35
  end
@@ -26,11 +26,36 @@ ASSETS.cli.config do |cnf|
26
26
  end
27
27
 
28
28
  desc = "Provides a set of tools to analyse a set of people (i.e. detect duplicates)"
29
- cases.add("-analyse-people", :export, desc, case_name: "-analyse-people") do |people, session, options|
29
+ cases.add("-analyse-people", :export, desc, case_name: "analyse-people") do |people, session, options|
30
30
  options.deep_merge!(output: {file: "people_analysis.txt"}) unless options.dig(:output, :file)
31
- end.add_option("-to", "Specify the output file") do |options|
31
+ #unless options.dig(:usecase, :analyse_people, :use_field)
32
+ # options.deep_merge!(usecase: {analyse_people: {use_field: :name}})
33
+ #end
34
+ end.add_option("-to", "Specify the output file.") do |options|
32
35
  file = SCR.get_file("-to", required: true, should_exist: false)
33
36
  options.deep_merge!(output: {file: file})
37
+ end.add_option("-identify-duplicates", "Generates a list of people with possible duplicates.") do |options|
38
+ options.deep_merge!(usecase: {analyse_people: {identify_duplicates: true}})
39
+ end.add_option("-use-field", "Works with -identify-duplicates. Sets field to be used in the comparison.") do |options|
40
+ expression = SCR.get_arg("-use-field", with_param: true)
41
+ options.deep_merge!(usecase: {analyse_people: {use_field: expression}})
42
+ end.add_option("-facet-field", "Works with -identify-duplicates. Adds an additional layer of comparison.") do |options|
43
+ expression = SCR.get_arg("-facet-field", with_param: true)
44
+ options.deep_merge!(usecase: {analyse_people: {facet_field: expression}})
45
+ end.add_option("-only-screening", "Works with -identify-duplicates. Skips the rearrangement stage.") do |options|
46
+ options.deep_merge!(usecase: {analyse_people: {only_screening: true}})
47
+ end.add_option("-ignore-matching-words", "Works with -identify-duplicates. Re-adjust scores ignoring matching words.") do |options|
48
+ options.deep_merge!(usecase: {analyse_people: {ignore_matching_words: true}})
49
+ end.add_option("-unique-words", "Works with -identify-duplicates. Re-adjust the comparing strings to do not have repeated words.") do |options|
50
+ options.deep_merge!(usecase: {analyse_people: {unique_words: true}})
51
+ end.add_option("-identify-unnamed", "Identifies all people with no names.") do |options|
52
+ options.deep_merge!(usecase: {analyse_people: {identify_unnamed: true}})
53
+ end.add_option("-backup-people-results", "Generates a json file with all the people involved in the final results of the analysis.") do |options|
54
+ file = SCR.get_file("-backup-people-results", required: true, should_exist: false)
55
+ options.deep_merge!(usecase: {analyse_people: {backup_people: File.expand_path(file)}})
56
+ end.add_option("-to-csv", "Genarates a CSV file with all people of the final results.") do |options|
57
+ file = SCR.get_file("-to-csv", required: true, should_exist: false) || "Results.csv"
58
+ options.deep_merge!(usecase: {analyse_people: {csv_file: File.expand_path(file)}})
34
59
  end
35
60
 
36
61
  desc = "It exports to a CSV the (filtered) people"
@@ -62,6 +87,10 @@ ASSETS.cli.config do |cnf|
62
87
  options.deep_merge!(other: {file: {codes_column: col_codes}})
63
88
  end
64
89
 
90
+ desc = "Cleans from filter_tags those tags that are not present in the tagtree (as per tagtree.json file)."
91
+ desc += " It will preserve standard register tags of most common registers (i.e. EVENT, RISK)."
92
+ cases.add("-clean-unknown-tags", :transform, desc, case_name: "clean-unknown-tags")
93
+
65
94
  desc = "Removes the landing page or sets it to -page-id"
66
95
  cases.add("-reset-landing-page", :transform, desc, case_name: "reset-landing-page")
67
96
  .add_option("-page-id", "Target landing page to set to the users") do |options|