eco-helpers 2.0.19 → 2.0.25

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +77 -1
  3. data/eco-helpers.gemspec +4 -1
  4. data/lib/eco/api/common/base_loader.rb +9 -5
  5. data/lib/eco/api/common/loaders/parser.rb +1 -0
  6. data/lib/eco/api/common/people/default_parsers.rb +1 -0
  7. data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
  8. data/lib/eco/api/common/people/entries.rb +1 -0
  9. data/lib/eco/api/common/people/entry_factory.rb +64 -16
  10. data/lib/eco/api/common/people/person_parser.rb +1 -1
  11. data/lib/eco/api/common/version_patches/exception.rb +5 -2
  12. data/lib/eco/api/organization/people.rb +8 -2
  13. data/lib/eco/api/organization/people_similarity.rb +171 -11
  14. data/lib/eco/api/organization/tag_tree.rb +33 -0
  15. data/lib/eco/api/session.rb +15 -7
  16. data/lib/eco/api/session/batch.rb +1 -1
  17. data/lib/eco/api/session/batch/job.rb +34 -9
  18. data/lib/eco/api/usecases.rb +2 -2
  19. data/lib/eco/api/usecases/base_case.rb +2 -2
  20. data/lib/eco/api/usecases/base_io.rb +17 -4
  21. data/lib/eco/api/usecases/default_cases.rb +1 -0
  22. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
  23. data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
  24. data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
  25. data/lib/eco/cli/config/default/input.rb +61 -8
  26. data/lib/eco/cli/config/default/options.rb +46 -2
  27. data/lib/eco/cli/config/default/people.rb +18 -24
  28. data/lib/eco/cli/config/default/usecases.rb +31 -2
  29. data/lib/eco/cli/config/default/workflow.rb +8 -6
  30. data/lib/eco/cli/scripting/args_helpers.rb +2 -2
  31. data/lib/eco/csv/table.rb +121 -21
  32. data/lib/eco/data/fuzzy_match.rb +52 -12
  33. data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
  34. data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
  35. data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
  36. data/lib/eco/data/fuzzy_match/result.rb +15 -1
  37. data/lib/eco/data/fuzzy_match/results.rb +18 -0
  38. data/lib/eco/data/fuzzy_match/score.rb +12 -7
  39. data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
  40. data/lib/eco/language/models/collection.rb +5 -2
  41. data/lib/eco/version.rb +1 -1
  42. metadata +64 -2
@@ -42,6 +42,39 @@ module Eco
42
42
  init_hashes
43
43
  end
44
44
 
45
+ # Updates the tag of the current tree
46
+ def tag=(value)
47
+ @tag = value
48
+ end
49
+
50
+ # @return [Eco::API::Organization::TagTree]
51
+ def dup
52
+ self.class.new(as_json)
53
+ end
54
+
55
+ # @return [Array] with the differences
56
+ def diff(tagtree, differences: {}, level: 0, **options)
57
+ require 'hashdiff'
58
+ Hashdiff.diff(self.as_json, tagtree.as_json, **options.slice(:array_path, :similarity, :use_lcs))
59
+ end
60
+
61
+ def top?
62
+ depth == -1
63
+ end
64
+
65
+ # @return [Array<Hash>, Hash] the `Array` of nodes when this is the top of the tree; otherwise a `node` `{"tag" => TAG, "nodes" => Array<Hash>}`
66
+ def as_json
67
+ nodes_json = nodes.map {|node| node.as_json}
68
+ if top?
69
+ nodes_json
70
+ else
71
+ {
72
+ "tag" => tag,
73
+ "nodes" => nodes_json
74
+ }
75
+ end
76
+ end
77
+
45
78
  # @return [Boolean] `true` if there are tags in the node, `false` otherwise.
46
79
  def empty?
47
80
  @has_tags.empty?
@@ -66,6 +66,16 @@ module Eco
66
66
  @presets_factory ||= Eco::API::Organization::PresetsFactory.new(enviro: enviro)
67
67
  end
68
68
 
69
+ # @return [Eco::Data::Mapper] the mappings between the internal and external attribute/property names.
70
+ def fields_mapper
71
+ return @fields_mapper if instance_variable_defined?(:@fields_mapper)
72
+ mappings = []
73
+ if map_file = config.people.fields_mapper
74
+ mappings = map_file ? file_manager.load_json(map_file) : []
75
+ end
76
+ @fields_mapper = Eco::Data::Mapper.new(mappings)
77
+ end
78
+
69
79
  # Helper to obtain an EntryFactory
70
80
  # @param schema [String, Ecoportal::API::V1::PersonSchema] `schema` to associate the EntryFactory with,
71
81
  # where `String` can be the _name_ or the _id_ of the schema.
@@ -79,15 +89,11 @@ module Eco
79
89
  return @entry_factories[schema&.id]
80
90
  end
81
91
 
82
- mappings = []
83
- if map_file = config.people.fields_mapper
84
- mappings = map_file ? file_manager.load_json(map_file) : []
85
- end
86
92
  @entry_factories[schema&.id] = Eco::API::Common::People::EntryFactory.new(
87
93
  enviro,
88
94
  schema: schema,
89
95
  person_parser: config.people.parser,
90
- attr_map: Eco::Data::Mapper.new(mappings)
96
+ attr_map: fields_mapper
91
97
  )
92
98
  end
93
99
 
@@ -106,11 +112,13 @@ module Eco
106
112
  # @param attr [Symbol, String] the type (`Symbol`) or attribute (`String`) that targets a specific parser.
107
113
  # @param source [Any] source value to be parsed.
108
114
  # @param phase [Symbol] the phase when this parser should be active.
109
- def parse_attribute(attr, source, phase = :internal)
115
+ # @param deps [Hash] dependencies forwarded to the parser as `deps`.
116
+ # @return [Object] the parsed attribute.
117
+ def parse_attribute(attr, source, phase = :internal, deps: {})
110
118
  unless parsers = entry_factory.person_parser
111
119
  raise "There are no parsers defined"
112
120
  end
113
- parsers.parse(attr, source, phase)
121
+ parsers.parse(attr, source, phase, deps: deps)
114
122
  end
115
123
 
116
124
  # @see Eco::API::Common::People::EntryFactory#export
@@ -137,7 +137,7 @@ module Eco
137
137
  rescue error_type => e
138
138
  raise unless retries_left > 0
139
139
  explanation = "Batch TimeOut. You have #{retries_left} retries left."
140
- prompt_user("Do you want to retry (y/N)?", explanation, default: "Y", timeout: 10) do |response|
140
+ prompt_user(" Do you want to retry (y/N)?", default: "Y", explanation: explanation, timeout: 10) do |response|
141
141
  if response.upcase.start_with?("Y")
142
142
  offer_retry_on(error_type, retries_left - 1, &block)
143
143
  else
@@ -164,12 +164,17 @@ module Eco
164
164
  # @return [Eco::API::Session::Batch::Status]
165
165
  def launch(simulate: false)
166
166
  pqueue = processed_queue
167
- @requests = pqueue.map {|e| as_update(e)}
167
+ @requests = as_update(pqueue)
168
168
  pre_checks(requests, simulate: simulate)
169
169
 
170
- unless simulate
170
+ if simulate
171
+ if options.dig(:requests, :backup)
172
+ req_backup = as_update(pqueue, add_feedback: false)
173
+ backup_update(req_backup, simulate: simulate)
174
+ end
175
+ else
171
176
  if pqueue.length > 0
172
- req_backup = pqueue.map {|e| as_update(e, add_feedback: false)}
177
+ req_backup = as_update(pqueue, add_feedback: false)
173
178
  backup_update(req_backup)
174
179
  session.batch.launch(pqueue, method: type).tap do |job_status|
175
180
  @status = job_status
@@ -220,13 +225,26 @@ module Eco
220
225
  end.join("\n")
221
226
  end
222
227
 
223
- def as_update(*args)
224
- feedback.as_update(*args)
228
+ def as_update(data, *args)
229
+ if data.is_a?(Array)
230
+ data.map do |e|
231
+ feedback.as_update(e, *args)
232
+ end.compact.select {|e| e && !e.empty?}
233
+ else
234
+ feedback.as_update(data, *args)
235
+ end
225
236
  end
226
237
 
227
238
  def processed_queue
228
239
  @queue.each {|e| @callbacks[e].call(e) if @callbacks.key?(e) }
229
- apply_policies(api_included(@queue)).select {|e| !as_update(e).empty?}
240
+ apply_policies(api_included(@queue)).select do |e|
241
+ !as_update(e).empty?
242
+ end.select do |e|
243
+ next true unless e.is_a?(Ecoportal::API::V1::Person)
244
+ next true unless e.new?
245
+ # new people should either have account or details
246
+ e.account || e.details
247
+ end
230
248
  end
231
249
 
232
250
  # if there is a config definition to exclude entries
@@ -235,7 +253,13 @@ module Eco
235
253
  def api_included(full_queue)
236
254
  return full_queue if type == :create
237
255
  return full_queue unless excluded = session.config.people.api_excluded
238
- full_queue.select {|entry| !excluded.call(entry, session, options, self)}
256
+ if options.dig(:include, :excluded, :only)
257
+ full_queue.select {|entry| excluded.call(entry, session, options, self)}
258
+ elsif options.dig(:include, :excluded)
259
+ full_queue
260
+ else
261
+ full_queue.select {|entry| !excluded.call(entry, session, options, self)}
262
+ end
239
263
  end
240
264
 
241
265
  # Applies the changes introduced by api policies
@@ -307,9 +331,10 @@ module Eco
307
331
  end
308
332
 
309
333
  # Keep a copy of the requests for future reference
310
- def backup_update(requests)
334
+ def backup_update(requests, simulate: false)
335
+ dry_run = simulate ? "_dry_run" : ""
311
336
  dir = config.people.requests_folder
312
- file = File.join(dir, "#{type}_data.json")
337
+ file = File.join(dir, "#{type}_data#{dry_run}.json")
313
338
  file_manager.save_json(requests, file, :timestamp)
314
339
  end
315
340
 
@@ -2,7 +2,7 @@ module Eco
2
2
  module API
3
3
  class UseCases
4
4
 
5
- class UnkownCase < Exception
5
+ class UnkownCase < StandardError
6
6
  def initialize(msg = nil, case_name: nil, type: nil)
7
7
  msg ||= "Unkown case"
8
8
  msg += ". Case name '#{case_name}'" if case_name
@@ -11,7 +11,7 @@ module Eco
11
11
  end
12
12
  end
13
13
 
14
- class AmbiguousCaseReference < Exception
14
+ class AmbiguousCaseReference < StandardError
15
15
  def initialize(msg = nil, case_name: nil)
16
16
  msg ||= "You must specify type when there are multiple cases with same name"
17
17
  msg += ". Case name '#{case_name}'" if case_name
@@ -4,7 +4,7 @@ module Eco
4
4
  # Core class of UseCases. It basically defines and manages allowed `types`
5
5
  class BaseCase
6
6
 
7
- class InvalidType < Exception
7
+ class InvalidType < StandardError
8
8
  def initialize(msg = nil, type:, types:)
9
9
  msg ||= "Invalid type."
10
10
  msg = "Given type '#{type}'. Valid types: #{types}"
@@ -13,7 +13,7 @@ module Eco
13
13
  end
14
14
 
15
15
  extend Eco::API::Common::ClassHelpers
16
-
16
+
17
17
  @types = [:import, :filter, :transform, :sync, :error_handler, :export, :other]
18
18
 
19
19
  class << self
@@ -5,6 +5,19 @@ module Eco
5
5
  class BaseIO < BaseCase
6
6
  @types = BaseCase.types
7
7
 
8
+ class MissingParameter < StandardError
9
+ attr_reader :type, :required, :given
10
+
11
+ def initialize(msg = nil, type: nil, required:, given:)
12
+ @type = type
13
+ @required = required
14
+ @given = given
15
+ msg += " of type '#{type}'" if type
16
+ msg += " requires an object '#{required}'. Given: #{given}."
17
+ super(msg)
18
+ end
19
+ end
20
+
8
21
  class << self
9
22
  def input_required?(type)
10
23
  !valid_type?(type) || [:import, :sync].include?(type)
@@ -80,13 +93,13 @@ module Eco
80
93
  def validate_args(input:, people:, session:, options:)
81
94
  case
82
95
  when !session.is_a?(Eco::API::Session)
83
- raise "A UseCase needs a Session object. Given: #{session}"
96
+ raise MissingParameter.new("UseCase", required: :session, given: session.class)
84
97
  when input_required? && !input
85
- raise "UseCase of type '#{type}' requires a valid input. None given"
98
+ raise MissingParameter.new("UseCase", type: type, required: :input, given: input.class)
86
99
  when people_required? && !people.is_a?(Eco::API::Organization::People)
87
- raise "UseCase of type '#{type}' requires a People object. Given: #{people}"
100
+ raise MissingParameter.new("UseCase", type: type, required: :people, given: people.class)
88
101
  when !options || (options && !options.is_a?(Hash))
89
- raise "To inject dependencies via ':options' it should be a Hash object. Given: #{options}"
102
+ raise MissingParameter.new("Use Case options", required: :Hash, given: options.class)
90
103
  end
91
104
  true
92
105
  end
@@ -13,6 +13,7 @@ require_relative 'default_cases/abstract_policygroup_abilities_case.rb'
13
13
  require_relative 'default_cases/analyse_people_case'
14
14
  require_relative 'default_cases/append_usergroups_case'
15
15
  require_relative 'default_cases/change_email_case'
16
+ require_relative 'default_cases/clean_unknown_tags_case'
16
17
  require_relative 'default_cases/codes_to_tags_case'
17
18
  require_relative 'default_cases/create_case'
18
19
  require_relative 'default_cases/create_details_case'
@@ -5,41 +5,158 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
5
5
  attr_reader :session, :people, :options
6
6
 
7
7
  def main(people, session, options, usecase)
8
+ options[:end_get] = false
8
9
  @session = session; @options = options; @people = people
9
10
 
10
- save!(analysis)
11
+ case
12
+ when case_options[:identify_duplicates]
13
+ identify_duplicates
14
+ when case_options[:identify_unnamed]
15
+ identify_unnamed
16
+ else
17
+ session.logger.info("No analysis operation was specified")
18
+ end.tap do |people_involved|
19
+ if people_involved
20
+ to_csv(people_involved) if to_csv?
21
+ create_people_backup(people_involved) if results_people_backup?
22
+ end
23
+ end
11
24
  end
12
25
 
13
26
  private
14
27
 
15
- def analysis
16
- analytics.attribute = :name
17
- analysed = analytics.analyse({
18
- threshold: 0.20,
19
- order: [:average, :dice]
20
- })
21
- analytics.re_analyse(analysed, threshold: 0.5, order: [:average])
28
+ def identify_unnamed
29
+ similarity_analytics.unnamed.tap do |unnamed|
30
+ if unnamed.empty?
31
+ session.logger.info("There were no people with no name!!")
32
+ end
33
+ end
22
34
  end
23
35
 
24
- def analytics
25
- @analytics ||= people.analytics
36
+ def identify_duplicates
37
+ analysed = similarity_screening
38
+ if case_options[:ignore_matching_words]
39
+ puts "Fine tune results by ignoring matching words..."
40
+ analysed = strict_similarity(analysed)
41
+ end
42
+
43
+ similarity_analytics.newSimilarity(analysed).tap do |related_people|
44
+ if related_people.empty?
45
+ session.logger.info("There were no possible duplicates identified!!")
46
+ else
47
+ report = similarity_analytics.report(analysed, format: :txt)
48
+ save!(report)
49
+ end
50
+ end
26
51
  end
27
52
 
28
- def file
29
- @file ||= options.dig(:output, :file) || "analytics.txt"
53
+ def strict_similarity(analysed)
54
+ similarity_analytics.ignore_matching_words(analysed, **{
55
+ threshold: 0.5,
56
+ order: [:ngrams]
57
+ })
30
58
  end
31
59
 
32
- def save!(analysed)
33
- if analysed.empty?
34
- session.logger.info("There were results identified!!")
35
- return
60
+ def similarity_screening
61
+ similarity_analytics.attribute = field_similarity
62
+ options = {
63
+ threshold: 0.4,
64
+ order: [:average, :dice]
65
+ }.tap do |opts|
66
+ opts.merge!(needle_read: facet_field_proc) if facet_field?
67
+ opts.merge!(unique_words: true) if unique_words?
68
+ end
69
+ analysed = similarity_analytics.analyse(**options)
70
+ puts "Got #{analysed.count} results after basic screening with #{options}"
71
+
72
+ return analysed if case_options[:only_screening]
73
+ options = {threshold: 0.5, order: [:average]}
74
+ puts "Going to rearrange results... with #{options}"
75
+ similarity_analytics.rearrange(analysed, **options).tap do |analysed|
76
+ puts "... got #{analysed.count} results after rearranging"
36
77
  end
78
+ end
79
+
80
+ def similarity_analytics
81
+ @analytics ||= people.similarity
82
+ end
83
+
84
+ def create_people_backup(cut = people, file = results_people_backup)
85
+ session.file_manager.save_json(cut, file)
86
+ end
87
+
88
+ def to_csv(data = people, file = csv_file)
89
+ opts = {}
90
+ opts.deep_merge!(export: {file: {name: file, format: :csv}})
91
+ opts.deep_merge!(export: {options: {nice_header: true}})
92
+ opts.deep_merge!(export: {options: {internal_names: true}})
93
+ #opts.deep_merge!(export: {options: {split_schemas: true}})
94
+ session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
95
+ end
96
+
97
+ def unique_words?
98
+ case_options[:unique_words]
99
+ end
100
+
101
+ def field_similarity
102
+ return :name unless use_field?
103
+ use_field_proc
104
+ end
105
+
106
+ def use_field_proc
107
+ proc_value_access(use_field)
108
+ end
109
+
110
+ def facet_field_proc
111
+ proc_value_access(facet_field)
112
+ end
113
+
114
+ def use_field
115
+ case_options.dig(:use_field)
116
+ end
37
117
 
38
- ext = File.extname(file).downcase.delete(".")
118
+ def use_field?
119
+ !!use_field
120
+ end
121
+
122
+ def facet_field
123
+ case_options.dig(:facet_field)
124
+ end
125
+
126
+ def facet_field?
127
+ !!facet_field
128
+ end
129
+
130
+ def csv_file
131
+ case_options.dig(:csv_file)
132
+ end
133
+
134
+ def to_csv?
135
+ !!csv_file
136
+ end
137
+
138
+ def results_people_backup
139
+ case_options.dig(:backup_people)
140
+ end
141
+
142
+ def results_people_backup?
143
+ !!results_people_backup
144
+ end
145
+
146
+ def case_options
147
+ options.dig(:usecase, :analyse_people) || {}
148
+ end
39
149
 
40
- File.open(file, "w") do |fd|
150
+ def output_file
151
+ @output_file ||= options.dig(:output, :file) || "analytics.txt"
152
+ end
153
+
154
+ def save!(data)
155
+ ext = File.extname(output_file).downcase.delete(".")
156
+ session.logger.info("Generating file '#{output_file}'")
157
+ File.open(output_file, "w") do |fd|
41
158
  if ext == "txt"
42
- fd << analytics.analysis(analysed, format: :txt)
159
+ fd << data
43
160
  elsif ext == "html"
44
161
  puts "html is still not supported"
45
162
  exit(1)
@@ -50,4 +167,57 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
50
167
  end
51
168
  end
52
169
 
170
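+ # Builds a `Proc` that reads each part of a command line expression (parts separated by " AND ") from a person and joins the values with a space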
171
+ # => i.e. details[first-name] AND details[surname]
172
+ def proc_value_access(expression)
173
+ #return expression.to_sym if expression.start_with?(":")
174
+ subexpressions = expression.split(" AND ")
175
+ Proc.new do |person|
176
+ values = subexpressions.map {|exp| attribute_access(person, exp)}
177
+ values.compact.join(" ")
178
+ end
179
+ end
180
+
181
+ # Resolves a dot-separated command line expression against a person; raises unless the result is a `String` or there is no value
182
+ # => i.e. person.details[first-name]
183
+ def attribute_access(person, expression)
184
+ parts = expression.split(".")
185
+ parts_to_value(person, parts).tap do |value|
186
+ unless value.is_a?(String) || !value
187
+ raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
188
+ end
189
+ end
190
+ end
191
+
192
+ def parts_to_value(obj, parts)
193
+ parts.reduce(obj) do |object, part|
194
+ get_attr(object, part)
195
+ end
196
+ end
197
+
198
+ def get_attr(obj, part)
199
+ case
200
+ when !obj
201
+ nil
202
+ when part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
203
+ obj.send(part.to_sym)
204
+ when part.start_with?(":")
205
+ get_attr(obj, part[1..-1])
206
+ when part.start_with?("details[")
207
+ if (obj.respond_to?(:details)) && details = obj.details
208
+ if match = part.match(/details\[(?<field>.*)\]/)
209
+ details[match[:field]]
210
+ else
211
+ raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
212
+ end
213
+ end
214
+ when part.start_with?("account")
215
+ obj.account if obj.respond_to?(:account)
216
+ when part.start_with?("person")
217
+ obj
218
+ else
219
+ raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
220
+ end
221
+ end
222
+
53
223
  end