eco-helpers 2.0.19 → 2.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +77 -1
  3. data/eco-helpers.gemspec +4 -1
  4. data/lib/eco/api/common/base_loader.rb +9 -5
  5. data/lib/eco/api/common/loaders/parser.rb +1 -0
  6. data/lib/eco/api/common/people/default_parsers.rb +1 -0
  7. data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
  8. data/lib/eco/api/common/people/entries.rb +1 -0
  9. data/lib/eco/api/common/people/entry_factory.rb +64 -16
  10. data/lib/eco/api/common/people/person_parser.rb +1 -1
  11. data/lib/eco/api/common/version_patches/exception.rb +5 -2
  12. data/lib/eco/api/organization/people.rb +8 -2
  13. data/lib/eco/api/organization/people_similarity.rb +171 -11
  14. data/lib/eco/api/organization/tag_tree.rb +33 -0
  15. data/lib/eco/api/session.rb +15 -7
  16. data/lib/eco/api/session/batch.rb +1 -1
  17. data/lib/eco/api/session/batch/job.rb +34 -9
  18. data/lib/eco/api/usecases.rb +2 -2
  19. data/lib/eco/api/usecases/base_case.rb +2 -2
  20. data/lib/eco/api/usecases/base_io.rb +17 -4
  21. data/lib/eco/api/usecases/default_cases.rb +1 -0
  22. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
  23. data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
  24. data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
  25. data/lib/eco/cli/config/default/input.rb +61 -8
  26. data/lib/eco/cli/config/default/options.rb +46 -2
  27. data/lib/eco/cli/config/default/people.rb +18 -24
  28. data/lib/eco/cli/config/default/usecases.rb +31 -2
  29. data/lib/eco/cli/config/default/workflow.rb +8 -6
  30. data/lib/eco/cli/scripting/args_helpers.rb +2 -2
  31. data/lib/eco/csv/table.rb +121 -21
  32. data/lib/eco/data/fuzzy_match.rb +52 -12
  33. data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
  34. data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
  35. data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
  36. data/lib/eco/data/fuzzy_match/result.rb +15 -1
  37. data/lib/eco/data/fuzzy_match/results.rb +18 -0
  38. data/lib/eco/data/fuzzy_match/score.rb +12 -7
  39. data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
  40. data/lib/eco/language/models/collection.rb +5 -2
  41. data/lib/eco/version.rb +1 -1
  42. metadata +64 -2
@@ -42,6 +42,39 @@ module Eco
42
42
  init_hashes
43
43
  end
44
44
 
45
+ # Updates the tag of the current tree
46
+ def tag=(value)
47
+ @tag = value
48
+ end
49
+
50
+ # @return [Eco::API::Organization::TagTree]
51
+ def dup
52
+ self.class.new(as_json)
53
+ end
54
+
55
+ # @return [Array] with the differences
56
+ def diff(tagtree, differences: {}, level: 0, **options)
57
+ require 'hashdiff'
58
+ Hashdiff.diff(self.as_json, tagtree.as_json, **options.slice(:array_path, :similarity, :use_lcs))
59
+ end
60
+
61
+ def top?
62
+ depth == -1
63
+ end
64
+
65
+ # @return [Array[Hash]] where `Hash` is a `node` `{"tag" => TAG, "nodes": Array[Hash]}`
66
+ def as_json
67
+ nodes_json = nodes.map {|node| node.as_json}
68
+ if top?
69
+ nodes_json
70
+ else
71
+ {
72
+ "tag" => tag,
73
+ "nodes" => nodes_json
74
+ }
75
+ end
76
+ end
77
+
45
78
  # @return [Boolean] `true` if there are tags in the node, `false` otherwise.
46
79
  def empty?
47
80
  @has_tags.empty?
@@ -66,6 +66,16 @@ module Eco
66
66
  @presets_factory ||= Eco::API::Organization::PresetsFactory.new(enviro: enviro)
67
67
  end
68
68
 
69
+ # @return [Eco::Data::Mapper] the mappings between the internal and external attribute/property names.
70
+ def fields_mapper
71
+ return @fields_mapper if instance_variable_defined?(:@fields_mapper)
72
+ mappings = []
73
+ if map_file = config.people.fields_mapper
74
+ mappings = map_file ? file_manager.load_json(map_file) : []
75
+ end
76
+ @fields_mapper = Eco::Data::Mapper.new(mappings)
77
+ end
78
+
69
79
  # Helper to obtain a EntryFactory
70
80
  # @param schema [String, Ecoportal::API::V1::PersonSchema] `schema` to which associate the EntryFactory,
71
81
  # where `String` can be the _name_ or the _id_ of the schema.
@@ -79,15 +89,11 @@ module Eco
79
89
  return @entry_factories[schema&.id]
80
90
  end
81
91
 
82
- mappings = []
83
- if map_file = config.people.fields_mapper
84
- mappings = map_file ? file_manager.load_json(map_file) : []
85
- end
86
92
  @entry_factories[schema&.id] = Eco::API::Common::People::EntryFactory.new(
87
93
  enviro,
88
94
  schema: schema,
89
95
  person_parser: config.people.parser,
90
- attr_map: Eco::Data::Mapper.new(mappings)
96
+ attr_map: fields_mapper
91
97
  )
92
98
  end
93
99
 
@@ -106,11 +112,13 @@ module Eco
106
112
  # @param attr [String] type (`Symbol`) or attribute (`String`) to target a specific parser.
107
113
  # @param source [Any] source value to be parsed.
108
114
  # @param phase [Symbol] the phase when this parser should be active.
109
- def parse_attribute(attr, source, phase = :internal)
115
+ # @param phase [Symbol] the phase when this parser should be active.
116
+ # @return [Object] the parsed attribute.
117
+ def parse_attribute(attr, source, phase = :internal, deps: {})
110
118
  unless parsers = entry_factory.person_parser
111
119
  raise "There are no parsers defined"
112
120
  end
113
- parsers.parse(attr, source, phase)
121
+ parsers.parse(attr, source, phase, deps: deps)
114
122
  end
115
123
 
116
124
  # @see Eco::API::Common::People::EntryFactory#export
@@ -137,7 +137,7 @@ module Eco
137
137
  rescue error_type => e
138
138
  raise unless retries_left > 0
139
139
  explanation = "Batch TimeOut. You have #{retries_left} retries left."
140
- prompt_user("Do you want to retry (y/N)?", explanation, default: "Y", timeout: 10) do |response|
140
+ prompt_user(" Do you want to retry (y/N)?", default: "Y", explanation: explanation, timeout: 10) do |response|
141
141
  if response.upcase.start_with?("Y")
142
142
  offer_retry_on(error_type, retries_left - 1, &block)
143
143
  else
@@ -164,12 +164,17 @@ module Eco
164
164
  # @return [Eco::API::Session::Batch::Status]
165
165
  def launch(simulate: false)
166
166
  pqueue = processed_queue
167
- @requests = pqueue.map {|e| as_update(e)}
167
+ @requests = as_update(pqueue)
168
168
  pre_checks(requests, simulate: simulate)
169
169
 
170
- unless simulate
170
+ if simulate
171
+ if options.dig(:requests, :backup)
172
+ req_backup = as_update(pqueue, add_feedback: false)
173
+ backup_update(req_backup, simulate: simulate)
174
+ end
175
+ else
171
176
  if pqueue.length > 0
172
- req_backup = pqueue.map {|e| as_update(e, add_feedback: false)}
177
+ req_backup = as_update(pqueue, add_feedback: false)
173
178
  backup_update(req_backup)
174
179
  session.batch.launch(pqueue, method: type).tap do |job_status|
175
180
  @status = job_status
@@ -220,13 +225,26 @@ module Eco
220
225
  end.join("\n")
221
226
  end
222
227
 
223
- def as_update(*args)
224
- feedback.as_update(*args)
228
+ def as_update(data, *args)
229
+ if data.is_a?(Array)
230
+ data.map do |e|
231
+ feedback.as_update(e, *args)
232
+ end.compact.select {|e| e && !e.empty?}
233
+ else
234
+ feedback.as_update(data, *args)
235
+ end
225
236
  end
226
237
 
227
238
  def processed_queue
228
239
  @queue.each {|e| @callbacks[e].call(e) if @callbacks.key?(e) }
229
- apply_policies(api_included(@queue)).select {|e| !as_update(e).empty?}
240
+ apply_policies(api_included(@queue)).select do |e|
241
+ !as_update(e).empty?
242
+ end.select do |e|
243
+ next true unless e.is_a?(Ecoportal::API::V1::Person)
244
+ next true unless e.new?
245
+ # new people should either have account or details
246
+ e.account || e.details
247
+ end
230
248
  end
231
249
 
232
250
  # if there is a config definition to exclude entries
@@ -235,7 +253,13 @@ module Eco
235
253
  def api_included(full_queue)
236
254
  return full_queue if type == :create
237
255
  return full_queue unless excluded = session.config.people.api_excluded
238
- full_queue.select {|entry| !excluded.call(entry, session, options, self)}
256
+ if options.dig(:include, :excluded, :only)
257
+ full_queue.select {|entry| excluded.call(entry, session, options, self)}
258
+ elsif options.dig(:include, :excluded)
259
+ full_queue
260
+ else
261
+ full_queue.select {|entry| !excluded.call(entry, session, options, self)}
262
+ end
239
263
  end
240
264
 
241
265
  # Applies the changes introduced by api policies
@@ -307,9 +331,10 @@ module Eco
307
331
  end
308
332
 
309
333
  # Keep a copy of the requests for future reference
310
- def backup_update(requests)
334
+ def backup_update(requests, simulate: false)
335
+ dry_run = simulate ? "_dry_run" : ""
311
336
  dir = config.people.requests_folder
312
- file = File.join(dir, "#{type}_data.json")
337
+ file = File.join(dir, "#{type}_data#{dry_run}.json")
313
338
  file_manager.save_json(requests, file, :timestamp)
314
339
  end
315
340
 
@@ -2,7 +2,7 @@ module Eco
2
2
  module API
3
3
  class UseCases
4
4
 
5
- class UnkownCase < Exception
5
+ class UnkownCase < StandardError
6
6
  def initialize(msg = nil, case_name: nil, type: nil)
7
7
  msg ||= "Unkown case"
8
8
  msg += ". Case name '#{case_name}'" if case_name
@@ -11,7 +11,7 @@ module Eco
11
11
  end
12
12
  end
13
13
 
14
- class AmbiguousCaseReference < Exception
14
+ class AmbiguousCaseReference < StandardError
15
15
  def initialize(msg = nil, case_name: nil)
16
16
  msg ||= "You must specify type when there are multiple cases with same name"
17
17
  msg += ". Case name '#{case_name}'" if case_name
@@ -4,7 +4,7 @@ module Eco
4
4
  # Core class of UseCases. It basically defines and manages allowed `types`
5
5
  class BaseCase
6
6
 
7
- class InvalidType < Exception
7
+ class InvalidType < StandardError
8
8
  def initialize(msg = nil, type:, types:)
9
9
  msg ||= "Invalid type."
10
10
  msg = "Given type '#{type}'. Valid types: #{types}"
@@ -13,7 +13,7 @@ module Eco
13
13
  end
14
14
 
15
15
  extend Eco::API::Common::ClassHelpers
16
-
16
+
17
17
  @types = [:import, :filter, :transform, :sync, :error_handler, :export, :other]
18
18
 
19
19
  class << self
@@ -5,6 +5,19 @@ module Eco
5
5
  class BaseIO < BaseCase
6
6
  @types = BaseCase.types
7
7
 
8
+ class MissingParameter < StandardError
9
+ attr_reader :type, :required, :given
10
+
11
+ def initialize(msg = nil, type: nil, required:, given:)
12
+ @type = type
13
+ @required = required
14
+ @given = given
15
+ msg += " of type '#{type}'" if type
16
+ msg += " requires an object '#{required}'. Given: #{given}."
17
+ super(msg)
18
+ end
19
+ end
20
+
8
21
  class << self
9
22
  def input_required?(type)
10
23
  !valid_type?(type) || [:import, :sync].include?(type)
@@ -80,13 +93,13 @@ module Eco
80
93
  def validate_args(input:, people:, session:, options:)
81
94
  case
82
95
  when !session.is_a?(Eco::API::Session)
83
- raise "A UseCase needs a Session object. Given: #{session}"
96
+ raise MissingParameter.new("UseCase", required: :session, given: session.class)
84
97
  when input_required? && !input
85
- raise "UseCase of type '#{type}' requires a valid input. None given"
98
+ raise MissingParameter.new("UseCase", type: type, required: :input, given: input.class)
86
99
  when people_required? && !people.is_a?(Eco::API::Organization::People)
87
- raise "UseCase of type '#{type}' requires a People object. Given: #{people}"
100
+ raise MissingParameter.new("UseCase", type: type, required: :people, given: people.class)
88
101
  when !options || (options && !options.is_a?(Hash))
89
- raise "To inject dependencies via ':options' it should be a Hash object. Given: #{options}"
102
+ raise MissingParameter.new("Use Case options", required: :Hash, given: options.class)
90
103
  end
91
104
  true
92
105
  end
@@ -13,6 +13,7 @@ require_relative 'default_cases/abstract_policygroup_abilities_case.rb'
13
13
  require_relative 'default_cases/analyse_people_case'
14
14
  require_relative 'default_cases/append_usergroups_case'
15
15
  require_relative 'default_cases/change_email_case'
16
+ require_relative 'default_cases/clean_unknown_tags_case'
16
17
  require_relative 'default_cases/codes_to_tags_case'
17
18
  require_relative 'default_cases/create_case'
18
19
  require_relative 'default_cases/create_details_case'
@@ -5,41 +5,158 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
5
5
  attr_reader :session, :people, :options
6
6
 
7
7
  def main(people, session, options, usecase)
8
+ options[:end_get] = false
8
9
  @session = session; @options = options; @people = people
9
10
 
10
- save!(analysis)
11
+ case
12
+ when case_options[:identify_duplicates]
13
+ identify_duplicates
14
+ when case_options[:identify_unnamed]
15
+ identify_unnamed
16
+ else
17
+ session.logger.info("No analysis operation was specified")
18
+ end.tap do |people_involved|
19
+ if people_involved
20
+ to_csv(people_involved) if to_csv?
21
+ create_people_backup(people_involved) if results_people_backup?
22
+ end
23
+ end
11
24
  end
12
25
 
13
26
  private
14
27
 
15
- def analysis
16
- analytics.attribute = :name
17
- analysed = analytics.analyse({
18
- threshold: 0.20,
19
- order: [:average, :dice]
20
- })
21
- analytics.re_analyse(analysed, threshold: 0.5, order: [:average])
28
+ def identify_unnamed
29
+ similarity_analytics.unnamed.tap do |unnamed|
30
+ if unnamed.empty?
31
+ session.logger.info("There were no people with no name!!")
32
+ end
33
+ end
22
34
  end
23
35
 
24
- def analytics
25
- @analytics ||= people.analytics
36
+ def identify_duplicates
37
+ analysed = similarity_screening
38
+ if case_options[:ignore_matching_words]
39
+ puts "Fine tune results by ignoring matching words..."
40
+ analysed = strict_similarity(analysed)
41
+ end
42
+
43
+ similarity_analytics.newSimilarity(analysed).tap do |related_people|
44
+ if related_people.empty?
45
+ session.logger.info("There were no possible duplicates identified!!")
46
+ else
47
+ report = similarity_analytics.report(analysed, format: :txt)
48
+ save!(report)
49
+ end
50
+ end
26
51
  end
27
52
 
28
- def file
29
- @file ||= options.dig(:output, :file) || "analytics.txt"
53
+ def strict_similarity(analysed)
54
+ similarity_analytics.ignore_matching_words(analysed, **{
55
+ threshold: 0.5,
56
+ order: [:ngrams]
57
+ })
30
58
  end
31
59
 
32
- def save!(analysed)
33
- if analysed.empty?
34
- session.logger.info("There were results identified!!")
35
- return
60
+ def similarity_screening
61
+ similarity_analytics.attribute = field_similarity
62
+ options = {
63
+ threshold: 0.4,
64
+ order: [:average, :dice]
65
+ }.tap do |opts|
66
+ opts.merge!(needle_read: facet_field_proc) if facet_field?
67
+ opts.merge!(unique_words: true) if unique_words?
68
+ end
69
+ analysed = similarity_analytics.analyse(**options)
70
+ puts "Got #{analysed.count} results after basic screening with #{options}"
71
+
72
+ return analysed if case_options[:only_screening]
73
+ options = {threshold: 0.5, order: [:average]}
74
+ puts "Going to rearrange results... with #{options}"
75
+ similarity_analytics.rearrange(analysed, **options).tap do |analysed|
76
+ puts "... got #{analysed.count} results after rearranging"
36
77
  end
78
+ end
79
+
80
+ def similarity_analytics
81
+ @analytics ||= people.similarity
82
+ end
83
+
84
+ def create_people_backup(cut = people, file = results_people_backup)
85
+ session.file_manager.save_json(cut, file)
86
+ end
87
+
88
+ def to_csv(data = people, file = csv_file)
89
+ opts = {}
90
+ opts.deep_merge!(export: {file: {name: file, format: :csv}})
91
+ opts.deep_merge!(export: {options: {nice_header: true}})
92
+ opts.deep_merge!(export: {options: {internal_names: true}})
93
+ #opts.deep_merge!(export: {options: {split_schemas: true}})
94
+ session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
95
+ end
96
+
97
+ def unique_words?
98
+ case_options[:unique_words]
99
+ end
100
+
101
+ def field_similarity
102
+ return :name unless use_field?
103
+ use_field_proc
104
+ end
105
+
106
+ def use_field_proc
107
+ proc_value_access(use_field)
108
+ end
109
+
110
+ def facet_field_proc
111
+ proc_value_access(facet_field)
112
+ end
113
+
114
+ def use_field
115
+ case_options.dig(:use_field)
116
+ end
37
117
 
38
- ext = File.extname(file).downcase.delete(".")
118
+ def use_field?
119
+ !!use_field
120
+ end
121
+
122
+ def facet_field
123
+ case_options.dig(:facet_field)
124
+ end
125
+
126
+ def facet_field?
127
+ !!facet_field
128
+ end
129
+
130
+ def csv_file
131
+ case_options.dig(:csv_file)
132
+ end
133
+
134
+ def to_csv?
135
+ !!csv_file
136
+ end
137
+
138
+ def results_people_backup
139
+ case_options.dig(:backup_people)
140
+ end
141
+
142
+ def results_people_backup?
143
+ !!results_people_backup
144
+ end
145
+
146
+ def case_options
147
+ options.dig(:usecase, :analyse_people) || {}
148
+ end
39
149
 
40
- File.open(file, "w") do |fd|
150
+ def output_file
151
+ @output_file ||= options.dig(:output, :file) || "analytics.txt"
152
+ end
153
+
154
+ def save!(data)
155
+ ext = File.extname(output_file).downcase.delete(".")
156
+ session.logger.info("Generating file '#{output_file}'")
157
+ File.open(output_file, "w") do |fd|
41
158
  if ext == "txt"
42
- fd << analytics.analysis(analysed, format: :txt)
159
+ fd << data
43
160
  elsif ext == "html"
44
161
  puts "html is still not supported"
45
162
  exit(1)
@@ -50,4 +167,57 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
50
167
  end
51
168
  end
52
169
 
170
+ # A way to use command line to specify part
171
+ # => i.e. details[first-name] AND details[surname]
172
+ def proc_value_access(expression)
173
+ #return expression.to_sym if expression.start_with?(":")
174
+ subexpressions = expression.split(" AND ")
175
+ Proc.new do |person|
176
+ values = subexpressions.map {|exp| attribute_access(person, exp)}
177
+ values.compact.join(" ")
178
+ end
179
+ end
180
+
181
+ # A way to use command line to specify part
182
+ # => i.e. person.details[first-name]
183
+ def attribute_access(person, expression)
184
+ parts = expression.split(".")
185
+ parts_to_value(person, parts).tap do |value|
186
+ unless value.is_a?(String) || !value
187
+ raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
188
+ end
189
+ end
190
+ end
191
+
192
+ def parts_to_value(obj, parts)
193
+ parts.reduce(obj) do |object, part|
194
+ get_attr(object, part)
195
+ end
196
+ end
197
+
198
+ def get_attr(obj, part)
199
+ case
200
+ when !obj
201
+ nil
202
+ when part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
203
+ obj.send(part.to_sym)
204
+ when part.start_with?(":")
205
+ get_attr(obj, part[1..-1])
206
+ when part.start_with?("details[")
207
+ if (obj.respond_to?(:details)) && details = obj.details
208
+ if match = part.match(/details\[(?<field>.*)\]/)
209
+ details[match[:field]]
210
+ else
211
+ raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
212
+ end
213
+ end
214
+ when part.start_with?("account")
215
+ obj.account if obj.respond_to?(:account)
216
+ when part.start_with?("person")
217
+ obj
218
+ else
219
+ raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
220
+ end
221
+ end
222
+
53
223
  end