eco-helpers 2.0.18 → 2.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -1
  3. data/lib/eco/api/common/people/entry_factory.rb +26 -9
  4. data/lib/eco/api/common/people/person_entry.rb +1 -0
  5. data/lib/eco/api/common/session.rb +1 -0
  6. data/lib/eco/api/common/session/base_session.rb +2 -0
  7. data/lib/eco/api/common/session/helpers.rb +30 -0
  8. data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
  9. data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
  10. data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
  11. data/lib/eco/api/microcases/with_each.rb +67 -6
  12. data/lib/eco/api/microcases/with_each_present.rb +4 -2
  13. data/lib/eco/api/microcases/with_each_starter.rb +4 -2
  14. data/lib/eco/api/organization.rb +1 -1
  15. data/lib/eco/api/organization/people.rb +92 -23
  16. data/lib/eco/api/organization/people_similarity.rb +112 -0
  17. data/lib/eco/api/organization/person_schemas.rb +5 -1
  18. data/lib/eco/api/organization/policy_groups.rb +5 -1
  19. data/lib/eco/api/session.rb +5 -2
  20. data/lib/eco/api/session/batch.rb +7 -5
  21. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +12 -35
  22. data/lib/eco/api/usecases/default_cases/to_csv_case.rb +81 -36
  23. data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +3 -4
  24. data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
  25. data/lib/eco/cli/config/default/options.rb +2 -1
  26. data/lib/eco/cli/config/default/usecases.rb +2 -0
  27. data/lib/eco/cli/config/default/workflow.rb +4 -1
  28. data/lib/eco/csv.rb +4 -2
  29. data/lib/eco/data/fuzzy_match.rb +63 -21
  30. data/lib/eco/data/fuzzy_match/ngrams_score.rb +7 -2
  31. data/lib/eco/data/fuzzy_match/pairing.rb +0 -1
  32. data/lib/eco/data/fuzzy_match/result.rb +7 -1
  33. data/lib/eco/data/fuzzy_match/results.rb +12 -6
  34. data/lib/eco/version.rb +1 -1
  35. metadata +4 -2
  36. data/lib/eco/api/organization/people_analytics.rb +0 -60
@@ -0,0 +1,112 @@
1
module Eco
  module API
    module Organization

      # Class to find out duplicates in the People Manager.
      #
      # It combines the `People` collection with the `Eco::Data::FuzzyMatch`
      # helpers to score how similar the people of the collection are to each other.
      #
      # @attr attribute [String, Proc, nil] the target attribute to be read.
      class PeopleSimilarity < Eco::API::Organization::People
        include Eco::Data::FuzzyMatch

        # @!group Config

        # Sets the target attribute to be read on each person when comparing.
        # @param attr [String, Proc, nil] the target attribute to be read.
        def attribute=(attr)
          # The value is stored as given (it used to be discarded in favour of "name").
          @attribute = attr
        end

        # @return [String, Proc] the target attribute to be read (`"name"` unless one has been set).
        def attribute
          @attribute ||= "name"
        end

        # Define the order of relevance of the matches found per user.
        # @param values [Array<Symbol>] the algorithms' results it should be ordered by
        #   * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`
        def order=(values)
          @order = values
        end

        # @return [Array<Symbol>] the algorithms the results are ordered by (`[:words_ngrams, :dice]` by default).
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # Define the minimum score required for a match to be considered.
        # @param value [Float] the threshold that all of the algorithms should comply with
        def threshold=(value)
          @threshold = value
        end

        # @return [Float] the threshold in use (`0.15` by default).
        def threshold
          @threshold ||= 0.15
        end

        # @!endgroup

        # @!group Searchers

        # It gathers those that have the same `email`
        # @note relies on `init_caches` and `@by_email`, which are not defined in this class
        #   (presumably provided by the parent `People` class -- to be confirmed).
        # @return [Hash] where `keys` are `email`s and `values` an `Array<Person>`
        def repeated_emails
          init_caches
          @by_email.select { |_email, people| people.count > 1 }
        end

        # @!endgroup

        # @!group Analysers

        # Analyses People based on `options`
        # @param options [Hash] passed through to `find_all_with_score`; `:read` defaults to `#attribute`.
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def analyse(**options)
          options = { read: self.attribute }.merge(options)
          each_with_object({}) do |person, results|
            results[person.id] = find_all_with_score(person, **options)
          end
        end

        # Launches a reanalysis on `analysed` based on `options`
        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        # @return [Hash] same _keys_ as `analysed`, with the outcome of `relevant_results(**options)` as _values_.
        def re_analyse(analysed, **options)
          analysed.each_with_object({}) do |(id, results), out|
            out[id] = results.relevant_results(**options)
          end
        end

        # @!endgroup

        # @!group Helpers

        # Renders the outcome of an analysis as text: one entry per person `id`.
        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        # @param format [Symbol] the output format. Only `:txt` is supported.
        # @return [String, nil] well structured text; `nil` when `format` is not supported.
        def analysis(analysed, format: :txt)
          case format
          when :txt
            analysed.each_with_object("") do |(id, results), out|
              msg = results.results.map {|r| r.print}.join("\n ")
              # Append to the accumulator: the block's own value is discarded by `each_with_object`.
              out << "'#{self[id].identify}':\n " << msg << "\n"
            end
          end
        end

        # @note
        #   1. Unless `:analysed` is provided, it launches an analysis cutting with Jaro Winkler min 0.5
        #   2. It then re-sorts and cuts based on `options`
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def print_analysis(**options)
          analysed = options[:analysed]
          unless analysed
            # `:analysed` is consumed here; it is not an option of the analysis itself.
            analyse_options = options.reject { |key, _value| key == :analysed }
            # NOTE(review): `results_with_false_positives` is not defined in this class;
            # confirm that the parent class or `Eco::Data::FuzzyMatch` provides it.
            analysed = results_with_false_positives.analyse(**analyse_options)
          end
          # Print the report once (not once per analysed entry) and hand the results back.
          puts analysis(analysed)
          analysed
        end
        # @!endgroup

        protected

        # Discards `@fuzzy_match` (if set) before delegating to the parent's `on_change`.
        def on_change
          # `remove_instance_variable` expects the variable *name*; guard against it not being set yet.
          remove_instance_variable(:@fuzzy_match) if instance_variable_defined?(:@fuzzy_match)
          super
        end

      end
    end
  end
end
@@ -28,7 +28,11 @@ module Eco
28
28
  end
29
29
 
30
30
  def schema(id_name)
31
- @by_id.fetch(schema_id(id_name), nil)
31
+ self[id_name]
32
+ end
33
+
34
+ def [](id_name)
35
+ @by_id[schema_id(id_name)]
32
36
  end
33
37
 
34
38
  private
@@ -44,7 +44,11 @@ module Eco
44
44
  end
45
45
 
46
46
  def policy_group(id_name)
47
- @by_id.fetch(policy_group_id(id_name), nil)
47
+ self[id_name]
48
+ end
49
+
50
+ def [](id_name)
51
+ @by_id[policy_group_id(id_name)]
48
52
  end
49
53
 
50
54
  def user_pg_ids(initial: [], final: [], non_custom: (non_custom_not_used = true; []), preserve_custom: true)
@@ -74,12 +74,15 @@ module Eco
74
74
  def entry_factory(schema: nil)
75
75
  schema = to_schema(schema) || self.schema
76
76
  return @entry_factories[schema&.id] if @entry_factories.key?(schema&.id)
77
+ unless @entry_factories.empty?
78
+ @entry_factories[schema&.id] = @entry_factories.values.first.newFactory(schema: schema)
79
+ return @entry_factories[schema&.id]
80
+ end
77
81
 
78
82
  mappings = []
79
83
  if map_file = config.people.fields_mapper
80
84
  mappings = map_file ? file_manager.load_json(map_file) : []
81
85
  end
82
-
83
86
  @entry_factories[schema&.id] = Eco::API::Common::People::EntryFactory.new(
84
87
  enviro,
85
88
  schema: schema,
@@ -127,7 +130,7 @@ module Eco
127
130
  # @see Eco::API::Common::People::EntryFactory#new
128
131
  # @return [Eco::API::Common::People::PersonEntry] parsed entry.
129
132
  def new_entry(data, dependencies: {})
130
- entry_factory.new(data, dependencies: dependencies)
133
+ entry_factory(schema: data&.details&.schema_id).new(data, dependencies: dependencies)
131
134
  end
132
135
 
133
136
  # @see Eco::API::Common::People::EntryFactory#entries
@@ -136,11 +136,13 @@ module Eco
136
136
  block.call
137
137
  rescue error_type => e
138
138
  raise unless retries_left > 0
139
- print "Batch TimeOut. You have #{retries_left} retries left. Do you want to retry (y/N)? "
140
- if (res = STDIN.gets.chomp) && res[0].downcase == "y"
141
- offer_retry_on(error_type, retries_left - 1, &block)
142
- else
143
- raise
139
+ explanation = "Batch TimeOut. You have #{retries_left} retries left."
140
+ prompt_user("Do you want to retry (y/N)?", explanation, default: "Y", timeout: 10) do |response|
141
+ if response.upcase.start_with?("Y")
142
+ offer_retry_on(error_type, retries_left - 1, &block)
143
+ else
144
+ raise
145
+ end
144
146
  end
145
147
  end
146
148
  end
@@ -7,14 +7,18 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
7
7
  def main(people, session, options, usecase)
8
8
  @session = session; @options = options; @people = people
9
9
 
10
- save!(cyclic_sets)
10
+ save!(analysis)
11
11
  end
12
12
 
13
13
  private
14
14
 
15
- def identify_double_ups
16
- analytics.similarity
17
-
15
# Runs the similarity analysis on the `name` attribute and then narrows down the results.
# @return the outcome of `analytics.re_analyse` on the first-pass results.
def analysis
  analytics.attribute = :name
  # Options must be passed as keywords: `analyse` only accepts `**options`, so a positional
  # Hash raises ArgumentError on Ruby 3+ (and a deprecation warning on Ruby 2.7).
  analysed = analytics.analyse(threshold: 0.20, order: [:average, :dice])
  analytics.re_analyse(analysed, threshold: 0.5, order: [:average])
end
19
23
 
20
24
  def analytics
@@ -25,9 +29,9 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
25
29
  @file ||= options.dig(:output, :file) || "analytics.txt"
26
30
  end
27
31
 
28
- def save!(data)
29
- if data.empty?
30
- session.logger.info("There were no cyclic supervisors identified!!")
32
+ def save!(analysed)
33
+ if analysed.empty?
34
+ session.logger.info("There were results identified!!")
31
35
  return
32
36
  end
33
37
 
@@ -35,42 +39,15 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
35
39
 
36
40
  File.open(file, "w") do |fd|
37
41
  if ext == "txt"
38
- create_file(data, file: file, format: :txt)
42
+ fd << analytics.analysis(analysed, format: :txt)
39
43
  elsif ext == "html"
40
44
  puts "html is still not supported"
41
45
  exit(1)
42
- create_file(data, file: file, format: :html)
43
46
  elsif ext == "json"
44
47
  puts "json is still not supported"
45
48
  exit(1)
46
- create_file(data, file: file, format: :json)
47
49
  end
48
50
  end
49
51
  end
50
52
 
51
- def create_file(sets, file:, format: :txt)
52
- File.open(file, "w") do |fd|
53
- fd << sets_to_str(sets, format: format)
54
- end
55
- puts "Generated file #{file}"
56
- end
57
-
58
- def sets_to_str(sets, format: :txt)
59
- raise "Required Array. Given: #{sets.class}" unless sets.is_a?(Array)
60
- "".tap do |str|
61
- sets.each do |set|
62
- str << set_to_str(set, format: format)
63
- end
64
- end
65
- end
66
-
67
- def set_to_str(set, lev: 0, format: :txt)
68
- raise "Required Array. Given: #{set.class}" unless set.is_a?(Array)
69
- "".tap do |str|
70
- entry = set.shift
71
- str << "#{" " * lev}#{(lev > 0)? "+-#{lev}- " : ""}#{entry.name} (#{entry.external_id}|#{entry.email}|#{entry.id})\n"
72
- str << set_to_str(set, lev: lev + 1, format: format) unless !set || set.empty?
73
- end
74
- end
75
-
76
53
  end
@@ -13,28 +13,42 @@ class Eco::API::UseCases::DefaultCases::ToCsvCase < Eco::API::Common::Loaders::U
13
13
  return false
14
14
  end
15
15
 
16
- session.logger.info("going to create file: #{file}")
17
- header = spot_header
18
-
19
- CSV.open(file, "w") do |csv|
20
- csv << header
21
- people.each do |person|
22
- csv << to_row(person)
16
+ if options.dig(:export, :options, :split_schemas)
17
+ by_schema.each do |id, people|
18
+ sch_name = schemas.to_name(id)
19
+ prefix = sch_name ? sch_name.gsub(" ", "_") : "No_Schema"
20
+ create_file!("#{prefix}_#{file}", people)
23
21
  end
22
+ else
23
+ create_file!(file, people)
24
24
  end
25
25
  end
26
26
 
27
27
  private
28
28
 
29
# Writes a CSV file with a header row followed by one row per person.
# @param filename [String] path of the file to be created (defaults to `file`).
# @param data [Enumerable] the people to export (defaults to `people`); its first
#   element is handed to `spot_header` to build the header row.
def create_file!(filename = file, data = people)
  notice = "going to create file: #{filename}"
  session.logger.info(notice)

  CSV.open(filename, "w") do |output|
    header_row = spot_header(data.first)
    output << header_row
    data.each { |member| output << to_row(member) }
  end
end
39
+
29
40
  def to_row(person)
30
41
  entry = to_entry_type(person)
31
- entry.values_at(*keys(entry))
42
+ entry.values_at(*keys(entry)).tap do |row|
43
+ row << schemas.to_name(person.details&.schema_id) || "No Schema"
44
+ end
32
45
  end
33
46
 
34
- def spot_header
35
- header = keys(to_entry_type(people.first))
47
+ def spot_header(person = people.first)
48
+ header = keys(to_entry_type(person))
49
+ header << "Schema"
36
50
  header = yield(header) if block_given?
37
- header = nice_header_names(header) if nice_header_names?
51
+ header = nice_header_names(header, schema: schema(person.details)) if nice_header_names?
38
52
  header
39
53
  end
40
54
 
@@ -46,10 +60,62 @@ class Eco::API::UseCases::DefaultCases::ToCsvCase < Eco::API::Common::Loaders::U
46
60
  options.dig(:nice_header) || options.dig(:export, :options, :nice_header)
47
61
  end
48
62
 
49
- def nice_header_names(header)
50
- name_maps = session.schema.fields_by_alt_id.each_with_object({}) do |(alt_id, fld), mappings|
63
+ def nice_header_names(header, schema: nil)
64
+ schema ||= session.schema
65
+ name_maps = schema.fields_by_alt_id.each_with_object({}) do |(alt_id, fld), mappings|
51
66
  mappings[alt_id] = fld.name
52
- end.merge({
67
+ end.merge(nice_header_maps)
68
+ header.map {|name| name_maps[name] ? name_maps[name] : name}
69
+ end
70
+
71
+ def to_entry_type(person)
72
+ session.new_entry(person, dependencies: deps).yield_self do |person_entry|
73
+ options.dig(:export, :options, :internal_names) ? person_entry.mapped_entry : person_entry.external_entry
74
+ end
75
+ end
76
+
77
+ def deps
78
+ @deps ||= {"supervisor_id" => {people: people}}
79
+ end
80
+
81
+ def file
82
+ @file ||= (options[:file] || options.dig(:export, :file, :name)).tap do |filename|
83
+ unless filename
84
+ session.logger.error("Destination file not specified")
85
+ return false
86
+ end
87
+ end
88
+ end
89
+
90
# Splits `people` by the schema id found in each person's details.
# @return [Hash] where the _keys_ are schema ids (`nil` for people without details)
#   and the _values_ the result of `people.newFrom` on each group.
def by_schema
  grouped = people.group_by do |member|
    member_details = member.details
    member_details ? member_details.schema_id : nil
  end
  grouped.transform_values { |group| people.newFrom(group) }
end
99
+
100
# Resolves a schema out of different kinds of input.
# @param value [Ecoportal::API::V1::Person, Ecoportal::API::V1::PersonDetails,
#   Ecoportal::API::V1::PersonSchema, String] a person, its details, a schema or a
#   key to look up in `schemas`.
# @return the schema itself when one is given, the `schemas[...]` lookup for a `String`
#   (also reached via the `schema_id` of a person or of its details), `nil` otherwise.
def schema(value)
  if value.is_a?(Ecoportal::API::V1::Person)
    schema(value.details&.schema_id)
  elsif value.is_a?(String)
    schemas[value]
  elsif value.is_a?(Ecoportal::API::V1::PersonDetails)
    schema(value.schema_id)
  elsif value.is_a?(Ecoportal::API::V1::PersonSchema)
    value
  end
end
112
+
113
+ def schemas
114
+ session.schemas
115
+ end
116
+
117
+ def nice_header_maps
118
+ @nice_header_maps ||= {
53
119
  "policy_group_ids" => "User Group(s)",
54
120
  "email" => "Email",
55
121
  "name" => "Name",
@@ -96,28 +162,7 @@ class Eco::API::UseCases::DefaultCases::ToCsvCase < Eco::API::Common::Loaders::U
96
162
  "custom_person_details" => "(min) on People Schema Details",
97
163
  "custom_person_account" => "(min) on Users",
98
164
  "custom_person_abilities" => "(min) on Users' Abilities"
99
- })
100
- header.map {|name| name_maps[name] ? name_maps[name] : name}
101
- end
102
-
103
- def to_entry_type(person)
104
- session.new_entry(person, dependencies: deps).yield_self do |person_entry|
105
- options.dig(:export, :options, :internal_names) ? person_entry.mapped_entry : person_entry.external_entry
106
- end
107
- end
108
-
109
- def deps
110
- @deps ||= {"supervisor_id" => {people: people}}
165
+ }
111
166
  end
112
167
 
113
- def file
114
- @file ||= (options[:file] || options.dig(:export, :file, :name)).tap do |filename|
115
- unless filename
116
- session.logger.error("Destination file not specified")
117
- return false
118
- end
119
- end
120
- end
121
-
122
-
123
168
  end
@@ -5,8 +5,7 @@ class Eco::API::UseCases::DefaultCases::ToCsvDetailedCase < Eco::API::UseCases::
5
5
  private
6
6
 
7
7
  def to_row(person)
8
- entry = to_entry_type(person)
9
- data = entry.values_at(*keys(entry))
8
+ data = super(person)
10
9
  data << person.subordinates
11
10
  data << person_supervisor(person)
12
11
  data += user_abilities(person)
@@ -38,8 +37,8 @@ class Eco::API::UseCases::DefaultCases::ToCsvDetailedCase < Eco::API::UseCases::
38
37
  preferences.map {|key| user_preferences[key] || false}
39
38
  end
40
39
 
41
- def spot_header
42
- super do |header|
40
+ def spot_header(person = people.first)
41
+ super(person) do |header|
43
42
  header << "Subordinates"
44
43
  header << "Supervisor Name"
45
44
  header += abilities_header
@@ -124,8 +124,9 @@ class Eco::API::UseCases::OozeSamples::OozeUpdateCase < Eco::API::Common::Loader
124
124
  end
125
125
 
126
126
  def prompt_to_confirm!
127
- print "\nDo you want to proceed (y/N)? "
128
- exit(1) unless $stdin.gets.chomp.to_s.downcase == "y"
127
+ prompt_user("Do you want to proceed (y/N)?", default: "Y") do |response|
128
+ exit(1) unless response.upcase.start_with?("Y")
129
+ end
129
130
  end
130
131
 
131
132
  end