eco-helpers 2.0.18 → 2.0.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -1
  3. data/lib/eco/api/common/people/entry_factory.rb +26 -9
  4. data/lib/eco/api/common/people/person_entry.rb +1 -0
  5. data/lib/eco/api/common/session.rb +1 -0
  6. data/lib/eco/api/common/session/base_session.rb +2 -0
  7. data/lib/eco/api/common/session/helpers.rb +30 -0
  8. data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
  9. data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
  10. data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
  11. data/lib/eco/api/microcases/with_each.rb +67 -6
  12. data/lib/eco/api/microcases/with_each_present.rb +4 -2
  13. data/lib/eco/api/microcases/with_each_starter.rb +4 -2
  14. data/lib/eco/api/organization.rb +1 -1
  15. data/lib/eco/api/organization/people.rb +92 -23
  16. data/lib/eco/api/organization/people_similarity.rb +112 -0
  17. data/lib/eco/api/organization/person_schemas.rb +5 -1
  18. data/lib/eco/api/organization/policy_groups.rb +5 -1
  19. data/lib/eco/api/session.rb +5 -2
  20. data/lib/eco/api/session/batch.rb +7 -5
  21. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +12 -35
  22. data/lib/eco/api/usecases/default_cases/to_csv_case.rb +81 -36
  23. data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +3 -4
  24. data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
  25. data/lib/eco/cli/config/default/options.rb +2 -1
  26. data/lib/eco/cli/config/default/usecases.rb +2 -0
  27. data/lib/eco/cli/config/default/workflow.rb +4 -1
  28. data/lib/eco/csv.rb +4 -2
  29. data/lib/eco/data/fuzzy_match.rb +63 -21
  30. data/lib/eco/data/fuzzy_match/ngrams_score.rb +7 -2
  31. data/lib/eco/data/fuzzy_match/pairing.rb +0 -1
  32. data/lib/eco/data/fuzzy_match/result.rb +7 -1
  33. data/lib/eco/data/fuzzy_match/results.rb +12 -6
  34. data/lib/eco/version.rb +1 -1
  35. metadata +4 -2
  36. data/lib/eco/api/organization/people_analytics.rb +0 -60
@@ -0,0 +1,112 @@
1
module Eco
  module API
    module Organization

      # Class to find out duplicates in the People Manager
      #
      # @attr attribute [String, Proc, nil] the target attribute to be read (`"name"` when unset).
      class PeopleSimilarity < Eco::API::Organization::People
        include Eco::Data::FuzzyMatch

        # @!group Config

        # Sets the target attribute to be read when comparing people.
        # @param attr [String, Proc, nil] the target attribute to be read.
        def attribute=(attr)
          # The previous body (`@attribute ||= "name"`) discarded `attr`,
          # so any assignment silently ended up as "name".
          @attribute = attr
        end

        # @return [String, Proc, nil] the target attribute to be read; `"name"` when none was set.
        def attribute
          @attribute ||= "name"
        end

        # Define the order of relevance of the per user matches
        # @param values [Array<Symbol>] the algorithms' results it should be ordered by
        #   * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`
        def order=(values)
          @order = values
        end

        # @return [Array<Symbol>] the configured order; `[:words_ngrams, :dice]` when none was set.
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # Define the minimum score threshold
        # @param value [Float] the threshold that all of the algorithms should comply with
        def threshold=(value)
          @threshold = value
        end

        # @return [Float] the configured threshold; `0.15` when none was set.
        def threshold
          @threshold ||= 0.15
        end

        # @!endgroup

        # @!group Searchers

        # It gathers those that have the same `email`
        # @return [Hash] where `keys` are `email`s and `values` an `Array<Person>`
        def repeated_emails
          init_caches
          @by_email.select { |_email, people| people.count > 1 }
        end

        # @!endgroup

        # @!group Analysers

        # Analyses People based on `options`
        # @note only `:read` (from `attribute`) is defaulted here; `order` and `threshold`
        #   are not merged in -- NOTE(review): confirm whether that is intended.
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def analyse(**options)
          options = { read: attribute }.merge(options)
          each_with_object({}) do |person, results|
            results[person.id] = find_all_with_score(person, **options)
          end
        end

        # Launches a reanalysis on `analysed` based on `options`
        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        # @return [Hash] same _keys_ as `analysed`, with each value replaced by its `relevant_results(**options)`
        def re_analyse(analysed, **options)
          analysed.each_with_object({}) do |(id, results), out|
            out[id] = results.relevant_results(**options)
          end
        end

        # @!endgroup

        # @!group Helpers

        # Renders `analysed` as text: one section per person, listing the printed matches.
        # @param analysed [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        # @param format [Symbol] only `:txt` is handled.
        # @return [String, nil] well structured text; `nil` for any `format` other than `:txt`
        def analysis(analysed, format: :txt)
          case format
          when :txt
            # The previous version built each section but never added it to the
            # accumulator, so the result was always an empty String.
            analysed.map do |id, results|
              msg = results.results.map { |r| r.print }.join("\n ")
              "'#{self[id].identify}':\n " + msg
            end.join("\n")
          end
        end

        # @note
        #   1. Unless `:analysed` is provided, it launches an analysis cutting with Jaro Winkler min 0.5
        #   2. It then re-sorts and cuts based on `options`
        # @return [Hash] where the _keys_ are the people `id`s and the _values_ the `Eco::Data::FuzzyMatch::Results`
        def print_analysis(**options)
          # NOTE(review): `results_with_false_positives` is not defined in this class;
          # presumably it comes from `Eco::Data::FuzzyMatch` -- confirm.
          analysed = options[:analysed] || results_with_false_positives.analyse(**options)
          # Print the report once (it used to be printed once per analysed entry)
          # and return the analysed Hash, as documented, rather than an empty one.
          puts analysis(analysed) unless analysed.empty?
          analysed
        end

        # @!endgroup

        protected

        # Drops the cached `@fuzzy_match` (when present) before delegating to `super`.
        def on_change
          # `remove_instance_variable` expects the variable *name*; it raises
          # `NameError` when the variable is not defined, hence the guard.
          remove_instance_variable(:@fuzzy_match) if instance_variable_defined?(:@fuzzy_match)
          super
        end

      end
    end
  end
end
@@ -28,7 +28,11 @@ module Eco
28
28
  end
29
29
 
30
30
  def schema(id_name)
31
- @by_id.fetch(schema_id(id_name), nil)
31
+ self[id_name]
32
+ end
33
+
34
+ def [](id_name)
35
+ @by_id[schema_id(id_name)]
32
36
  end
33
37
 
34
38
  private
@@ -44,7 +44,11 @@ module Eco
44
44
  end
45
45
 
46
46
  def policy_group(id_name)
47
- @by_id.fetch(policy_group_id(id_name), nil)
47
+ self[id_name]
48
+ end
49
+
50
+ def [](id_name)
51
+ @by_id[policy_group_id(id_name)]
48
52
  end
49
53
 
50
54
  def user_pg_ids(initial: [], final: [], non_custom: (non_custom_not_used = true; []), preserve_custom: true)
@@ -74,12 +74,15 @@ module Eco
74
74
  def entry_factory(schema: nil)
75
75
  schema = to_schema(schema) || self.schema
76
76
  return @entry_factories[schema&.id] if @entry_factories.key?(schema&.id)
77
+ unless @entry_factories.empty?
78
+ @entry_factories[schema&.id] = @entry_factories.values.first.newFactory(schema: schema)
79
+ return @entry_factories[schema&.id]
80
+ end
77
81
 
78
82
  mappings = []
79
83
  if map_file = config.people.fields_mapper
80
84
  mappings = map_file ? file_manager.load_json(map_file) : []
81
85
  end
82
-
83
86
  @entry_factories[schema&.id] = Eco::API::Common::People::EntryFactory.new(
84
87
  enviro,
85
88
  schema: schema,
@@ -127,7 +130,7 @@ module Eco
127
130
  # @see Eco::API::Common::People::EntryFactory#new
128
131
  # @return [Eco::API::Common::People::PersonEntry] parsed entry.
129
132
  def new_entry(data, dependencies: {})
130
- entry_factory.new(data, dependencies: dependencies)
133
+ entry_factory(schema: data&.details&.schema_id).new(data, dependencies: dependencies)
131
134
  end
132
135
 
133
136
  # @see Eco::API::Common::People::EntryFactory#entries
@@ -136,11 +136,13 @@ module Eco
136
136
  block.call
137
137
  rescue error_type => e
138
138
  raise unless retries_left > 0
139
- print "Batch TimeOut. You have #{retries_left} retries left. Do you want to retry (y/N)? "
140
- if (res = STDIN.gets.chomp) && res[0].downcase == "y"
141
- offer_retry_on(error_type, retries_left - 1, &block)
142
- else
143
- raise
139
+ explanation = "Batch TimeOut. You have #{retries_left} retries left."
140
+ prompt_user("Do you want to retry (y/N)?", explanation, default: "Y", timeout: 10) do |response|
141
+ if response.upcase.start_with?("Y")
142
+ offer_retry_on(error_type, retries_left - 1, &block)
143
+ else
144
+ raise
145
+ end
144
146
  end
145
147
  end
146
148
  end
@@ -7,14 +7,18 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
7
7
  def main(people, session, options, usecase)
8
8
  @session = session; @options = options; @people = people
9
9
 
10
- save!(cyclic_sets)
10
+ save!(analysis)
11
11
  end
12
12
 
13
13
  private
14
14
 
15
- def identify_double_ups
16
- analytics.similarity
17
-
15
# Runs the similarity analysis on the `name` attribute and then narrows it down.
#
# 1. Sets `analytics.attribute` to `:name`.
# 2. Calls `analytics.analyse` with threshold `0.20`, ordered by `:average` then `:dice`.
# 3. Calls `analytics.re_analyse` on that result with threshold `0.5`, ordered by `:average`.
#
# @return [Object] whatever `analytics.re_analyse` returns.
def analysis
  analytics.attribute = :name
  # Options are passed as keywords, not as a braced Hash: a method declared as
  # `analyse(**options)` rejects a positional Hash on Ruby 3 (ArgumentError).
  analysed = analytics.analyse(threshold: 0.20, order: [:average, :dice])
  analytics.re_analyse(analysed, threshold: 0.5, order: [:average])
end
19
23
 
20
24
  def analytics
@@ -25,9 +29,9 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
25
29
  @file ||= options.dig(:output, :file) || "analytics.txt"
26
30
  end
27
31
 
28
- def save!(data)
29
- if data.empty?
30
- session.logger.info("There were no cyclic supervisors identified!!")
32
+ def save!(analysed)
33
+ if analysed.empty?
34
+ session.logger.info("There were results identified!!")
31
35
  return
32
36
  end
33
37
 
@@ -35,42 +39,15 @@ class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loader
35
39
 
36
40
  File.open(file, "w") do |fd|
37
41
  if ext == "txt"
38
- create_file(data, file: file, format: :txt)
42
+ fd << analytics.analysis(analysed, format: :txt)
39
43
  elsif ext == "html"
40
44
  puts "html is still not supported"
41
45
  exit(1)
42
- create_file(data, file: file, format: :html)
43
46
  elsif ext == "json"
44
47
  puts "json is still not supported"
45
48
  exit(1)
46
- create_file(data, file: file, format: :json)
47
49
  end
48
50
  end
49
51
  end
50
52
 
51
- def create_file(sets, file:, format: :txt)
52
- File.open(file, "w") do |fd|
53
- fd << sets_to_str(sets, format: format)
54
- end
55
- puts "Generated file #{file}"
56
- end
57
-
58
- def sets_to_str(sets, format: :txt)
59
- raise "Required Array. Given: #{sets.class}" unless sets.is_a?(Array)
60
- "".tap do |str|
61
- sets.each do |set|
62
- str << set_to_str(set, format: format)
63
- end
64
- end
65
- end
66
-
67
- def set_to_str(set, lev: 0, format: :txt)
68
- raise "Required Array. Given: #{set.class}" unless set.is_a?(Array)
69
- "".tap do |str|
70
- entry = set.shift
71
- str << "#{" " * lev}#{(lev > 0)? "+-#{lev}- " : ""}#{entry.name} (#{entry.external_id}|#{entry.email}|#{entry.id})\n"
72
- str << set_to_str(set, lev: lev + 1, format: format) unless !set || set.empty?
73
- end
74
- end
75
-
76
53
  end
@@ -13,28 +13,42 @@ class Eco::API::UseCases::DefaultCases::ToCsvCase < Eco::API::Common::Loaders::U
13
13
  return false
14
14
  end
15
15
 
16
- session.logger.info("going to create file: #{file}")
17
- header = spot_header
18
-
19
- CSV.open(file, "w") do |csv|
20
- csv << header
21
- people.each do |person|
22
- csv << to_row(person)
16
+ if options.dig(:export, :options, :split_schemas)
17
+ by_schema.each do |id, people|
18
+ sch_name = schemas.to_name(id)
19
+ prefix = sch_name ? sch_name.gsub(" ", "_") : "No_Schema"
20
+ create_file!("#{prefix}_#{file}", people)
23
21
  end
22
+ else
23
+ create_file!(file, people)
24
24
  end
25
25
  end
26
26
 
27
27
  private
28
28
 
29
+ def create_file!(filename = file, data = people)
30
+ session.logger.info("going to create file: #{filename}")
31
+
32
+ CSV.open(filename, "w") do |csv|
33
+ csv << spot_header(data.first)
34
+ data.each do |person|
35
+ csv << to_row(person)
36
+ end
37
+ end
38
+ end
39
+
29
40
  def to_row(person)
30
41
  entry = to_entry_type(person)
31
- entry.values_at(*keys(entry))
42
+ entry.values_at(*keys(entry)).tap do |row|
43
+ row << schemas.to_name(person.details&.schema_id) || "No Schema"
44
+ end
32
45
  end
33
46
 
34
- def spot_header
35
- header = keys(to_entry_type(people.first))
47
+ def spot_header(person = people.first)
48
+ header = keys(to_entry_type(person))
49
+ header << "Schema"
36
50
  header = yield(header) if block_given?
37
- header = nice_header_names(header) if nice_header_names?
51
+ header = nice_header_names(header, schema: schema(person.details)) if nice_header_names?
38
52
  header
39
53
  end
40
54
 
@@ -46,10 +60,62 @@ class Eco::API::UseCases::DefaultCases::ToCsvCase < Eco::API::Common::Loaders::U
46
60
  options.dig(:nice_header) || options.dig(:export, :options, :nice_header)
47
61
  end
48
62
 
49
- def nice_header_names(header)
50
- name_maps = session.schema.fields_by_alt_id.each_with_object({}) do |(alt_id, fld), mappings|
63
+ def nice_header_names(header, schema: nil)
64
+ schema ||= session.schema
65
+ name_maps = schema.fields_by_alt_id.each_with_object({}) do |(alt_id, fld), mappings|
51
66
  mappings[alt_id] = fld.name
52
- end.merge({
67
+ end.merge(nice_header_maps)
68
+ header.map {|name| name_maps[name] ? name_maps[name] : name}
69
+ end
70
+
71
+ def to_entry_type(person)
72
+ session.new_entry(person, dependencies: deps).yield_self do |person_entry|
73
+ options.dig(:export, :options, :internal_names) ? person_entry.mapped_entry : person_entry.external_entry
74
+ end
75
+ end
76
+
77
+ def deps
78
+ @deps ||= {"supervisor_id" => {people: people}}
79
+ end
80
+
81
+ def file
82
+ @file ||= (options[:file] || options.dig(:export, :file, :name)).tap do |filename|
83
+ unless filename
84
+ session.logger.error("Destination file not specified")
85
+ return false
86
+ end
87
+ end
88
+ end
89
+
90
+ def by_schema
91
+ people.group_by do |person|
92
+ if details = person.details
93
+ details.schema_id
94
+ end
95
+ end.transform_values do |persons|
96
+ people.newFrom persons
97
+ end
98
+ end
99
+
100
+ def schema(value)
101
+ case value
102
+ when Ecoportal::API::V1::Person
103
+ schema(value.details&.schema_id)
104
+ when String
105
+ schemas[value]
106
+ when Ecoportal::API::V1::PersonDetails
107
+ schema(value.schema_id)
108
+ when Ecoportal::API::V1::PersonSchema
109
+ value
110
+ end
111
+ end
112
+
113
+ def schemas
114
+ session.schemas
115
+ end
116
+
117
+ def nice_header_maps
118
+ @nice_header_maps ||= {
53
119
  "policy_group_ids" => "User Group(s)",
54
120
  "email" => "Email",
55
121
  "name" => "Name",
@@ -96,28 +162,7 @@ class Eco::API::UseCases::DefaultCases::ToCsvCase < Eco::API::Common::Loaders::U
96
162
  "custom_person_details" => "(min) on People Schema Details",
97
163
  "custom_person_account" => "(min) on Users",
98
164
  "custom_person_abilities" => "(min) on Users' Abilities"
99
- })
100
- header.map {|name| name_maps[name] ? name_maps[name] : name}
101
- end
102
-
103
- def to_entry_type(person)
104
- session.new_entry(person, dependencies: deps).yield_self do |person_entry|
105
- options.dig(:export, :options, :internal_names) ? person_entry.mapped_entry : person_entry.external_entry
106
- end
107
- end
108
-
109
- def deps
110
- @deps ||= {"supervisor_id" => {people: people}}
165
+ }
111
166
  end
112
167
 
113
- def file
114
- @file ||= (options[:file] || options.dig(:export, :file, :name)).tap do |filename|
115
- unless filename
116
- session.logger.error("Destination file not specified")
117
- return false
118
- end
119
- end
120
- end
121
-
122
-
123
168
  end
@@ -5,8 +5,7 @@ class Eco::API::UseCases::DefaultCases::ToCsvDetailedCase < Eco::API::UseCases::
5
5
  private
6
6
 
7
7
  def to_row(person)
8
- entry = to_entry_type(person)
9
- data = entry.values_at(*keys(entry))
8
+ data = super(person)
10
9
  data << person.subordinates
11
10
  data << person_supervisor(person)
12
11
  data += user_abilities(person)
@@ -38,8 +37,8 @@ class Eco::API::UseCases::DefaultCases::ToCsvDetailedCase < Eco::API::UseCases::
38
37
  preferences.map {|key| user_preferences[key] || false}
39
38
  end
40
39
 
41
- def spot_header
42
- super do |header|
40
+ def spot_header(person = people.first)
41
+ super(person) do |header|
43
42
  header << "Subordinates"
44
43
  header << "Supervisor Name"
45
44
  header += abilities_header
@@ -124,8 +124,9 @@ class Eco::API::UseCases::OozeSamples::OozeUpdateCase < Eco::API::Common::Loader
124
124
  end
125
125
 
126
126
  def prompt_to_confirm!
127
- print "\nDo you want to proceed (y/N)? "
128
- exit(1) unless $stdin.gets.chomp.to_s.downcase == "y"
127
+ prompt_user("Do you want to proceed (y/N)?", default: "Y") do |response|
128
+ exit(1) unless response.upcase.start_with?("Y")
129
+ end
129
130
  end
130
131
 
131
132
  end