eco-helpers 2.0.14 → 2.0.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +90 -2
  3. data/eco-helpers.gemspec +6 -4
  4. data/lib/eco-helpers.rb +2 -0
  5. data/lib/eco/api/common/base_loader.rb +14 -0
  6. data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
  7. data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
  8. data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
  9. data/lib/eco/api/common/people/entry_factory.rb +26 -9
  10. data/lib/eco/api/common/people/person_entry.rb +5 -2
  11. data/lib/eco/api/common/people/supervisor_helpers.rb +27 -0
  12. data/lib/eco/api/common/session.rb +1 -0
  13. data/lib/eco/api/common/session/base_session.rb +2 -0
  14. data/lib/eco/api/common/session/file_manager.rb +2 -2
  15. data/lib/eco/api/common/session/helpers.rb +30 -0
  16. data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
  17. data/lib/eco/api/common/session/mailer.rb +0 -1
  18. data/lib/eco/api/common/session/s3_uploader.rb +0 -1
  19. data/lib/eco/api/common/session/sftp.rb +0 -1
  20. data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
  21. data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
  22. data/lib/eco/api/common/version_patches/exception.rb +8 -4
  23. data/lib/eco/api/microcases.rb +3 -1
  24. data/lib/eco/api/microcases/append_usergroups.rb +0 -1
  25. data/lib/eco/api/microcases/people_cache.rb +2 -2
  26. data/lib/eco/api/microcases/people_load.rb +2 -2
  27. data/lib/eco/api/microcases/people_refresh.rb +2 -2
  28. data/lib/eco/api/microcases/people_search.rb +6 -6
  29. data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
  30. data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
  31. data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
  32. data/lib/eco/api/microcases/set_account.rb +0 -1
  33. data/lib/eco/api/microcases/with_each.rb +67 -6
  34. data/lib/eco/api/microcases/with_each_present.rb +4 -2
  35. data/lib/eco/api/microcases/with_each_starter.rb +4 -2
  36. data/lib/eco/api/organization.rb +1 -0
  37. data/lib/eco/api/organization/people.rb +98 -22
  38. data/lib/eco/api/organization/people_similarity.rb +112 -0
  39. data/lib/eco/api/organization/person_schemas.rb +5 -1
  40. data/lib/eco/api/organization/policy_groups.rb +5 -1
  41. data/lib/eco/api/organization/presets_factory.rb +40 -80
  42. data/lib/eco/api/organization/presets_integrity.json +6 -0
  43. data/lib/eco/api/organization/presets_values.json +5 -4
  44. data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
  45. data/lib/eco/api/session.rb +6 -22
  46. data/lib/eco/api/session/batch.rb +25 -7
  47. data/lib/eco/api/session/config.rb +16 -15
  48. data/lib/eco/api/session/config/api.rb +4 -0
  49. data/lib/eco/api/session/config/apis.rb +80 -0
  50. data/lib/eco/api/session/config/files.rb +7 -0
  51. data/lib/eco/api/session/config/people.rb +3 -19
  52. data/lib/eco/api/usecases/default_cases.rb +4 -1
  53. data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +161 -0
  54. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +53 -0
  55. data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
  56. data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
  57. data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
  58. data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +72 -0
  59. data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +59 -0
  60. data/lib/eco/api/usecases/default_cases/to_csv_case.rb +132 -29
  61. data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +61 -36
  62. data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
  63. data/lib/eco/cli.rb +0 -10
  64. data/lib/eco/cli/config/default/options.rb +20 -17
  65. data/lib/eco/cli/config/default/people_filters.rb +3 -3
  66. data/lib/eco/cli/config/default/usecases.rb +80 -26
  67. data/lib/eco/cli/config/default/workflow.rb +16 -4
  68. data/lib/eco/cli/config/help.rb +1 -0
  69. data/lib/eco/cli/config/options_set.rb +106 -13
  70. data/lib/eco/cli/config/use_cases.rb +33 -33
  71. data/lib/eco/cli/scripting/args_helpers.rb +30 -3
  72. data/lib/eco/csv.rb +4 -2
  73. data/lib/eco/data.rb +1 -0
  74. data/lib/eco/data/crypto/encryption.rb +3 -3
  75. data/lib/eco/data/files/directory.rb +28 -20
  76. data/lib/eco/data/files/helpers.rb +6 -4
  77. data/lib/eco/data/fuzzy_match.rb +161 -0
  78. data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
  79. data/lib/eco/data/fuzzy_match/chars_position_score.rb +37 -0
  80. data/lib/eco/data/fuzzy_match/ngrams_score.rb +78 -0
  81. data/lib/eco/data/fuzzy_match/pairing.rb +101 -0
  82. data/lib/eco/data/fuzzy_match/result.rb +73 -0
  83. data/lib/eco/data/fuzzy_match/results.rb +59 -0
  84. data/lib/eco/data/fuzzy_match/score.rb +44 -0
  85. data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
  86. data/lib/eco/data/fuzzy_match/string_helpers.rb +69 -0
  87. data/lib/eco/version.rb +1 -1
  88. metadata +87 -10
  89. data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
  90. data/lib/eco/api/organization/presets_reference.json +0 -59
  91. data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
@@ -0,0 +1,75 @@
module Eco
  module Data
    module FuzzyMatch
      # Helpers to derive n-grams, combinations and permutations from an `Array`.
      module ArrayHelpers
        # Builds the n-grams of `values`: runs of consecutive items, in their original order.
        # Each run is joined with a single space into one `String`.
        # @param values [Array] the input array with the values.
        # @param range [Integer, Range] determines the length(s) of the generated runs.
        # @return [Array<String>] the unique n-grams of `range` length of `values`.
        #   It is empty for lengths below `1` or longer than `values`.
        def ngrams(values, range = 2..3)
          unless range.is_a?(Integer)
            return range.flat_map { |size| ngrams(values, size) }.uniq
          end
          # A non-positive length has no n-grams (and `each_cons` would reject it)
          return [] if range < 1

          values.each_cons(range).map { |run| run.join(' ') }.uniq
        end

        # Keeps the start order of the `values` of the input `Array` `values`.
        # It does **not** keep consecutive `values` together (it can jump/skip items).
        # @param values [Array] the input array with the values.
        # @param range [Integer, Range] determines the length(s) of the generated values.
        # @return [Array<Array<Value>>] combinations of `range` length of `values`
        def combinations(values, range = 2..3)
          sizes = range.is_a?(Integer) ? [range] : range
          sizes.flat_map { |size| values.combination(size).to_a }
        end

        # It includes `combinations` that break the initial order of the `Array`.
        # It does **not** keep consecutive `values` together (it can jump/skip items).
        # The order-preserving combinations come first, followed by their re-orderings.
        # @param values [Array] the input array with the values.
        # @param range [Integer, Range] determines the length(s) of the generated values.
        # @return [Array<Array<Value>>] unique permutations of `range` length of `values`
        def permutations(values, range = 2..3)
          combs = combinations(values, range)
          (combs + combs.flat_map { |comb| comb.permutation.to_a }).uniq
        end

        # Helper to prepare a facet structure
        # @param values1 [Array, Hash] the values to have their facet against
        #   (for a `Hash`, its values are used).
        # @param values2 [Array] the input array with the values to facet against.
        # @return [Hash] where `keys` are `values1` and the `value` of each `key` is a copy of `values2`.
        #   It is empty when `values1` is not `Enumerable`.
        def facet(values1, values2)
          return {} unless values1.is_a?(Enumerable)

          keys = values1.is_a?(Hash) ? values1.values : values1.to_a
          keys.each_with_object({}) { |key, out| out[key] = values2.dup }
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
module Eco
  module Data
    module FuzzyMatch
      module CharsPositionScore
        # For each character in `str1`, a search is performed on `str2`, starting at the current position.
        # The search is deemed successful if the character is found in `str2` within `max_distance`
        # characters of the current position. A score is kept of matching characters.
        # The current position moves right after a successful match, or one character forward otherwise.
        # @note This algorithm is best suited for matching mis-spellings.
        # @param str1 [String] the string of reference.
        # @param str2 [String] the string to compare against.
        # @param max_distance [Integer] maximum char position distance to score.
        # @param normalized [Boolean] to avoid double ups in normalizing.
        # @return [Score] the score object with the result (`total` is the length of `str1`).
        def chars_position_score(str1, str2, max_distance: 3, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized

          Score.new(0, str1 ? str1.length : 0).tap do |score|
            next unless str1 && str2
            next score.increase(score.total) if str1 == str2
            next if str1.length < 2

            start = 0
            str1.each_char do |char|
              # Searching from `start` prevents characters located before the
              # current position from being counted as matches.
              pos = str2.index(char, start)
              if pos && pos < start + max_distance
                score.increase
                start = pos + 1
              else
                start += 1
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
module Eco
  module Data
    module FuzzyMatch
      module NGramsScore
        # It does the following:
        #   1. It splits both strings into words
        #   2. Pairs all words by best `ngrams_score` match
        #   3. Gives `0` score to those words of `str2` that lost their pair (a word of `str1` cannot be paired twice)
        #   4. Merges the `ngrams_score` of all the paired words of `str2` against their `str1` word pair
        # @param str1 [String] the string of reference.
        # @param str2 [String] the string to compare against.
        # @param range [Integer, Range] determines the length of the generated values for each `word`.
        # @param normalized [Boolean] to avoid double ups in normalizing.
        # @return [Score] the score object with the result.
        def words_ngrams_score(str1, str2, range: 3..5, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized

          Score.new(0, 0).tap do |score|
            next unless str1 && str2

            if str1 == str2
              # Full match: nothing else to pair or merge
              score.increase_total(str1.length)
              next score.increase(score.total)
            end
            # Too short to be paired by words: it counts as a full miss
            next score.increase_total(str1.length) if str1.length < 2 || str2.length < 2

            pairs = paired_words(str1, str2, normalized: true) do |needle, item|
              ngrams_score(needle, item, range: range, normalized: true)
            end
            pairs.each_value { |(_item, iscore)| score.merge!(iscore) }
          end
        end

        # A score is kept of matching ngram combinations of `str2`.
        # When `range` is not an `Integer`, the ngrams are grouped by length and
        # each group can contribute an equal share of the total score.
        # @note This algorithm is best suited for matching sentences, or 'firstname lastname' compared with 'lastname firstname' combinations.
        # @param str1 [String] the string of reference.
        # @param str2 [String] the string to compare against.
        # @param range [Integer, Range] determines the length of the generated values.
        # @param normalized [Boolean] to avoid double ups in normalizing.
        # @return [Score] the score object with the result (`total` is the length of `str1`).
        def ngrams_score(str1, str2, range: 3..5, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized

          Score.new(0, str1 ? str1.length : 0).tap do |score|
            next unless str1 && str2
            next score.increase(score.total) if str1 == str2
            next if str1.length < 2 || str2.length < 2

            grams = word_ngrams(str2, range, normalized: true)
            next unless grams.length > 0

            if range.is_a?(Integer)
              item_weight = score.total.to_f / grams.length
              matches     = grams.count { |gram| str1.include?(gram) }
              score.increase(matches * item_weight)
            else
              groups       = grams.group_by { |gram| gram.length }
              group_weight = (1.0 / groups.length).round(3)

              groups.each_value do |len_grams|
                len_max_score = score.total * group_weight
                item_weight   = len_max_score / len_grams.length
                matches       = len_grams.count { |gram| str1.include?(gram) }
                score.increase(matches * item_weight)
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,101 @@
module Eco
  module Data
    module FuzzyMatch
      module Pairing
        # Pair words using some algorithm.
        # It does the following:
        #   1. It splits both strings into words.
        #   2. Pairs all words by using `block` to score the best match.
        #   3. Gives `0` score to those words of `str1` that could not get a pair (a word of `str2` cannot be paired twice).
        #   4. Offers the `Score` of all the paired words of `str2` against their `str1` word pair.
        # Only matches with a score ratio above `0.05` are considered for pairing.
        # @yield [needle, item] offers a comparison algorithm between two strings.
        # @yieldparam needle [String] the string of reference.
        # @yieldparam item [String] one of the haystack items.
        # @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `needle` and `item`
        # @param str1 [String] the string of reference.
        # @param str2 [String] one of the haystack items.
        # @param format [Symbol, Array<Symbol>] determines the `values` of the returned `Hash`:
        #   1. `:pair` for just pair
        #   2. `:score` for just score
        #   3. `[:pair, :score]` for `Array`
        # @param normalized [Boolean] to avoid double ups in normalizing.
        # @return [Hash] where `keys` are the **words** of `str1` and their `values`:
        #   1. if `format` is `:pair` => the `str2` words with highest match.
        #   2. if `format` is `:score` => the `Score` words with highest match.
        #   3. if `format` is `[:pair, :score]` => both in an `Array`.
        #   It is empty if any of the strings is missing, and `{str1 => nil}` if any
        #   of the strings is shorter than 2 characters.
        def paired_words(str1, str2, format: [:pair, :score], normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized
          return {} unless str1 && str2
          return {str1 => nil} if str1.length < 2 || str2.length < 2

          needles  = get_words(str1, normalized: true)
          haystack = get_words(str2, normalized: true)

          # ranking: item => relevant needles; faceted: needle => all items, best first
          ranking = {}
          faceted = needles.each_with_object({}) do |needle, facets|
            scored = haystack.map do |item|
              result = {pair: item, score: yield(needle, item)}
              ranking[item] ||= []
              if result[:score].ratio > 0.05
                ranking[item] << {needle: needle, score: result[:score]}
              end
              result
            end
            facets[needle] = scored.sort_by { |result| result[:score].ratio }.reverse
          end

          # Each item takes its best scoring needle among those still free
          paired = {}
          ranking.each do |item, results|
            best = results.reject do |result|
              paired.key?(result[:needle])
            end.sort_by do |result|
              result[:score].ratio
            end.reverse.first
            paired[best[:needle]] = {pair: item, score: best[:score]} if best
          end

          # Needles left without pair may only take items that nobody took
          pending_items = haystack - paired.values.map { |result| result[:pair] }
          faceted.each do |needle, results|
            next if paired.key?(needle)

            best = results.find do |result|
              pending_items.include?(result[:pair]) && result[:score].ratio > 0.05
            end
            next unless best

            paired[needle] = best
            pending_items.delete(best[:pair])
          end

          # Whatever remains unpaired scores 0 over its own length
          (needles - paired.keys).each do |needle|
            paired[needle] = {pair: nil, score: Score.new(0, needle.length)}
          end

          paired.transform_values do |result|
            format.is_a?(Array) ? result.values_at(*format) : result[format]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,73 @@
module Eco
  module Data
    module FuzzyMatch
      # Holds the scores that the different algorithms gave to one candidate `value`.
      # Results sort from highest to lowest score, by the algorithms listed in `order`.
      class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
        ALL_METHODS = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position]

        # Score readers: the stored value rounded to 3 decimals (`nil` if not set).
        ALL_METHODS.each do |name|
          define_method(name) { super()&.round(3) }
        end

        # @return [Float] the mean of the scores of all the algorithms, rounded to 3 decimals.
        def average
          values = ALL_METHODS.map { |name| send(name) }
          (values.inject(0.0, :+) / values.length).round(3)
        end

        # TODO: print in the order of `order`
        # @return [String] a one-line summary with all the scores and the `value`.
        def print
          msg  = "(Dice: #{dice}) (Lev Dst: #{levenshtein}) "
          msg << "(Jaro: #{jaro_winkler}) "
          msg << "(Ngram: #{ngrams}) (WNgrams: #{words_ngrams}) "
          msg << "(C Pos: #{chars_position}) "
          msg << "(Avg: #{average}) "
          msg << "'#{value}'"
        end

        # @param methods [Symbol, Array<Symbol>] the algorithms to check.
        # @param threshold [Float, nil] the minimum score; a falsey value disables the check.
        # @return [Boolean] whether **all** the `methods` reach the `threshold`.
        def all_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.all? { |method| threshold?(method, threshold) }
        end

        # @param methods [Symbol, Array<Symbol>] the algorithms to check.
        # @param threshold [Float, nil] the minimum score; a falsey value disables the check.
        # @return [Boolean] whether **any** of the `methods` reaches the `threshold`.
        def any_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.any? { |method| threshold?(method, threshold) }
        end

        # @raise [RuntimeError] if this object does not respond to `method`.
        # @return [Boolean] whether the score of `method` reaches the `threshold`.
        def threshold?(method = :dice, threshold = 0.15)
          raise "Uknown method '#{method}'" unless respond_to?(method)
          send(method) >= threshold
        end

        # @param values [Symbol, Array<Symbol>] the algorithms to sort by, in priority order.
        #   An empty selection falls back to the default order.
        def order=(values)
          selected = [values].flatten.compact
          # The defaults must be added one by one: a nested Array is not a valid method name
          selected = [:words_ngrams, :dice] if selected.empty?
          @order = selected
        end

        # @return [Array<Symbol>] the algorithms this result is sorted by.
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # Higher scores sort first.
        def <=>(result)
          compare(result)
        end

        private

        # Compares by each algorithm of `order`; the first one that differs decides.
        # @return [Integer] `-1` if this result scores higher, `1` if lower, `0` on a tie.
        def compare(other, order: self.order)
          order.each do |method|
            break unless method
            raise "Uknown method '#{method}'" unless respond_to?(method) && other.respond_to?(method)

            mine   = send(method)
            theirs = other.send(method)
            return -1 if mine > theirs
            return  1 if mine < theirs
          end
          0
        end
      end
    end
  end
end
@@ -0,0 +1,59 @@
module Eco
  module Data
    module FuzzyMatch
      # Collection of the results (`raw_results`) obtained for a `needle`.
      class Results < Struct.new(:needle, :value, :raw_results)
        # Default minimum score used by `relevant_results` (`0.5` when not set)
        attr_accessor :threshold

        # @return [Results] the results that reach `0.5` on `:jaro_winkler`.
        def results_with_false_positives
          relevant_results(order: :jaro_winkler, threshold: 0.5)
        end

        # Filters the results that reach the threshold on **all** the algorithms of the order.
        # @param options [Hash] accepts `:order` (defaults to `order`) and
        #   `:threshold` (defaults to `threshold`, or `0.5` when that is not set).
        # @return [Results] a new object with the filtered results and `order` set to `options[:order]`.
        def relevant_results(**options)
          options  = {order: order, threshold: threshold || 0.5}.merge(options)
          filtered = raw_results.select do |result|
            result.all_threshold?(options[:order], options[:threshold])
          end
          self.class.new(needle, value, filtered).tap do |results|
            results.order = options[:order]
          end
        end

        # It also sets the order on each of the `raw_results`.
        # @param values [Array<Symbol>] the algorithms' results it should be ordered by
        #   * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`, `:average`
        def order=(values)
          @order = [values].flatten.compact.tap do |o|
            raw_results.each { |r| r.order = o }
          end
        end

        # @return [Array<Symbol>] the algorithms the results are ordered by.
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # @return [Array] the `raw_results`, sorted by their own `<=>`.
        def results
          raw_results.sort
        end

        # Writes to the standard output the `value` followed by one line per sorted result.
        def print
          msg = results.map do |result|
            result.print
          end.join("\n  ")

          puts "'#{value}':\n  " + msg
        end

        private

        # @param item [Object] a `String` or an object to read the `String` from.
        # @param attr [String, Symbol, nil] the method of `item` to call; when omitted, `item` is returned as is.
        # @return [Object, nil] `item` itself, the value of its `attr`, or `nil` if it does not respond to `attr`.
        def item_string(item, attr = nil)
          return item if !item || item.is_a?(String) || !attr
          attr = attr.to_sym
          item.send(attr) if item.respond_to?(attr)
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
module Eco
  module Data
    module FuzzyMatch
      # Keeps a `score` obtained over a maximum possible `total`.
      class Score < Struct.new(:score, :total)
        # @param decimals [Integer] the decimals to round to.
        # @return [Float] the `score` as a fraction of the `total`.
        #   A missing `score` counts as `0` and a missing `total` as `1`.
        #   It is `0.0` when the `total` is `0` (rather than `NaN` or `Infinity`).
        def ratio(decimals = 6)
          denominator = (total || 1).to_f
          return 0.0 if denominator.zero?

          ((score || 0).to_f / denominator).round(decimals)
        end

        # @param decimals [Integer] the decimals to round to.
        # @return [Float] the `ratio` as a percentage.
        def percent(decimals = 3)
          (100 * ratio).round(decimals)
        end

        # Adds `value` to the `score`.
        def increase(value = 1)
          self.score += value
        end

        # Adds `value` to the `total`.
        def increase_total(value)
          self.total += value
        end

        # @note It shadows `Struct#values_at`: it expects method names rather than indexes.
        # @param keys [Array<Symbol, String>] the names of the values to retrieve.
        # @return [Array] the value of each of the `keys` (`nil` for those this object does not respond to).
        def values_at(*keys)
          keys.map { |key| send(key) if respond_to?(key) }
        end

        # Merges 2 Score instance objects
        # @param value [Score] the score to add up.
        # @return [Score] a new object with the sum of both; the receiver is left untouched.
        def merge(value)
          Score.new(score, total).merge!(value)
        end

        # Adds up the `score` and `total` of `value` to this object.
        # @raise [RuntimeError] if `value` is not a `Score`.
        # @return [Score] this object.
        def merge!(value)
          raise "Expecting Score object. Given: #{value.class}" unless value.is_a?(Score)
          increase(value.score)
          increase_total(value.total)
          self
        end
      end
    end
  end
end