eco-helpers 2.0.16 → 2.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -6
  3. data/eco-helpers.gemspec +6 -4
  4. data/lib/eco-helpers.rb +1 -0
  5. data/lib/eco/api/common/base_loader.rb +14 -0
  6. data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
  7. data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
  8. data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
  9. data/lib/eco/api/common/people/person_entry.rb +4 -2
  10. data/lib/eco/api/common/session/mailer.rb +0 -1
  11. data/lib/eco/api/common/session/s3_uploader.rb +0 -1
  12. data/lib/eco/api/common/session/sftp.rb +0 -1
  13. data/lib/eco/api/microcases.rb +3 -1
  14. data/lib/eco/api/microcases/append_usergroups.rb +0 -1
  15. data/lib/eco/api/microcases/people_cache.rb +2 -2
  16. data/lib/eco/api/microcases/people_load.rb +2 -2
  17. data/lib/eco/api/microcases/people_refresh.rb +2 -2
  18. data/lib/eco/api/microcases/people_search.rb +6 -6
  19. data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
  20. data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
  21. data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
  22. data/lib/eco/api/microcases/set_account.rb +0 -1
  23. data/lib/eco/api/organization.rb +1 -0
  24. data/lib/eco/api/organization/people.rb +7 -0
  25. data/lib/eco/api/organization/people_analytics.rb +60 -0
  26. data/lib/eco/api/organization/presets_factory.rb +22 -83
  27. data/lib/eco/api/organization/presets_integrity.json +6 -0
  28. data/lib/eco/api/organization/presets_values.json +5 -4
  29. data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
  30. data/lib/eco/api/session.rb +1 -20
  31. data/lib/eco/api/session/batch.rb +23 -7
  32. data/lib/eco/api/session/config.rb +0 -10
  33. data/lib/eco/api/session/config/people.rb +1 -17
  34. data/lib/eco/api/usecases/default_cases.rb +1 -1
  35. data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +1 -1
  36. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +76 -0
  37. data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
  38. data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
  39. data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
  40. data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +1 -1
  41. data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +1 -1
  42. data/lib/eco/api/usecases/default_cases/to_csv_case.rb +85 -27
  43. data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +62 -36
  44. data/lib/eco/cli/config/default/options.rb +19 -17
  45. data/lib/eco/cli/config/default/people_filters.rb +3 -3
  46. data/lib/eco/cli/config/default/usecases.rb +66 -32
  47. data/lib/eco/cli/config/default/workflow.rb +1 -1
  48. data/lib/eco/cli/config/help.rb +1 -0
  49. data/lib/eco/cli/config/options_set.rb +106 -13
  50. data/lib/eco/cli/config/use_cases.rb +33 -33
  51. data/lib/eco/cli/scripting/args_helpers.rb +30 -3
  52. data/lib/eco/data.rb +1 -0
  53. data/lib/eco/data/crypto/encryption.rb +3 -3
  54. data/lib/eco/data/files/helpers.rb +6 -4
  55. data/lib/eco/data/fuzzy_match.rb +119 -0
  56. data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
  57. data/lib/eco/data/fuzzy_match/chars_position_score.rb +37 -0
  58. data/lib/eco/data/fuzzy_match/ngrams_score.rb +73 -0
  59. data/lib/eco/data/fuzzy_match/pairing.rb +102 -0
  60. data/lib/eco/data/fuzzy_match/result.rb +67 -0
  61. data/lib/eco/data/fuzzy_match/results.rb +53 -0
  62. data/lib/eco/data/fuzzy_match/score.rb +44 -0
  63. data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
  64. data/lib/eco/data/fuzzy_match/string_helpers.rb +69 -0
  65. data/lib/eco/version.rb +1 -1
  66. metadata +82 -10
  67. data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
  68. data/lib/eco/api/organization/presets_reference.json +0 -59
  69. data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
@@ -0,0 +1,73 @@
1
module Eco
  module Data
    module FuzzyMatch
      # Scoring helpers based on character n-grams.
      #
      # The methods here call `normalize_string`, `word_ngrams` and `paired_words`,
      # none of which is defined in this module: the including object must provide them.
      module NGramsScore
        # Scores `str2` against `str1` word by word. It does the following:
        #   1. Pairs the words of both strings through `paired_words`,
        #      using `ngrams_score` as the comparison algorithm.
        #   2. Merges the `Score` of every pair into a single `Score`.
        # Identical strings get a full score (`str1.length` over `str1.length`).
        # When either string is missing, or shorter than 2 characters, the result is an
        # empty `Score` (`0` over `0`).
        # @param str1 [String] the string of reference.
        # @param str2 [String] the string to be scored against `str1`.
        # @param range [Integer, Range] determines the length of the generated values for each `word`.
        # @param normalized [Boolean] `true` when the strings are already normalized
        #   (avoids normalizing twice).
        # @return [Score] the score object with the result.
        def words_ngrams_score(str1, str2, range: 3..5, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized

          Score.new(0, 0).tap do |score|
            next if !str1 || !str2
            # The accumulator starts at 0/0, so a full match must bring its own total.
            next score.merge!(Score.new(str1.length, str1.length)) if str1 == str2
            next if str1.length < 2 || str2.length < 2

            pairs = paired_words(str1, str2, normalized: true) do |needle, item|
              ngrams_score(needle, item, range: range, normalized: true)
            end

            pairs.each do |_word, (_item, pair_score)|
              score.merge!(pair_score) if pair_score
            end
          end
        end

        # Scores how many of the n-grams of `str2` are contained in `str1`.
        # The maximum score (`total`) is the length of `str1`:
        #   * With an `Integer` `range`, every n-gram weighs the same.
        #   * With a `Range`, the n-grams are grouped by length; every group gets an equal
        #     share of the total, split evenly among the n-grams of that group.
        # @note This algorithm is best suited for matching sentences, or 'firstname lastname'
        #   compared with 'lastname firstname' combinations.
        # @param str1 [String] the string of reference.
        # @param str2 [String] the string the n-grams are generated from.
        # @param range [Integer, Range] determines the length of the generated values.
        # @param normalized [Boolean] `true` when the strings are already normalized
        #   (avoids normalizing twice).
        # @return [Score] the score object with the result.
        def ngrams_score(str1, str2, range: 3..5, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized
          max_score = str1 ? str1.length : 0

          Score.new(0, max_score).tap do |score|
            next if !str1 || !str2
            next score.increase(score.total) if str1 == str2
            next if str1.length < 2 || str2.length < 2

            grams = word_ngrams(str2, range, normalized: true)
            next if grams.empty?

            if range.is_a?(Integer)
              item_weight = score.total.to_f / grams.length
              matches = grams.count { |gram| str1.include?(gram) }
              score.increase(matches * item_weight)
            else
              groups = grams.group_by(&:length)
              # Rounded share per length group (e.g. 0.333 for three different lengths).
              group_weight = (1.0 / groups.length).round(3)
              len_max_score = score.total * group_weight

              groups.each_value do |len_grams|
                item_weight = len_max_score / len_grams.length
                matches = len_grams.count { |gram| str1.include?(gram) }
                score.increase(matches * item_weight)
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
module Eco
  module Data
    module FuzzyMatch
      # Word pairing helpers.
      #
      # `paired_words` calls `normalize_string` and `get_words`, which are not defined in
      # this module: the including object must provide them.
      module Pairing
        # Pair words using some algorithm.
        # It does the following:
        #   1. It splits both strings into words.
        #   2. Pairs all words by using `block` to score the best match.
        #      Only matches with a `ratio` above `0.05` are considered.
        #   3. A word of `str2` is paired with at most one word of `str1`.
        #   4. The words of `str1` left without pair get `nil` as pair and a `0` score
        #      (over the length of the word).
        # When both strings are equal, every word is paired with itself.
        # When either string is shorter than 2 characters, `str1` is returned as the only
        # key, with `nil` pair and a `0` score.
        # @yield [needle, item] offers a comparison algorithm between two strings.
        # @yieldparam needle [String] the string of reference.
        # @yieldparam item [String] one of the haystack items.
        # @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of
        #   comparing `needle` and `item`.
        # @param str1 [String] the string of reference.
        # @param str2 [String] one of the haystack items.
        # @param format [Symbol, Array<Symbol>] determines the `values` of the returned `Hash`:
        #   1. `:pair` for just pair
        #   2. `:score` for just score
        #   3. `[:pair, :score]` for `Array`
        # @param normalized [Boolean] `true` when the strings are already normalized
        #   (avoids normalizing twice).
        # @return [Hash] where `keys` are the **words** of `str1` and their `values`:
        #   1. if `format` is `:pair` => the `str2` word with highest match.
        #   2. if `format` is `:score` => the `Score` of the highest match.
        #   3. if `format` is `[:pair, :score]` => both in an `Array`.
        #   It is empty when any of the strings is missing.
        def paired_words(str1, str2, format: [:pair, :score], normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized
          return {} if !str1 || !str2

          # Shapes a `{pair:, score:}` result as requested by `format`.
          formatted = lambda do |result|
            format.is_a?(Array) ? result.values_at(*format) : result[format]
          end

          needles = get_words(str1, normalized: true)

          if str1 == str2
            identical = {}
            needles.each do |word|
              identical[word] = formatted.call({pair: word, score: yield(word, word)})
            end
            return identical
          end

          if str1.length < 2 || str2.length < 2
            return {str1 => formatted.call({pair: nil, score: Score.new(0, str1.length)})}
          end

          haystack = get_words(str2, normalized: true)

          # candidates[needle] => all the haystack items scored against `needle` (best first).
          # ranking[item]      => the needles that are a relevant match for `item`.
          ranking = {}
          candidates = needles.each_with_object({}) do |needle, out|
            scored = haystack.map do |item|
              result = {pair: item, score: yield(needle, item)}
              ranking[item] ||= []
              if result[:score].ratio > 0.05
                ranking[item] << {needle: needle, score: result[:score]}
              end
              result
            end
            out[needle] = scored.sort_by { |result| result[:score].ratio }.reverse
          end

          # First pass: every haystack item takes its best needle still available.
          paired = {}
          ranking.each do |item, results|
            best = results
              .reject { |result| paired.key?(result[:needle]) }
              .max_by { |result| result[:score].ratio }
            paired[best[:needle]] = {pair: item, score: best[:score]} if best
          end

          # Second pass: needles without pair can only take items that nobody took.
          pending_items = haystack - paired.values.map { |result| result[:pair] }
          candidates.each do |needle, results|
            next if paired.key?(needle)
            result = results.find do |candidate|
              pending_items.include?(candidate[:pair]) && candidate[:score].ratio > 0.05
            end
            next unless result
            paired[needle] = result
            pending_items.delete(result[:pair])
          end

          # Needles that could not be paired at all.
          (needles - paired.keys).each do |needle|
            paired[needle] = {pair: nil, score: Score.new(0, needle.length)}
          end

          paired.transform_values { |result| formatted.call(result) }
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module Eco
  module Data
    module FuzzyMatch
      # The measures obtained when comparing a `value` against some reference.
      # Every measure can be `nil` (the accessors only round it when present).
      # Results sort in descending order: the highest measure comes first.
      class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
        ALL_METHODS   = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position].freeze
        DEFAULT_ORDER = [:words_ngrams, :dice].freeze

        # Measure accessors, rounded to 3 decimals (`nil` when the measure is missing).
        def dice;           super&.round(3); end
        def levenshtein;    super&.round(3); end
        def jaro_winkler;   super&.round(3); end
        def ngrams;         super&.round(3); end
        def words_ngrams;   super&.round(3); end
        def chars_position; super&.round(3); end

        # TODO: print in the order of `order`
        # @note it does **not** write to the output; it only builds the message.
        # @return [String] a one line summary with all the measures and the `value`.
        def print
          "(Dice: #{dice}) (Lev Dst: #{levenshtein}) (Jaro: #{jaro_winkler}) " \
            "(Ngram: #{ngrams}) (WNgrams: #{words_ngrams}) (C Pos: #{chars_position}) '#{value}'"
        end

        # @param methods [Symbol, Array<Symbol>] the measures to check.
        # @param threshold [Numeric, nil] the minimum value; `nil` disables the check.
        # @return [Boolean] `true` if **all** the `methods` reach the `threshold`.
        def all_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.all? { |method| threshold?(method, threshold) }
        end

        # @param methods [Symbol, Array<Symbol>] the measures to check.
        # @param threshold [Numeric, nil] the minimum value; `nil` disables the check.
        # @return [Boolean] `true` if **any** of the `methods` reaches the `threshold`.
        def any_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.any? { |method| threshold?(method, threshold) }
        end

        # @param method [Symbol] the measure to check.
        # @param threshold [Numeric] the minimum value.
        # @raise [RuntimeError] when this object does not respond to `method`.
        # @return [Boolean] `true` if the measure reaches the `threshold`.
        #   A missing (`nil`) measure never reaches it.
        def threshold?(method = :dice, threshold = 0.15)
          raise "Uknown method '#{method}'" unless respond_to?(method)
          measure = public_send(method)
          !measure.nil? && measure >= threshold
        end

        # Sets the measures used to sort; falls back to the default order when empty.
        # @param values [Symbol, Array<Symbol>, nil] the measures, by priority.
        def order=(values)
          requested = [values].flatten.compact
          @order = requested.empty? ? DEFAULT_ORDER.dup : requested
        end

        # @return [Array<Symbol>] the measures used to sort, by priority.
        def order
          @order ||= DEFAULT_ORDER.dup
        end

        # @return [Integer] `-1` when this result scores higher than `result`
        #   (so it sorts first), `1` when it scores lower, `0` when they tie.
        def <=>(result)
          compare(result)
        end

        private

        # Compares by the first measure of `order`; ties are resolved by the next ones.
        # A missing (`nil`) measure counts as the lowest possible value.
        def compare(other, order: self.order)
          method, *pending = order
          return 0 unless method
          raise "Uknown method '#{method}'" unless respond_to?(method) && other.respond_to?(method)

          mine   = public_send(method) || -Float::INFINITY
          theirs = other.public_send(method) || -Float::INFINITY
          return -1 if mine > theirs
          return  1 if mine < theirs
          compare(other, order: pending)
        end
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module Eco
  module Data
    module FuzzyMatch
      # Collection of the results (`raw_results`) obtained for a `needle`.
      # The items of `raw_results` are expected to respond to `all_threshold?`, `order=`,
      # `print` and `<=>`.
      class Results < Struct.new(:needle, :value, :raw_results)
        # @return [Results] the results that reach `0.5` in `:jaro_winkler`.
        def results_with_false_positives
          relevant_results(methods: :jaro_winkler, threshold: 0.5)
        end

        # Filters the results that reach the `threshold` in **all** the `methods`.
        # @param methods [Symbol, Array<Symbol>] the measures to check; they also become
        #   the `order` of the returned object.
        # @param threshold [Numeric] the minimum value for each of the `methods`.
        # @return [Results] a new object with only the relevant results.
        def relevant_results(methods: order, threshold: 0.5)
          filtered = raw_results.select do |result|
            result.all_threshold?(methods, threshold)
          end

          self.class.new(needle, value, filtered).tap do |results|
            results.order = methods
          end
        end

        # Sets the sorting measures, and propagates them to every result.
        # @param values [Symbol, Array<Symbol>] the measures, by priority.
        def order=(values)
          @order = [values].flatten.compact
          raw_results.each { |result| result.order = @order }
        end

        # @return [Array<Symbol>] the measures used to sort, by priority.
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # @return [Array] the `raw_results` sorted through their own `<=>`.
        def results
          raw_results.sort
        end

        # Writes to the standard output the `value` followed by one line per sorted result.
        def print
          lines = results.map(&:print).join("\n ")
          puts "'#{value}':\n " + lines
        end

        private

        # @param item [Object] the item to get the string from.
        # @param attr [Symbol, String, nil] the method of `item` that gives its string.
        # @return [Object, nil] `item` itself when it is falsy, a `String`, or `attr` is
        #   not given; otherwise the result of calling `attr` on it (`nil` if `item` does
        #   not respond to `attr`).
        def item_string(item, attr = nil)
          return item if !item || item.is_a?(String) || !attr
          attr = attr.to_sym
          item.send(attr) if item.respond_to?(attr)
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module Eco
  module Data
    module FuzzyMatch
      # A `score` obtained over a maximum `total`.
      class Score < Struct.new(:score, :total)
        # @param decimals [Integer] the number of decimals to round to.
        # @return [Float] `score` divided by `total`. A missing `score` counts as `0` and a
        #   missing `total` as `1`. It is `0.0` when `total` is zero (instead of `NaN` or
        #   `Infinity`, which cannot be sorted or compared reliably).
        def ratio(decimals = 6)
          denominator = (total || 1).to_f
          return 0.0 if denominator.zero?
          ((score || 0) / denominator).round(decimals)
        end

        # @param decimals [Integer] the number of decimals to round to.
        # @return [Float] the `ratio` expressed as a percentage.
        def percent(decimals = 3)
          (100 * ratio).round(decimals)
        end

        # Adds `value` to the `score`.
        # @return [Numeric] the new `score`.
        def increase(value = 1)
          self.score += value
        end

        # Adds `value` to the `total`.
        # @return [Numeric] the new `total`.
        def increase_total(value)
          self.total += value
        end

        # @note it replaces `Struct#values_at`: it takes method names rather than indexes.
        # @param keys [Array<Symbol>] the names of the methods to call.
        # @return [Array] the value of each key (`nil` for those this object does not respond to).
        def values_at(*keys)
          keys.map do |key|
            send(key) if respond_to?(key)
          end
        end

        # Merges 2 Score instance objects
        # @param value [Score] the score to add up.
        # @return [Score] a new object; neither `self` nor `value` are modified.
        def merge(value)
          Score.new(score, total).merge!(value)
        end

        # Adds the `score` and the `total` of `value` to this object.
        # @param value [Score] the score to add up.
        # @raise [RuntimeError] when `value` is not a `Score`.
        # @return [Score] this object.
        def merge!(value)
          raise "Expecting Score object. Given: #{value.class}" unless value.is_a?(Score)
          increase(value.score)
          increase_total(value.total)
          self
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ module Eco
2
+ module Data
3
+ module FuzzyMatch
4
+ module StopWords
5
+ PREPOSITIONS = [
6
+ "aboard", "about", "above", "across", "after", "against", "along", "amid", "among", "around", "as", "at",
7
+ "before", "behind", "below", "beneath", "beside", "between", "beyond", "but", "by",
8
+ "concerning", "considering", "despite", "down", "during", "except", "following", "for", "from",
9
+ "in", "inside", "into", "like", "minus", "near", "next",
10
+ "of", "off", "on", "onto", "opposite", "out", "outside", "over", "past", "per", "plus",
11
+ "regarding", "round", "save", "since", "than", "through", "till", "to", "toward",
12
+ "under", "underneath", "unlike", "until", "up", "upon", "versus", "via",
13
+ "with", "within", "without"
14
+ ]
15
+ PRONOUNS = [
16
+ "all", "another", "any", "anybody", "anyone", "anything", "as", "aught",
17
+ "both", "each", "each other", "either", "enough", "everybody", "everyone", "everything",
18
+ "few", "he", "her", "hers", "herself", "him", "himself", "his", "I", "idem", "it", "its", "itself",
19
+ "many", "me", "mine", "most", "my", "myself", "naught", "neither", "no one", "nobody", "none", "nothing", "nought",
20
+ "one", "one another", "other", "others", "ought", "our", "ours", "ourself", "ourselves",
21
+ "several", "she", "some", "somebody", "someone", "something", "somewhat", "such", "suchlike",
22
+ "that", "thee", "their", "theirs", "theirself", "theirselves", "them", "themself", "themselves", "there",
23
+ "these", "they", "thine", "this", "those", "thou", "thy", "thyself", "us",
24
+ "we", "what", "whatever", "whatnot", "whatsoever", "whence", "where", "whereby", "wherefrom",
25
+ "wherein", "whereinto", "whereof", "whereon", "wherever", "wheresoever", "whereto", "whereunto",
26
+ "wherewith", "wherewithal", "whether", "which", "whichever", "whichsoever", "who", "whoever", "whom",
27
+ "whomever", "whomso", "whomsoever", "whose", "whosever", "whosesoever", "whoso", "whosoever",
28
+ "ye", "yon", "yonder", "you", "your", "yours", "yourself", "yourselves"
29
+ ]
30
+ ARTICLES = ["a", "an", "the"]
31
+
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,69 @@
1
module Eco
  module Data
    module FuzzyMatch
      # String helpers for fuzzy matching.
      #
      # `string_ngrams`, `string_combinations`, `string_permutations` and `word_ngrams`
      # call `ngrams`, `combinations` and `permutations`, which are not defined in this
      # module: the including object must provide them.
      module StringHelpers
        # Downcases and trims
        # @param value [String, Symbol, Array] the value(s) to normalize.
        # @return [String, Array, nil] the normalized `String` (an `Array` of them when
        #   `value` is an `Array`); `nil` for any other type of `value`.
        def normalize_string(value)
          case value
          when Array
            value.map { |val| normalize_string(val) }
          when Symbol
            # `to_s` (and not `to_sym`): a Symbol must become a String to end the recursion.
            normalize_string(value.to_s)
          when String
            value.downcase.strip
          end
        end

        # @param str [String] the input string with the words.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array<String>] the sequences of ASCII letters, `'` and `-` found in
        #   `str` (empty when `str` is not a string).
        def get_words(str, normalized: false)
          return [] unless str
          str = normalize_string(str) unless normalized
          return [] unless str.is_a?(String)
          str.scan(/[a-zA-Z'-]+/)
        end

        # Keeps the start order of the `words` and consecutive `words` together/consecutive.
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `words`.
        def string_ngrams(str, range=2..3, normalized: false)
          words = get_words(str, normalized: normalized)
          ngrams(words, range)
        end

        # Keeps the start order of the `words` of the input `Array` `words`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `words`
        def string_combinations(str, range=2..3, normalized: false)
          words = get_words(str, normalized: normalized)
          combinations(words, range).map { |comb| comb.join(' ') }
        end

        # It includes `combinations` that break the initial order of the `Array`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] permutations of `range` length of `words`
        def string_permutations(str, range=2..3, normalized: false)
          words = get_words(str, normalized: normalized)
          permutations(words, range).map { |comb| comb.join(' ') }
        end

        # Keeps the start order of the `chars` and consecutive `chars` together/consecutive.
        # @param str [String] the input `word` string.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `chars`.
        def word_ngrams(str, range=2..3, normalized: false)
          str = normalize_string(str) unless normalized
          ngrams(str.to_s.chars, range).map { |comb| no_blanks(comb) }
        end

        # @param str [String] the input string.
        # @return [String, nil] `str` without spaces; `nil` when `str` is not a `String`.
        def no_blanks(str)
          str.delete(' ') if str.is_a?(String)
        end
      end
    end
  end
end
+ end