eco-helpers 2.0.16 → 2.0.17

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -6
  3. data/eco-helpers.gemspec +6 -4
  4. data/lib/eco-helpers.rb +1 -0
  5. data/lib/eco/api/common/base_loader.rb +14 -0
  6. data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
  7. data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
  8. data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
  9. data/lib/eco/api/common/people/person_entry.rb +4 -2
  10. data/lib/eco/api/common/session/mailer.rb +0 -1
  11. data/lib/eco/api/common/session/s3_uploader.rb +0 -1
  12. data/lib/eco/api/common/session/sftp.rb +0 -1
  13. data/lib/eco/api/microcases.rb +3 -1
  14. data/lib/eco/api/microcases/append_usergroups.rb +0 -1
  15. data/lib/eco/api/microcases/people_cache.rb +2 -2
  16. data/lib/eco/api/microcases/people_load.rb +2 -2
  17. data/lib/eco/api/microcases/people_refresh.rb +2 -2
  18. data/lib/eco/api/microcases/people_search.rb +6 -6
  19. data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
  20. data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
  21. data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
  22. data/lib/eco/api/microcases/set_account.rb +0 -1
  23. data/lib/eco/api/organization.rb +1 -0
  24. data/lib/eco/api/organization/people.rb +7 -0
  25. data/lib/eco/api/organization/people_analytics.rb +60 -0
  26. data/lib/eco/api/organization/presets_factory.rb +22 -83
  27. data/lib/eco/api/organization/presets_integrity.json +6 -0
  28. data/lib/eco/api/organization/presets_values.json +5 -4
  29. data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
  30. data/lib/eco/api/session.rb +1 -20
  31. data/lib/eco/api/session/batch.rb +23 -7
  32. data/lib/eco/api/session/config.rb +0 -10
  33. data/lib/eco/api/session/config/people.rb +1 -17
  34. data/lib/eco/api/usecases/default_cases.rb +1 -1
  35. data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +1 -1
  36. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +76 -0
  37. data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
  38. data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
  39. data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
  40. data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +1 -1
  41. data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +1 -1
  42. data/lib/eco/api/usecases/default_cases/to_csv_case.rb +85 -27
  43. data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +62 -36
  44. data/lib/eco/cli/config/default/options.rb +19 -17
  45. data/lib/eco/cli/config/default/people_filters.rb +3 -3
  46. data/lib/eco/cli/config/default/usecases.rb +66 -32
  47. data/lib/eco/cli/config/default/workflow.rb +1 -1
  48. data/lib/eco/cli/config/help.rb +1 -0
  49. data/lib/eco/cli/config/options_set.rb +106 -13
  50. data/lib/eco/cli/config/use_cases.rb +33 -33
  51. data/lib/eco/cli/scripting/args_helpers.rb +30 -3
  52. data/lib/eco/data.rb +1 -0
  53. data/lib/eco/data/crypto/encryption.rb +3 -3
  54. data/lib/eco/data/files/helpers.rb +6 -4
  55. data/lib/eco/data/fuzzy_match.rb +119 -0
  56. data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
  57. data/lib/eco/data/fuzzy_match/chars_position_score.rb +37 -0
  58. data/lib/eco/data/fuzzy_match/ngrams_score.rb +73 -0
  59. data/lib/eco/data/fuzzy_match/pairing.rb +102 -0
  60. data/lib/eco/data/fuzzy_match/result.rb +67 -0
  61. data/lib/eco/data/fuzzy_match/results.rb +53 -0
  62. data/lib/eco/data/fuzzy_match/score.rb +44 -0
  63. data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
  64. data/lib/eco/data/fuzzy_match/string_helpers.rb +69 -0
  65. data/lib/eco/version.rb +1 -1
  66. metadata +82 -10
  67. data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
  68. data/lib/eco/api/organization/presets_reference.json +0 -59
  69. data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
@@ -0,0 +1,73 @@
1
module Eco
  module Data
    module FuzzyMatch
      # N-gram based scoring of one string against another.
      # NOTE(review): this module calls `normalize_string`, `word_ngrams`, `paired_words`
      # and uses `Score`, none of which are defined here — presumably they are mixed in /
      # defined by sibling files of `FuzzyMatch`; confirm at the include site.
      module NGramsScore
        # Scores `str2` against `str1` word by word. It does the following:
        #   1. Pairs the words of both strings via `paired_words`, using `ngrams_score`
        #      as the comparison algorithm between two words.
        #   2. Merges the `Score` of every pair into a single `Score`.
        # Identical strings get a full score over the length of `str1`.
        # @param str1 [String] the string of reference.
        # @param str2 [String] the string to compare against `str1`.
        # @param range [Integer, Range] determine the length of the generated values for each `word`.
        # @param normalized [Boolean] to avoid double ups in normalizing.
        # @return [Score] the score object with the result.
        def words_ngrams_score(str1, str2, range: 3..5, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized

          Score.new(0, 0).tap do |score|
            next if !str1 || !str2

            if str1 == str2
              # The total starts at 0 here, so a perfect match must set both
              # the total and the score (increasing by `score.total` would add 0).
              score.increase_total(str1.length)
              score.increase(str1.length)
              next
            end
            next if str1.length < 2 || str2.length < 2

            pairs = paired_words(str1, str2, normalized: true) do |needle, item|
              ngrams_score(needle, item, range: range, normalized: true)
            end
            pairs.each do |_word, (_item, iscore)|
              score.merge!(iscore)
            end
          end
        end

        # A score is kept of matching ngram combinations of `str2`.
        # The total of the returned `Score` is the length of `str1`; every ngram of `str2`
        # that is contained in `str1` adds its share of that total.
        # @note This algorithm is best suited for matching sentences, or 'firstname lastname' compared with 'lastname firstname' combinations.
        # @param str1 [String] the string of reference.
        # @param str2 [String] the string the ngrams are generated from.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] to avoid double ups in normalizing.
        # @return [Score] the score object with the result.
        def ngrams_score(str1, str2, range: 3..5, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized

          Score.new(0, str1 ? str1.length : 0).tap do |score|
            next if !str1 || !str2
            next score.increase(score.total) if str1 == str2
            next if str1.length < 2 || str2.length < 2

            grams = word_ngrams(str2, range, normalized: true)
            next if grams.empty?

            if range.is_a?(Integer)
              # Single ngram length: every ngram weighs the same.
              item_weight = score.total.to_f / grams.length
              matches     = grams.count { |gram| str1.include?(gram) }
              score.increase(matches * item_weight)
            else
              # Several ngram lengths: each length gets an equal share of the total,
              # which is then split evenly among the ngrams of that length.
              groups       = grams.group_by(&:length)
              group_weight = (1.0 / groups.length).round(3)

              groups.each_value do |len_grams|
                len_max_score = score.total * group_weight
                item_weight   = len_max_score / len_grams.length
                matches       = len_grams.count { |gram| str1.include?(gram) }
                score.increase(matches * item_weight)
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
module Eco
  module Data
    module FuzzyMatch
      # Word pairing between two strings.
      # NOTE(review): relies on `normalize_string`, `get_words` and `Score`, which are not
      # defined in this module — presumably provided by sibling `FuzzyMatch` files; confirm.
      module Pairing
        # Pair words using some algorithm.
        # It does the following:
        #   1. It splits both strings into words.
        #   2. Pairs all words by using `block` to score the best match.
        #   3. A word of `str2` is paired at most once; words of `str1` left without
        #     a pair get a `nil` pair and a `0` score over their own length.
        # Only comparisons with a score `ratio` above `0.05` are considered for pairing.
        # @yield [needle, item] offers a comparison algorithm between two strings (required).
        # @yieldparam needle [String] the string of reference.
        # @yieldparam item [String] one of the haystack items.
        # @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `needle` and `item`
        # @param str1 [String] the string of reference.
        # @param str2 [String] one of the haystack items.
        # @param format [Symbol, Array<Symbol>] determines the `values` of the returned `Hash`:
        #   1. `:pair` for just pair
        #   2. `:score` for just score
        #   3. `[:pair, :score]` for `Array`
        # @param normalized [Boolean] to avoid double ups in normalizing.
        # @return [Hash] where `keys` are the **words** of `str1` and their `values`:
        #   1. if `format` is `:pair` => the `str2` words with highest match.
        #   2. if `format` is `:score` => the `Score` words with highest match.
        #   3. if `format` is `[:pair, :score]` => both in an `Array`.
        #   It is `{}` when any of the strings is missing, and `{str1 => nil}` when
        #   any of the strings is shorter than 2 characters.
        def paired_words(str1, str2, format: [:pair, :score], normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized
          return {} if !str1 || !str2
          return {str1 => nil} if str1.length < 2 || str2.length < 2

          needles  = get_words(str1, normalized: true)
          haystack = get_words(str2, normalized: true)

          # ranking: item (str2 word)   => candidate needles that scored above the cut-off
          # faceted: needle (str1 word) => all the items, best score first
          ranking = {}
          faceted = needles.each_with_object({}) do |needle, facets|
            compared = haystack.map do |item|
              {pair: item, score: yield(needle, item)}.tap do |result|
                ranking[item] ||= []
                if result[:score].ratio > 0.05
                  ranking[item] << {needle: needle, score: result[:score]}
                end
              end
            end
            facets[needle] = compared.sort_by { |result| result[:score].ratio }.reverse
          end

          # First pass: each item takes its best needle among those still unpaired.
          paired = {}
          ranking.each do |item, candidates|
            best = candidates
              .reject  { |candidate| paired.key?(candidate[:needle]) }
              .sort_by { |candidate| candidate[:score].ratio }
              .last
            paired[best[:needle]] = {pair: item, score: best[:score]} if best
          end

          # Second pass: unpaired needles take their best item among those not taken yet.
          pending_items = haystack - paired.values.map { |result| result[:pair] }
          faceted.each do |needle, results|
            next if paired.key?(needle)
            result = results.find do |candidate|
              pending_items.include?(candidate[:pair]) && candidate[:score].ratio > 0.05
            end
            next unless result
            paired[needle] = result
            pending_items.delete(result[:pair])
          end

          # Needles that could not be paired at all.
          (needles - paired.keys).each do |needle|
            paired[needle] = {pair: nil, score: Score.new(0, needle.length)}
          end

          paired.transform_values do |result|
            format.is_a?(Array) ? result.values_at(*format) : result[format]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module Eco
  module Data
    module FuzzyMatch
      # One candidate `value` together with the scores obtained by each matching method.
      # Results sort best-first according to `order` (see `<=>`).
      class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
        ALL_METHODS = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position]

        # Score readers: the stored value rounded to 3 decimals (`nil` stays `nil`).
        def dice
          super&.round(3)
        end

        def levenshtein
          super&.round(3)
        end

        def jaro_winkler
          super&.round(3)
        end

        def ngrams
          super&.round(3)
        end

        def words_ngrams
          super&.round(3)
        end

        def chars_position
          super&.round(3)
        end

        # Builds a one-line summary of all the scores followed by the quoted `value`.
        # It does not output anything by itself.
        # TODO: print in the order of `order`
        # @return [String] the summary line.
        def print
          "(Dice: #{dice}) (Lev Dst: #{levenshtein}) " \
            "(Jaro: #{jaro_winkler}) " \
            "(Ngram: #{ngrams}) (WNgrams: #{words_ngrams}) " \
            "(C Pos: #{chars_position}) " \
            "'#{value}'"
        end

        # @param methods [Symbol, Array<Symbol>] the score methods to check.
        # @param threshold [Numeric, nil] minimum score; a falsy value disables the check.
        # @return [Boolean] `true` if **all** the `methods` reach the `threshold`.
        def all_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.all? { |method| threshold?(method, threshold) }
        end

        # @param methods [Symbol, Array<Symbol>] the score methods to check.
        # @param threshold [Numeric, nil] minimum score; a falsy value disables the check.
        # @return [Boolean] `true` if **any** of the `methods` reaches the `threshold`.
        def any_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.any? { |method| threshold?(method, threshold) }
        end

        # @param method [Symbol] the score method to check.
        # @param threshold [Numeric] minimum score.
        # @raise [RuntimeError] when this object does not respond to `method`.
        # @return [Boolean] whether the score of `method` is at least `threshold`.
        def threshold?(method = :dice, threshold = 0.15)
          raise "Uknown method '#{method}'" unless respond_to?(method)
          send(method) >= threshold
        end

        # Sets the score methods used (in sequence) to compare two results.
        # An empty or `nil` value resets to the default `[:words_ngrams, :dice]`.
        # @param values [Symbol, Array<Symbol>, nil]
        def order=(values)
          @order = [values].flatten.compact
          # Reassigning the block variable inside `tap` has no effect on the tapped
          # object, so the default must be assigned to the ivar directly.
          @order = [:words_ngrams, :dice] if @order.empty?
        end

        # @return [Array<Symbol>] the score methods used to compare, in priority order.
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # Higher scores sort first (`-1` when `self` scores higher than `result`).
        def <=>(result)
          compare(result)
        end

        private

        # Compares by each method of `order` in turn, until one of them differs.
        # @raise [RuntimeError] when any of the two objects does not respond to a method of `order`.
        # @return [Integer] `-1`, `0` or `1`.
        def compare(other, order: self.order)
          order.each do |method|
            raise "Uknown method '#{method}'" unless respond_to?(method) && other.respond_to?(method)
            mine   = send(method)
            theirs = other.send(method)
            return -1 if mine > theirs
            return 1  if mine < theirs
          end
          0
        end
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module Eco
  module Data
    module FuzzyMatch
      # Collection of the results (`raw_results`) obtained for a `needle`.
      # The elements of `raw_results` are expected to respond to `all_threshold?`,
      # `order=`, `<=>` and `print` (as used by the methods below).
      class Results < Struct.new(:needle, :value, :raw_results)
        # Shortcut of `relevant_results` that filters on `:jaro_winkler` with a `0.5` threshold.
        # @return [Results]
        def results_with_false_positives
          relevant_results(methods: :jaro_winkler, threshold: 0.5)
        end

        # Keeps only the results where all the `methods` reach the `threshold`.
        # @param methods [Symbol, Array<Symbol>] the score methods to check (also used as the new `order`).
        # @param threshold [Numeric] the minimum score.
        # @return [Results] a new object with the filtered results, ordered by `methods`.
        def relevant_results(methods: order, threshold: 0.5)
          passing = raw_results.select { |candidate| candidate.all_threshold?(methods, threshold) }
          subset  = self.class.new(needle, value, passing)
          subset.order = methods
          subset
        end

        # Sets the comparison order here and propagates it to every raw result.
        # @param values [Symbol, Array<Symbol>, nil]
        def order=(values)
          @order = [values].flatten.compact
          raw_results.each do |single|
            single.order = @order
          end
        end

        # @return [Array<Symbol>] the methods used to sort; `[:words_ngrams, :dice]` unless set.
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # @return [Array] the raw results sorted by their own `<=>`.
        def results
          raw_results.sort
        end

        # Outputs the quoted `value` followed by one line per (sorted) result.
        def print
          lines = results.map(&:print).join("\n ")

          puts "'#{value}':\n " + lines
        end

        private

        # Resolves the string to compare of an `item`.
        # NOTE(review): the default `attr = self.method` invokes `Object#method` without
        # its required argument; it looks like calling this without `attr` cannot work — confirm.
        def item_string(item, attr = self.method)
          return item unless item && !item.is_a?(String) && attr
          key = attr.to_sym
          item.send(key) if item.respond_to?(key)
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module Eco
  module Data
    module FuzzyMatch
      # Accumulator of a `score` obtained over a maximum `total`.
      class Score < Struct.new(:score, :total)
        # @param decimals [Integer] number of decimals to round to.
        # @return [Float] `score / total`. A `nil` score counts as `0`, a `nil` total
        #   as `1`, and a total that is not positive gives `0.0` (instead of NaN/Infinity).
        def ratio(decimals = 6)
          denominator = total || 1
          return 0.0 unless denominator > 0
          ((score || 0).to_f / denominator).round(decimals)
        end

        # @param decimals [Integer] number of decimals to round to.
        # @return [Float] the `ratio` expressed as a percentage.
        def percent(decimals = 3)
          (100 * ratio).round(decimals)
        end

        # Adds `value` to the `score` (a `nil` score counts as `0`).
        # @return [Numeric] the new `score`.
        def increase(value = 1)
          self.score = (score || 0) + value
        end

        # Adds `value` to the `total` (a `nil` total counts as `0`).
        # @return [Numeric] the new `total`.
        def increase_total(value)
          self.total = (total || 0) + value
        end

        # Unlike `Struct#values_at`, this one takes member/method **names**.
        # @return [Array] the value of each key, or `nil` for the keys this object does not respond to.
        def values_at(*keys)
          keys.map do |key|
            send(key) if respond_to?(key)
          end
        end

        # Merges 2 Score instance objects
        # @raise [RuntimeError] when `value` is not a `Score`.
        # @return [Score] a new object; the receiver is left untouched.
        def merge(value)
          Score.new(score, total).merge!(value)
        end

        # Adds the `score` and `total` of `value` to this object.
        # @raise [RuntimeError] when `value` is not a `Score`.
        # @return [Score] this object.
        def merge!(value)
          raise "Expecting Score object. Given: #{value.class}" unless value.is_a?(Score)
          increase(value.score)
          increase_total(value.total)
          self
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module Eco
  module Data
    module FuzzyMatch
      # Word lists (prepositions, pronouns and articles), in alphabetical groups.
      # Some entries of `PRONOUNS` are made of two words (i.e. "each other").
      module StopWords
        PREPOSITIONS = [
          *%w[aboard about above across after against along amid among around as at],
          *%w[before behind below beneath beside between beyond but by],
          *%w[concerning considering despite down during except following for from],
          *%w[in inside into like minus near next],
          *%w[of off on onto opposite out outside over past per plus],
          *%w[regarding round save since than through till to toward],
          *%w[under underneath unlike until up upon versus via],
          *%w[with within without]
        ]

        PRONOUNS = [
          *%w[all another any anybody anyone anything as aught],
          *%w[both each], "each other", *%w[either enough everybody everyone everything],
          *%w[few he her hers herself him himself his I idem it its itself],
          *%w[many me mine most my myself naught neither], "no one", *%w[nobody none nothing nought],
          "one", "one another", *%w[other others ought our ours ourself ourselves],
          *%w[several she some somebody someone something somewhat such suchlike],
          *%w[that thee their theirs theirself theirselves them themself themselves there],
          *%w[these they thine this those thou thy thyself us],
          *%w[we what whatever whatnot whatsoever whence where whereby wherefrom],
          *%w[wherein whereinto whereof whereon wherever wheresoever whereto whereunto],
          *%w[wherewith wherewithal whether which whichever whichsoever who whoever whom],
          *%w[whomever whomso whomsoever whose whosever whosesoever whoso whosoever],
          *%w[ye yon yonder you your yours yourself yourselves]
        ]

        ARTICLES = %w[a an the]
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module Eco
  module Data
    module FuzzyMatch
      # String helpers to normalize input and to generate word/char based variations.
      # NOTE(review): `ngrams`, `combinations` and `permutations` are not defined in this
      # module — presumably provided by a sibling helpers module of `FuzzyMatch`; confirm.
      module StringHelpers
        # Downcases and trims
        # @param value [String, Symbol, Array] the value(s) to normalize.
        # @return [String, Array, nil] the normalized `String` (an `Array` is normalized
        #   element by element); `nil` for any other type.
        def normalize_string(value)
          case value
          when Array
            value.map { |val| normalize_string(val) }
          when Symbol
            # Must convert to String: `to_sym` on a Symbol returns itself (endless recursion).
            normalize_string(value.to_s)
          when String
            value.downcase.strip
          end
        end

        # @param str [String, nil] the input string.
        # @param normalized [Boolean] to avoid double ups in normalizing.
        # @return [Array<String>] the runs of ASCII letters, `'` and `-` found in `str`.
        def get_words(str, normalized: false)
          return [] unless str
          str = normalize_string(str) unless normalized
          str.scan(/[a-zA-Z'-]+/)
        end

        # Keeps the start order of the `words` and consecutive `words` together/consecutive.
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `words`.
        def string_ngrams(str, range=2..3, normalized: false)
          ngrams(get_words(str, normalized: normalized), range)
        end

        # Keeps the start order of the `words` of the input `Array` `words`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `words`
        def string_combinations(str, range=2..3, normalized: false)
          combinations(get_words(str, normalized: normalized), range)
            .map { |comb| comb.join(' ') }
        end

        # It includes `combinations` that break the initial order of the `Array`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] permutations of `range` length of `words`
        def string_permutations(str, range=2..3, normalized: false)
          permutations(get_words(str, normalized: normalized), range)
            .map { |comb| comb.join(' ') }
        end

        # Keeps the start order of the `chars` and consecutive `chars` together/consecutive.
        # @param str [String] the input `word` string.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `chars`, with the blanks removed.
        def word_ngrams(str, range=2..3, normalized: false)
          str = normalize_string(str) unless normalized
          ngrams(str.to_s.chars, range)
            .map { |comb| no_blanks(comb) }
        end

        # @param str [String] the input string.
        # @return [String, nil] `str` without space characters; `nil` if it is not a `String`.
        def no_blanks(str)
          str.delete(' ') if str.is_a?(String)
        end
      end
    end
  end
end