eco-helpers 2.0.15 → 2.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +109 -3
- data/eco-helpers.gemspec +11 -5
- data/lib/eco-helpers.rb +2 -0
- data/lib/eco/api/common/base_loader.rb +14 -0
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
- data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +74 -23
- data/lib/eco/api/common/people/person_entry.rb +5 -2
- data/lib/eco/api/common/people/supervisor_helpers.rb +27 -0
- data/lib/eco/api/common/session.rb +1 -0
- data/lib/eco/api/common/session/base_session.rb +2 -0
- data/lib/eco/api/common/session/file_manager.rb +2 -2
- data/lib/eco/api/common/session/helpers.rb +30 -0
- data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
- data/lib/eco/api/common/session/mailer.rb +0 -1
- data/lib/eco/api/common/session/s3_uploader.rb +0 -1
- data/lib/eco/api/common/session/sftp.rb +0 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
- data/lib/eco/api/common/version_patches/exception.rb +11 -4
- data/lib/eco/api/microcases.rb +3 -1
- data/lib/eco/api/microcases/append_usergroups.rb +0 -1
- data/lib/eco/api/microcases/people_cache.rb +2 -2
- data/lib/eco/api/microcases/people_load.rb +2 -2
- data/lib/eco/api/microcases/people_refresh.rb +2 -2
- data/lib/eco/api/microcases/people_search.rb +6 -6
- data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
- data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
- data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
- data/lib/eco/api/microcases/set_account.rb +0 -1
- data/lib/eco/api/microcases/with_each.rb +67 -6
- data/lib/eco/api/microcases/with_each_present.rb +4 -2
- data/lib/eco/api/microcases/with_each_starter.rb +4 -2
- data/lib/eco/api/organization.rb +1 -0
- data/lib/eco/api/organization/people.rb +98 -22
- data/lib/eco/api/organization/people_similarity.rb +272 -0
- data/lib/eco/api/organization/person_schemas.rb +5 -1
- data/lib/eco/api/organization/policy_groups.rb +5 -1
- data/lib/eco/api/organization/presets_factory.rb +40 -80
- data/lib/eco/api/organization/presets_integrity.json +6 -0
- data/lib/eco/api/organization/presets_values.json +5 -4
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
- data/lib/eco/api/session.rb +10 -24
- data/lib/eco/api/session/batch.rb +25 -7
- data/lib/eco/api/session/config.rb +16 -15
- data/lib/eco/api/session/config/api.rb +4 -0
- data/lib/eco/api/session/config/apis.rb +80 -0
- data/lib/eco/api/session/config/files.rb +7 -0
- data/lib/eco/api/session/config/people.rb +3 -19
- data/lib/eco/api/usecases/default_cases.rb +4 -1
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +161 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +223 -0
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
- data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
- data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
- data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +72 -0
- data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +1 -1
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +132 -29
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +61 -36
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
- data/lib/eco/cli.rb +0 -10
- data/lib/eco/cli/config/default/options.rb +48 -17
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/people_filters.rb +3 -3
- data/lib/eco/cli/config/default/usecases.rb +105 -28
- data/lib/eco/cli/config/default/workflow.rb +21 -12
- data/lib/eco/cli/config/help.rb +1 -0
- data/lib/eco/cli/config/options_set.rb +106 -13
- data/lib/eco/cli/config/use_cases.rb +33 -33
- data/lib/eco/cli/scripting/args_helpers.rb +30 -3
- data/lib/eco/csv.rb +4 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data.rb +1 -0
- data/lib/eco/data/crypto/encryption.rb +3 -3
- data/lib/eco/data/files/directory.rb +28 -20
- data/lib/eco/data/files/helpers.rb +6 -4
- data/lib/eco/data/fuzzy_match.rb +201 -0
- data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +38 -0
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +82 -0
- data/lib/eco/data/fuzzy_match/pairing.rb +95 -0
- data/lib/eco/data/fuzzy_match/result.rb +87 -0
- data/lib/eco/data/fuzzy_match/results.rb +77 -0
- data/lib/eco/data/fuzzy_match/score.rb +49 -0
- data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
- data/lib/eco/data/fuzzy_match/string_helpers.rb +82 -0
- data/lib/eco/version.rb +1 -1
- metadata +168 -11
- data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
- data/lib/eco/api/organization/presets_reference.json +0 -59
- data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      module Pairing

        # Pairs the words of `str1` with the words of `str2` using the scoring `block`.
        # It does the following:
        #   1. It splits both strings into words (via `get_words`).
        #   2. Scores every word of `str1` against every word of `str2` by calling the `block`.
        #   3. For each word of `str2`, picks the best scoring word of `str1` that has not been
        #     paired yet (a word of `str1` cannot be paired twice).
        #   4. The words of `str1` still unpaired are offered the words of `str2` that remain unpaired.
        #   5. The words of `str1` that end up with no pair get a `nil` pair and a `0` score.
        # Only comparisons with a `Score#ratio` over `0.05` are considered for pairing.
        # @note this method relies on `normalize_string`, `get_words` and `Score`,
        #   which are not defined in this module.
        # @yield [needle, item] offers a comparison algorithm between two strings.
        # @yieldparam needle [String] the string of reference.
        # @yieldparam item [String] one of the haystack items.
        # @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `needle` and `item`
        # @param str1 [String] the string of reference.
        # @param str2 [String] one of the haystack items.
        # @param normalized [Boolean] when `true`, `str1` and `str2` are used as given
        #   (avoids double ups in normalizing).
        # @raise [RuntimeError] if a selected comparison result does not carry a `Score`.
        # @return [Hash] where `keys` are the **words** of `str1` and their `values` a pair array of `pair` and `Score`
        def paired_words(str1, str2, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized
          return {nil  => [nil, Score.new(0, 0)]} if !str2 || !str1
          # Both strings must be at least 2 chars long to attempt any pairing
          return {str1 => [nil, Score.new(0, 0)]} if str1.length < 2 || str2.length < 2

          needles   = get_words(str1, normalized: true)
          haystack  = get_words(str2, normalized: true)
          min_ratio = 0.05

          # `ranking`: haystack item => candidate needles (with their score) over `min_ratio`
          # `faceted`: needle => all the haystack comparisons, best ratio first
          ranking = {}
          faceted = needles.each_with_object({}) do |needle, facets|
            facets[needle] = haystack.map do |item|
              result = {pair: item, score: yield(needle, item)}
              ranking[item] ||= []
              if result[:score].ratio > min_ratio
                ranking[item] << ({needle: needle, score: result[:score]})
              end
              result
            end.sort_by do |result|
              result[:score].ratio
            end.reverse
          end

          # First pass: each haystack item takes its best needle among those still free
          paired = {}
          ranking.each do |item, results|
            sorted = results.reject do |result|
              paired.key?(result[:needle])
            end.sort_by do |result|
              result[:score].ratio
            end.reverse
            next unless (result = sorted.shift)
            unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
              raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{result[:needle]}' and item #{item}"
            end
            paired[result[:needle]] = {
              pair:  item,
              score: result[:score]
            }
          end

          # Second pass: unpaired needles can only take haystack items that nobody took.
          # `paired.values` are `{pair:, score:}` hashes, so the items must be extracted first.
          pending_items = haystack - paired.values.map {|data| data[:pair]}
          faceted.each do |needle, results|
            next if paired.key?(needle)
            results.select! do |result|
              pending_items.include?(result[:pair]) && result[:score].ratio > min_ratio
            end
            next unless (result = results.shift)
            unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
              raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{needle}' and item #{result[:pair]}"
            end
            paired[needle] = result
            pending_items.delete(result[:pair])
          end

          # Needles left with no pair: `nil` pair and a `0` score over the needle's length
          (needles - paired.keys).each do |needle|
            paired[needle] = {
              pair:  nil,
              score: Score.new(0, needle.length)
            }
          end

          paired.each_with_object({}) do |(needle, data), out|
            out[needle] = data.values_at(:pair, :score)
          end
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # Holds the scores that several comparison algorithms gave to one `match` (with string `value`)
      # against a needle (with string `needle_value`).
      # Instances compare to each other (`<=>`) by the algorithms listed in `order`, highest score first.
      class Result < Struct.new(:match, :value, :needle_value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
        ALL_METHODS = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position]

        attr_accessor :pivot

        # Score readers: the stored value rounded to 3 decimals (`nil` when not set)
        def dice; super&.round(3); end
        def levenshtein; super&.round(3); end
        def jaro_winkler; super&.round(3); end
        def ngrams; super&.round(3); end
        def words_ngrams; super&.round(3); end
        def chars_position; super&.round(3); end

        #Shortcuts
        def lev; levenshtein; end
        def jaro; jaro_winkler; end
        def wngrams; words_ngrams; end
        def pos; chars_position; end

        # @return [Float] the mean of the six algorithm scores, rounded to 3 decimals.
        def average
          values = [dice, levenshtein, jaro_winkler, ngrams, words_ngrams, chars_position]
          (values.inject(0.0, :+) / values.length).round(3)
        end

        # TODO: print in the order of `order`
        # @return [String] a one-line summary with all the scores and the `value`.
        def print
          msg  = "(Dice: #{dice}) (Lev Dst: #{levenshtein}) "
          msg << "(Jaro: #{jaro_winkler}) "
          msg << "(Ngram: #{ngrams}) (WNgrams: #{words_ngrams}) "
          msg << "(C Pos: #{chars_position}) "
          msg << "(Avg: #{average}) "
          msg << "'#{value}'"
        end

        # @param methods [Symbol, Array<Symbol>] the algorithms to check.
        # @param threshold [Numeric, nil] the minimum score; when `nil` it always returns `true`.
        # @return [Boolean] `true` if **all** the `methods` reach the `threshold`.
        def all_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.all? {|method| threshold?(method, threshold)}
        end

        # @param methods [Symbol, Array<Symbol>] the algorithms to check.
        # @param threshold [Numeric, nil] the minimum score; when `nil` it always returns `true`.
        # @return [Boolean] `true` if **any** of the `methods` reaches the `threshold`.
        def any_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.any? {|method| threshold?(method, threshold)}
        end

        # @raise [RuntimeError] if this object does not respond to `method`.
        # @return [Boolean] `true` if the score of `method` is at least `threshold`.
        def threshold?(method = :dice, threshold = 0.15)
          raise "Uknown method '#{method}'" unless self.respond_to?(method)
          self.send(method) >= threshold
        end

        # Sets the algorithms used to compare two results.
        # @param values [Symbol, Array<Symbol>, nil] when empty or `nil`,
        #   it falls back to `[:words_ngrams, :dice]`.
        def order=(values)
          @order = [values].flatten.compact.tap do |o|
            # `concat` (not `<<`) so the default stays a flat list of symbols
            o.concat([:words_ngrams, :dice]) if o.empty?
          end
        end

        # @return [Array<Symbol>] the algorithms used to compare (default: `[:words_ngrams, :dice]`).
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # Results with higher scores sort first.
        def <=>(result)
          compare(result)
        end

        # @note this overrides `Struct#values_at`: it takes method names rather than indexes.
        # @return [Array] the value of each of the `keys` (`nil` for those not responded to).
        def values_at(*keys)
          keys.map do |key|
            self.send(key) if self.respond_to?(key)
          end
        end

        private

        # Compares by the first algorithm of `order`; on a tie, it moves on to the next one.
        # @return [Integer] `-1` if `self` scores higher, `1` if lower, `0` if all of them tie.
        def compare(other, order: self.order)
          return 0 unless method = order.first
          raise "Uknown method '#{method}'" unless self.respond_to?(method) && other.respond_to?(method)
          return -1 if self.send(method) > other.send(method)
          return  1 if self.send(method) < other.send(method)
          compare(other, order: order[1..-1])
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # Collection of the results (`raw_results`) obtained for one `needle` (with string `value`).
      # It enumerates over `raw_results`.
      class Results < Struct.new(:needle, :value, :raw_results)
        include Enumerable

        attr_accessor :threshold

        # @return [Boolean] `true` if there are no results.
        def empty?
          count < 1
        end

        # Iterates the `raw_results`.
        # @return [Enumerator] when no block is given.
        def each(&block)
          return to_enum(:each) unless block
          raw_results.each(&block)
        end

        # Merges the results of both Results object
        # @note neither `self` nor `res` are modified.
        # @param res [Results] results for the same `needle`.
        # @raise [RuntimeError] if `res` has a different `needle`.
        # @return [Results] a new object with the `raw_results` of `self` followed by those of `res`.
        def merge(res)
          unless self.needle == res.needle
            raise "To merge 2 Results, needle should be the same ('#{value}'). Given '#{res.value}'"
          end
          # `+` (not `concat`) so the receiver's own `raw_results` is left untouched
          self.class.new(needle, value, raw_results + res.raw_results)
        end

        # @return [Results] the results that reach `0.5` on `:jaro_winkler`.
        def results_with_false_positives
          relevant_results(order: :jaro_winkler, threshold: 0.5)
        end

        # Filters the results that reach the threshold in **all** the algorithms of the order.
        # @param options [Hash] supports `:order` (default: `order`) and
        #   `:threshold` (default: `threshold`, or `0.5` if not set).
        # @return [Results] a new object with the filtered results, ordered by `options[:order]`.
        def relevant_results(**options)
          options = {order: order, threshold: threshold || 0.5}.merge(options)
          raw_results.select do |result|
            result.all_threshold?(options[:order], options[:threshold])
          end.yield_self do |filtered|
            self.class.new(needle, value, filtered).tap do |results|
              results.order = options[:order]
            end
          end
        end

        # Sets the order on this object and on each of the `raw_results`.
        # @param values[Array<Symbol>] the algorithms' results it should be ordered by
        #   * Possible values: `:dice`, `:levenshtein`, `:jaro_winkler`, `:ngrams`, `:words_ngrams`, `:chars_position`, `:average`
        def order=(values)
          @order = [values].flatten.compact.tap do |o|
            raw_results.each {|r| r.order = o}
          end
        end

        # @return [Array<Symbol>] the algorithms to order by (default: `[:words_ngrams, :dice]`).
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # @return [Array] the `raw_results` sorted (by means of their own `<=>`).
        def results
          raw_results.sort
        end

        # Outputs (`puts`) the `value` followed by the `print` of each of the sorted results.
        def print
          msg = results.map do |result|
            result.print
          end.join("\n  ")

          puts "'#{value}':\n  " + msg
        end

        private

        # NOTE(review): the default `attr = self.method` calls `Object#method` with no arguments,
        #   which raises `ArgumentError` when `attr` is omitted -- confirm the intended default.
        # @return [String, nil] `item` itself when it is a `String` (or there is no `attr`),
        #   otherwise the value of `attr` on `item` (`nil` if `item` does not respond to it).
        def item_string(item, attr = self.method)
          return item if !item || item.is_a?(String) || !attr
          attr = attr.to_sym
          return item.send(attr) if item.respond_to?(attr)
        end

      end

    end
  end
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # A `score` obtained out of a maximum `total`.
      class Score < Struct.new(:score, :total)

        # @note a missing or non-positive `total` counts as `1`,
        #   and a missing or non-positive `score` counts as `0`.
        # @param decimals [Integer] number of decimals to round to.
        # @return [Float] `score / total`.
        def ratio(decimals = 6)
          tot = self.total; sc = self.score
          tot = tot && tot > 0 ? tot : 1
          sc  = sc  && sc  > 0 ? sc  : 0
          (sc.to_f / tot).round(decimals)
        end

        # @param decimals [Integer] number of decimals to round to.
        # @return [Float] the `ratio` as a percentage.
        def percent(decimals = 3)
          (100 * ratio).round(decimals)
        end

        # Adds `value` to the `score`.
        # @note the `score` is left untouched when the increase is rejected.
        # @raise [RuntimeError] if the resulting `score` would be greater than `total`.
        # @return [Numeric] the new `score`.
        def increase(value = 1)
          new_score = self.score + value
          # Validate before assigning, so a failed increase does not corrupt the object
          raise "Score #{new_score} (increase: #{value}) can't be greater than total #{self.total}" if new_score > self.total
          self.score = new_score
        end

        # Adds `value` to the `total`.
        # @return [Numeric] the new `total`.
        def increase_total(value)
          self.total += value
        end

        # @note this overrides `Struct#values_at`: it takes method names rather than indexes.
        # @return [Array] the value of each of the `keys` (`nil` for those not responded to).
        def values_at(*keys)
          keys.map do |key|
            self.send(key) if self.respond_to?(key)
          end
        end

        # Merges 2 Score instance objects
        # @return [Score] a new object; `self` is not modified.
        def merge(scr)
          Score.new(*values_at(:score, :total)).merge!(scr)
        end

        # Adds the `total` and the `score` of `scr` to this object.
        # @note this object is left untouched when the merge fails.
        # @raise [RuntimeError] if `scr` is not a `Score`, or the resulting
        #   `score` would be greater than the resulting `total`.
        # @return [Score] `self`.
        def merge!(scr)
          raise "Expecting Score object. Given: #{scr.class}" unless scr.is_a?(Score)
          previous_total = self.total
          increase_total(scr.total)
          begin
            increase(scr.score)
          rescue StandardError
            # Roll the `total` back so a rejected merge has no partial effect
            self.total = previous_total
            raise
          end
          self
        end

      end

    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
module Eco
|
|
2
|
+
module Data
|
|
3
|
+
module FuzzyMatch
|
|
4
|
+
module StopWords
|
|
5
|
+
PREPOSITIONS = [
|
|
6
|
+
"aboard", "about", "above", "across", "after", "against", "along", "amid", "among", "around", "as", "at",
|
|
7
|
+
"before", "behind", "below", "beneath", "beside", "between", "beyond", "but", "by",
|
|
8
|
+
"concerning", "considering", "despite", "down", "during", "except", "following", "for", "from",
|
|
9
|
+
"in", "inside", "into", "like", "minus", "near", "next",
|
|
10
|
+
"of", "off", "on", "onto", "opposite", "out", "outside", "over", "past", "per", "plus",
|
|
11
|
+
"regarding", "round", "save", "since", "than", "through", "till", "to", "toward",
|
|
12
|
+
"under", "underneath", "unlike", "until", "up", "upon", "versus", "via",
|
|
13
|
+
"with", "within", "without"
|
|
14
|
+
]
|
|
15
|
+
PRONOUNS = [
|
|
16
|
+
"all", "another", "any", "anybody", "anyone", "anything", "as", "aught",
|
|
17
|
+
"both", "each", "each other", "either", "enough", "everybody", "everyone", "everything",
|
|
18
|
+
"few", "he", "her", "hers", "herself", "him", "himself", "his", "I", "idem", "it", "its", "itself",
|
|
19
|
+
"many", "me", "mine", "most", "my", "myself", "naught", "neither", "no one", "nobody", "none", "nothing", "nought",
|
|
20
|
+
"one", "one another", "other", "others", "ought", "our", "ours", "ourself", "ourselves",
|
|
21
|
+
"several", "she", "some", "somebody", "someone", "something", "somewhat", "such", "suchlike",
|
|
22
|
+
"that", "thee", "their", "theirs", "theirself", "theirselves", "them", "themself", "themselves", "there",
|
|
23
|
+
"these", "they", "thine", "this", "those", "thou", "thy", "thyself", "us",
|
|
24
|
+
"we", "what", "whatever", "whatnot", "whatsoever", "whence", "where", "whereby", "wherefrom",
|
|
25
|
+
"wherein", "whereinto", "whereof", "whereon", "wherever", "wheresoever", "whereto", "whereunto",
|
|
26
|
+
"wherewith", "wherewithal", "whether", "which", "whichever", "whichsoever", "who", "whoever", "whom",
|
|
27
|
+
"whomever", "whomso", "whomsoever", "whose", "whosever", "whosesoever", "whoso", "whosoever",
|
|
28
|
+
"ye", "yon", "yonder", "you", "your", "yours", "yourself", "yourselves"
|
|
29
|
+
]
|
|
30
|
+
ARTICLES = ["a", "an", "the"]
|
|
31
|
+
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # String helpers for fuzzy matching.
      # @note `ngrams`, `combinations` and `permutations` are called by some of these methods,
      #   but are not defined in this module.
      module StringHelpers
        # Downcases and trims
        # @param value [String, Symbol, Array] the value(s) to normalize.
        # @return [String, Array, nil] the normalized `String` (an `Array` of them when `value`
        #   is an `Array`); `nil` for any other type of `value`.
        def normalize_string(value)
          case value
          when Array
            value.map {|val| normalize_string(val)}
          when Symbol
            # Convert to String first (recursing on a Symbol would never end)
            normalize_string(value.to_s)
          when String
            value.downcase.strip
          end
        end

        # @param str [String, nil] the input string.
        # @param normalized [Boolean] when `true`, `str` is used as given.
        # @return [Array<String>] the words of `str` (sequences of letters, `'` and `-`).
        def get_words(str, normalized: false)
          return [] unless str
          str = normalize_string(str) unless normalized
          str.scan(/[a-zA-Z'-]+/)
        end

        # Keeps the start order of the `words` and consecutive `words` together/consecutive.
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `words`.
        def string_ngrams(str, range=2..3, normalized: false)
          ngrams(get_words(str, normalized: normalized), range)
        end

        # Keeps the start order of the `words` of the input `Array` `words`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `words`
        def string_combinations(str, range=2..3, normalized: false)
          combinations(get_words(str, normalized: normalized), range)
            .map {|comb| comb.join(' ')}
        end

        # It includes `combinations` that break the initial order of the `Array`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] permutations of `range` length of `words`
        def string_permutations(str, range=2..3, normalized: false)
          permutations(get_words(str, normalized: normalized), range)
            .map {|comb| comb.join(' ')}
        end

        # Keeps the start order of the `chars` and consecutive `chars` together/consecutive.
        # @param str [String] the input `word` string.
        # @param range [Integer, Range] determine the length of the generated values.
        # @return [Array<String>] combinations of `range` length of `chars`, with the blanks removed.
        def word_ngrams(str, range=2..3, normalized: false)
          str = normalize_string(str) unless normalized
          ngrams(str.to_s.chars, range)
            .map {|comb| no_blanks(comb)}
        end

        # @return [String, nil] `str` without space characters; `nil` if `str` is not a `String`.
        def no_blanks(str)
          return nil unless str && str.is_a?(String)
          str.tr(' ', '')
        end

        # Deletes the words of `str1` and `str2` that match
        # @return [Array<String>] pair of strings: the words of `str1` that are not in `str2`,
        #   and the words of `str2` that are not in `str1` (joined by a space).
        #   The input pair is returned as is when any of the two is `nil` or empty.
        def remove_matching_words(str1, str2, normalized: false)
          unless normalized
            str1 = normalize_string(str1)
            str2 = normalize_string(str2)
          end
          return [str1, str2] if !str1 || !str2 || str1.empty? || str2.empty?
          ws1 = get_words(str1)
          ws2 = get_words(str2)
          [(ws1 - ws2).join(" "), (ws2 - ws1).join(" ")]
        end

      end
    end
  end
end
|
data/lib/eco/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: eco-helpers
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.
|
|
4
|
+
version: 2.0.21
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Oscar Segura
|
|
@@ -16,7 +16,7 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 2.2.
|
|
19
|
+
version: 2.2.17
|
|
20
20
|
- - "<"
|
|
21
21
|
- !ruby/object:Gem::Version
|
|
22
22
|
version: '2.3'
|
|
@@ -26,7 +26,7 @@ dependencies:
|
|
|
26
26
|
requirements:
|
|
27
27
|
- - ">="
|
|
28
28
|
- !ruby/object:Gem::Version
|
|
29
|
-
version: 2.2.
|
|
29
|
+
version: 2.2.17
|
|
30
30
|
- - "<"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
32
|
version: '2.3'
|
|
@@ -116,7 +116,7 @@ dependencies:
|
|
|
116
116
|
requirements:
|
|
117
117
|
- - ">="
|
|
118
118
|
- !ruby/object:Gem::Version
|
|
119
|
-
version: 0.8.
|
|
119
|
+
version: 0.8.3
|
|
120
120
|
- - "<"
|
|
121
121
|
- !ruby/object:Gem::Version
|
|
122
122
|
version: '0.9'
|
|
@@ -126,7 +126,7 @@ dependencies:
|
|
|
126
126
|
requirements:
|
|
127
127
|
- - ">="
|
|
128
128
|
- !ruby/object:Gem::Version
|
|
129
|
-
version: 0.8.
|
|
129
|
+
version: 0.8.3
|
|
130
130
|
- - "<"
|
|
131
131
|
- !ruby/object:Gem::Version
|
|
132
132
|
version: '0.9'
|
|
@@ -136,7 +136,7 @@ dependencies:
|
|
|
136
136
|
requirements:
|
|
137
137
|
- - ">="
|
|
138
138
|
- !ruby/object:Gem::Version
|
|
139
|
-
version: 0.8.
|
|
139
|
+
version: 0.8.7
|
|
140
140
|
- - "<"
|
|
141
141
|
- !ruby/object:Gem::Version
|
|
142
142
|
version: '0.9'
|
|
@@ -146,7 +146,7 @@ dependencies:
|
|
|
146
146
|
requirements:
|
|
147
147
|
- - ">="
|
|
148
148
|
- !ruby/object:Gem::Version
|
|
149
|
-
version: 0.8.
|
|
149
|
+
version: 0.8.7
|
|
150
150
|
- - "<"
|
|
151
151
|
- !ruby/object:Gem::Version
|
|
152
152
|
version: '0.9'
|
|
@@ -230,6 +230,146 @@ dependencies:
|
|
|
230
230
|
- - "<"
|
|
231
231
|
- !ruby/object:Gem::Version
|
|
232
232
|
version: '3.1'
|
|
233
|
+
- !ruby/object:Gem::Dependency
|
|
234
|
+
name: hashdiff
|
|
235
|
+
requirement: !ruby/object:Gem::Requirement
|
|
236
|
+
requirements:
|
|
237
|
+
- - ">="
|
|
238
|
+
- !ruby/object:Gem::Version
|
|
239
|
+
version: 1.0.1
|
|
240
|
+
- - "<"
|
|
241
|
+
- !ruby/object:Gem::Version
|
|
242
|
+
version: '1.1'
|
|
243
|
+
type: :runtime
|
|
244
|
+
prerelease: false
|
|
245
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
246
|
+
requirements:
|
|
247
|
+
- - ">="
|
|
248
|
+
- !ruby/object:Gem::Version
|
|
249
|
+
version: 1.0.1
|
|
250
|
+
- - "<"
|
|
251
|
+
- !ruby/object:Gem::Version
|
|
252
|
+
version: '1.1'
|
|
253
|
+
- !ruby/object:Gem::Dependency
|
|
254
|
+
name: fuzzy_match
|
|
255
|
+
requirement: !ruby/object:Gem::Requirement
|
|
256
|
+
requirements:
|
|
257
|
+
- - ">="
|
|
258
|
+
- !ruby/object:Gem::Version
|
|
259
|
+
version: 2.1.0
|
|
260
|
+
- - "<"
|
|
261
|
+
- !ruby/object:Gem::Version
|
|
262
|
+
version: '2.2'
|
|
263
|
+
type: :runtime
|
|
264
|
+
prerelease: false
|
|
265
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
266
|
+
requirements:
|
|
267
|
+
- - ">="
|
|
268
|
+
- !ruby/object:Gem::Version
|
|
269
|
+
version: 2.1.0
|
|
270
|
+
- - "<"
|
|
271
|
+
- !ruby/object:Gem::Version
|
|
272
|
+
version: '2.2'
|
|
273
|
+
- !ruby/object:Gem::Dependency
|
|
274
|
+
name: amatch
|
|
275
|
+
requirement: !ruby/object:Gem::Requirement
|
|
276
|
+
requirements:
|
|
277
|
+
- - ">="
|
|
278
|
+
- !ruby/object:Gem::Version
|
|
279
|
+
version: 0.4.0
|
|
280
|
+
- - "<"
|
|
281
|
+
- !ruby/object:Gem::Version
|
|
282
|
+
version: '0.5'
|
|
283
|
+
type: :runtime
|
|
284
|
+
prerelease: false
|
|
285
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
286
|
+
requirements:
|
|
287
|
+
- - ">="
|
|
288
|
+
- !ruby/object:Gem::Version
|
|
289
|
+
version: 0.4.0
|
|
290
|
+
- - "<"
|
|
291
|
+
- !ruby/object:Gem::Version
|
|
292
|
+
version: '0.5'
|
|
293
|
+
- !ruby/object:Gem::Dependency
|
|
294
|
+
name: jaro_winkler
|
|
295
|
+
requirement: !ruby/object:Gem::Requirement
|
|
296
|
+
requirements:
|
|
297
|
+
- - ">="
|
|
298
|
+
- !ruby/object:Gem::Version
|
|
299
|
+
version: 1.5.4
|
|
300
|
+
- - "<"
|
|
301
|
+
- !ruby/object:Gem::Version
|
|
302
|
+
version: '1.6'
|
|
303
|
+
type: :runtime
|
|
304
|
+
prerelease: false
|
|
305
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
306
|
+
requirements:
|
|
307
|
+
- - ">="
|
|
308
|
+
- !ruby/object:Gem::Version
|
|
309
|
+
version: 1.5.4
|
|
310
|
+
- - "<"
|
|
311
|
+
- !ruby/object:Gem::Version
|
|
312
|
+
version: '1.6'
|
|
313
|
+
- !ruby/object:Gem::Dependency
|
|
314
|
+
name: roo
|
|
315
|
+
requirement: !ruby/object:Gem::Requirement
|
|
316
|
+
requirements:
|
|
317
|
+
- - ">="
|
|
318
|
+
- !ruby/object:Gem::Version
|
|
319
|
+
version: 2.8.3
|
|
320
|
+
- - "<"
|
|
321
|
+
- !ruby/object:Gem::Version
|
|
322
|
+
version: '2.9'
|
|
323
|
+
type: :runtime
|
|
324
|
+
prerelease: false
|
|
325
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
326
|
+
requirements:
|
|
327
|
+
- - ">="
|
|
328
|
+
- !ruby/object:Gem::Version
|
|
329
|
+
version: 2.8.3
|
|
330
|
+
- - "<"
|
|
331
|
+
- !ruby/object:Gem::Version
|
|
332
|
+
version: '2.9'
|
|
333
|
+
- !ruby/object:Gem::Dependency
|
|
334
|
+
name: roo-xls
|
|
335
|
+
requirement: !ruby/object:Gem::Requirement
|
|
336
|
+
requirements:
|
|
337
|
+
- - ">="
|
|
338
|
+
- !ruby/object:Gem::Version
|
|
339
|
+
version: 1.2.0
|
|
340
|
+
- - "<"
|
|
341
|
+
- !ruby/object:Gem::Version
|
|
342
|
+
version: '1.3'
|
|
343
|
+
type: :runtime
|
|
344
|
+
prerelease: false
|
|
345
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
346
|
+
requirements:
|
|
347
|
+
- - ">="
|
|
348
|
+
- !ruby/object:Gem::Version
|
|
349
|
+
version: 1.2.0
|
|
350
|
+
- - "<"
|
|
351
|
+
- !ruby/object:Gem::Version
|
|
352
|
+
version: '1.3'
|
|
353
|
+
- !ruby/object:Gem::Dependency
|
|
354
|
+
name: creek
|
|
355
|
+
requirement: !ruby/object:Gem::Requirement
|
|
356
|
+
requirements:
|
|
357
|
+
- - ">="
|
|
358
|
+
- !ruby/object:Gem::Version
|
|
359
|
+
version: 2.5.2
|
|
360
|
+
- - "<"
|
|
361
|
+
- !ruby/object:Gem::Version
|
|
362
|
+
version: '2.6'
|
|
363
|
+
type: :runtime
|
|
364
|
+
prerelease: false
|
|
365
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
366
|
+
requirements:
|
|
367
|
+
- - ">="
|
|
368
|
+
- !ruby/object:Gem::Version
|
|
369
|
+
version: 2.5.2
|
|
370
|
+
- - "<"
|
|
371
|
+
- !ruby/object:Gem::Version
|
|
372
|
+
version: '2.6'
|
|
233
373
|
description:
|
|
234
374
|
email:
|
|
235
375
|
- oscar@ecoportal.co.nz
|
|
@@ -285,6 +425,8 @@ files:
|
|
|
285
425
|
- lib/eco/api/common/session/base_session.rb
|
|
286
426
|
- lib/eco/api/common/session/environment.rb
|
|
287
427
|
- lib/eco/api/common/session/file_manager.rb
|
|
428
|
+
- lib/eco/api/common/session/helpers.rb
|
|
429
|
+
- lib/eco/api/common/session/helpers/prompt_user.rb
|
|
288
430
|
- lib/eco/api/common/session/logger.rb
|
|
289
431
|
- lib/eco/api/common/session/logger/cache.rb
|
|
290
432
|
- lib/eco/api/common/session/logger/log.rb
|
|
@@ -318,7 +460,9 @@ files:
|
|
|
318
460
|
- lib/eco/api/microcases/people_load.rb
|
|
319
461
|
- lib/eco/api/microcases/people_refresh.rb
|
|
320
462
|
- lib/eco/api/microcases/people_search.rb
|
|
321
|
-
- lib/eco/api/microcases/
|
|
463
|
+
- lib/eco/api/microcases/preserve_default_tag.rb
|
|
464
|
+
- lib/eco/api/microcases/preserve_filter_tags.rb
|
|
465
|
+
- lib/eco/api/microcases/preserve_policy_groups.rb
|
|
322
466
|
- lib/eco/api/microcases/refresh_default_tag.rb
|
|
323
467
|
- lib/eco/api/microcases/s3upload_targets.rb
|
|
324
468
|
- lib/eco/api/microcases/set_account.rb
|
|
@@ -335,13 +479,13 @@ files:
|
|
|
335
479
|
- lib/eco/api/organization.rb
|
|
336
480
|
- lib/eco/api/organization/login_providers.rb
|
|
337
481
|
- lib/eco/api/organization/people.rb
|
|
482
|
+
- lib/eco/api/organization/people_similarity.rb
|
|
338
483
|
- lib/eco/api/organization/person_schemas.rb
|
|
339
484
|
- lib/eco/api/organization/policy_groups.rb
|
|
340
485
|
- lib/eco/api/organization/preferences.rb
|
|
341
486
|
- lib/eco/api/organization/preferences_reference.json
|
|
342
487
|
- lib/eco/api/organization/presets_factory.rb
|
|
343
488
|
- lib/eco/api/organization/presets_integrity.json
|
|
344
|
-
- lib/eco/api/organization/presets_reference.json
|
|
345
489
|
- lib/eco/api/organization/presets_values.json
|
|
346
490
|
- lib/eco/api/organization/tag_tree.rb
|
|
347
491
|
- lib/eco/api/policies.rb
|
|
@@ -375,8 +519,11 @@ files:
|
|
|
375
519
|
- lib/eco/api/usecases/base_case.rb
|
|
376
520
|
- lib/eco/api/usecases/base_io.rb
|
|
377
521
|
- lib/eco/api/usecases/default_cases.rb
|
|
522
|
+
- lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb
|
|
523
|
+
- lib/eco/api/usecases/default_cases/analyse_people_case.rb
|
|
378
524
|
- lib/eco/api/usecases/default_cases/append_usergroups_case.rb
|
|
379
525
|
- lib/eco/api/usecases/default_cases/change_email_case.rb
|
|
526
|
+
- lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb
|
|
380
527
|
- lib/eco/api/usecases/default_cases/codes_to_tags_case.rb
|
|
381
528
|
- lib/eco/api/usecases/default_cases/create_case.rb
|
|
382
529
|
- lib/eco/api/usecases/default_cases/create_details_case.rb
|
|
@@ -389,7 +536,6 @@ files:
|
|
|
389
536
|
- lib/eco/api/usecases/default_cases/new_id_case.rb
|
|
390
537
|
- lib/eco/api/usecases/default_cases/new_id_case0.rb
|
|
391
538
|
- lib/eco/api/usecases/default_cases/org_data_convert_case.rb
|
|
392
|
-
- lib/eco/api/usecases/default_cases/refresh_abilities_case.rb
|
|
393
539
|
- lib/eco/api/usecases/default_cases/refresh_case.rb
|
|
394
540
|
- lib/eco/api/usecases/default_cases/reinvite_sync_case.rb
|
|
395
541
|
- lib/eco/api/usecases/default_cases/reinvite_trans_case.rb
|
|
@@ -399,6 +545,7 @@ files:
|
|
|
399
545
|
- lib/eco/api/usecases/default_cases/restore_db_case.rb
|
|
400
546
|
- lib/eco/api/usecases/default_cases/set_default_tag_case.rb
|
|
401
547
|
- lib/eco/api/usecases/default_cases/set_supervisor_case.rb
|
|
548
|
+
- lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb
|
|
402
549
|
- lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb
|
|
403
550
|
- lib/eco/api/usecases/default_cases/switch_supervisor_case.rb
|
|
404
551
|
- lib/eco/api/usecases/default_cases/to_csv_case.rb
|
|
@@ -445,6 +592,16 @@ files:
|
|
|
445
592
|
- lib/eco/data/files/directory.rb
|
|
446
593
|
- lib/eco/data/files/file_pattern.rb
|
|
447
594
|
- lib/eco/data/files/helpers.rb
|
|
595
|
+
- lib/eco/data/fuzzy_match.rb
|
|
596
|
+
- lib/eco/data/fuzzy_match/array_helpers.rb
|
|
597
|
+
- lib/eco/data/fuzzy_match/chars_position_score.rb
|
|
598
|
+
- lib/eco/data/fuzzy_match/ngrams_score.rb
|
|
599
|
+
- lib/eco/data/fuzzy_match/pairing.rb
|
|
600
|
+
- lib/eco/data/fuzzy_match/result.rb
|
|
601
|
+
- lib/eco/data/fuzzy_match/results.rb
|
|
602
|
+
- lib/eco/data/fuzzy_match/score.rb
|
|
603
|
+
- lib/eco/data/fuzzy_match/stop_words.rb
|
|
604
|
+
- lib/eco/data/fuzzy_match/string_helpers.rb
|
|
448
605
|
- lib/eco/data/mapper.rb
|
|
449
606
|
- lib/eco/language.rb
|
|
450
607
|
- lib/eco/language/curry.rb
|
|
@@ -471,7 +628,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
471
628
|
requirements:
|
|
472
629
|
- - ">="
|
|
473
630
|
- !ruby/object:Gem::Version
|
|
474
|
-
version: 2.
|
|
631
|
+
version: 2.5.0
|
|
475
632
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
476
633
|
requirements:
|
|
477
634
|
- - ">="
|