eco-helpers 2.0.16 → 2.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +50 -6
- data/eco-helpers.gemspec +6 -4
- data/lib/eco-helpers.rb +1 -0
- data/lib/eco/api/common/base_loader.rb +14 -0
- data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
- data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
- data/lib/eco/api/common/people/person_entry.rb +4 -2
- data/lib/eco/api/common/session/mailer.rb +0 -1
- data/lib/eco/api/common/session/s3_uploader.rb +0 -1
- data/lib/eco/api/common/session/sftp.rb +0 -1
- data/lib/eco/api/microcases.rb +3 -1
- data/lib/eco/api/microcases/append_usergroups.rb +0 -1
- data/lib/eco/api/microcases/people_cache.rb +2 -2
- data/lib/eco/api/microcases/people_load.rb +2 -2
- data/lib/eco/api/microcases/people_refresh.rb +2 -2
- data/lib/eco/api/microcases/people_search.rb +6 -6
- data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
- data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
- data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
- data/lib/eco/api/microcases/set_account.rb +0 -1
- data/lib/eco/api/organization.rb +1 -0
- data/lib/eco/api/organization/people.rb +7 -0
- data/lib/eco/api/organization/people_analytics.rb +60 -0
- data/lib/eco/api/organization/presets_factory.rb +22 -83
- data/lib/eco/api/organization/presets_integrity.json +6 -0
- data/lib/eco/api/organization/presets_values.json +5 -4
- data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
- data/lib/eco/api/session.rb +1 -20
- data/lib/eco/api/session/batch.rb +23 -7
- data/lib/eco/api/session/config.rb +0 -10
- data/lib/eco/api/session/config/people.rb +1 -17
- data/lib/eco/api/usecases/default_cases.rb +1 -1
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +1 -1
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +76 -0
- data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
- data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
- data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
- data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +1 -1
- data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +1 -1
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +85 -27
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +62 -36
- data/lib/eco/cli/config/default/options.rb +19 -17
- data/lib/eco/cli/config/default/people_filters.rb +3 -3
- data/lib/eco/cli/config/default/usecases.rb +66 -32
- data/lib/eco/cli/config/default/workflow.rb +1 -1
- data/lib/eco/cli/config/help.rb +1 -0
- data/lib/eco/cli/config/options_set.rb +106 -13
- data/lib/eco/cli/config/use_cases.rb +33 -33
- data/lib/eco/cli/scripting/args_helpers.rb +30 -3
- data/lib/eco/data.rb +1 -0
- data/lib/eco/data/crypto/encryption.rb +3 -3
- data/lib/eco/data/files/helpers.rb +6 -4
- data/lib/eco/data/fuzzy_match.rb +119 -0
- data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +37 -0
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +73 -0
- data/lib/eco/data/fuzzy_match/pairing.rb +102 -0
- data/lib/eco/data/fuzzy_match/result.rb +67 -0
- data/lib/eco/data/fuzzy_match/results.rb +53 -0
- data/lib/eco/data/fuzzy_match/score.rb +44 -0
- data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
- data/lib/eco/data/fuzzy_match/string_helpers.rb +69 -0
- data/lib/eco/version.rb +1 -1
- metadata +82 -10
- data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
- data/lib/eco/api/organization/presets_reference.json +0 -59
- data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
@@ -0,0 +1,73 @@
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # Scoring strategies based on character n-grams.
      #
      # The methods of this module call `normalize_string`, `word_ngrams` and
      # `paired_words`, none of which is defined here: the object that mixes this
      # module in is expected to provide them.
      module NGramsScore
        # Scores `str2` against `str1` word by word. It does the following:
        #   1. It splits both strings into words
        #   2. Pairs all words by best `ngrams_score` match
        #   3. Gives `0` score to those words of `str2` that lost their pair (a word of `str1` cannot be paired twice)
        #   4. Merges the `ngrams_score` of all the paired words of `str2` against their `str1` word pair
        # @note identical strings are reported as a full match (`score == total == length`).
        # @param str1 [String] the string of reference.
        # @param str2 [String] the string to be scored against `str1`.
        # @param range [Integer, Range] determines the length of the n-grams generated for each `word`.
        # @param normalized [Boolean] `true` when the strings are already normalized (avoids normalizing twice).
        # @return [Score] the score object with the result.
        def words_ngrams_score(str1, str2, range: 3..5, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized

          Score.new(0, 0).tap do |score|
            next if !str1 || !str2

            if str1 == str2
              # The accumulator starts with `total == 0`, so the full match has to
              # be registered on both counters (otherwise the result is `0 / 0`).
              score.increase_total(str1.length)
              score.increase(str1.length)
              next
            end

            # Either string being too short makes the pairing pointless.
            next if str1.length < 2 || str2.length < 2

            pairs = paired_words(str1, str2, normalized: true) do |needle, item|
              ngrams_score(needle, item, range: range, normalized: true)
            end

            pairs.each do |_word, (_item, iscore)|
              # `paired_words` may map a word to `nil` when it could not score it.
              score.merge!(iscore) if iscore
            end
          end
        end

        # A score is kept of matching ngram combinations of `str2`.
        # @note This algorithm is best suited for matching sentences, or 'firstname lastname' compared with 'lastname firstname' combinations.
        # @param str1 [String] the string of reference; its length is the `total` of the returned score.
        # @param str2 [String] the string whose n-grams are searched for in `str1`.
        # @param range [Integer, Range] determines the length of the generated values.
        #   With a `Range`, every n-gram length gets the same share of the total score.
        # @param normalized [Boolean] `true` when the strings are already normalized (avoids normalizing twice).
        # @return [Score] the score object with the result.
        def ngrams_score(str1, str2, range: 3..5, normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized

          Score.new(0, str1 ? str1.length : 0).tap do |score|
            next if !str1 || !str2
            next score.increase(score.total) if str1 == str2
            next if str1.length < 2 || str2.length < 2

            grams = word_ngrams(str2, range, normalized: true)
            next if grams.empty?

            if range.is_a?(Integer)
              item_weight = score.total.to_f / grams.length
              matches     = grams.count { |gram| str1.include?(gram) }
              score.increase(matches * item_weight)
            else
              groups        = grams.group_by { |gram| gram.length }
              group_weight  = (1.0 / groups.length).round(3)
              # `increase` never touches `total`, so the share per length is constant.
              len_max_score = score.total * group_weight

              groups.each_value do |len_grams|
                item_weight = len_max_score / len_grams.length
                matches     = len_grams.count { |gram| str1.include?(gram) }
                score.increase(matches * item_weight)
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # Word pairing between two strings.
      #
      # `paired_words` calls `normalize_string` and `get_words`, which are not
      # defined in this module: the object that mixes it in is expected to
      # provide them.
      module Pairing
        # Pair words using some algorithm.
        # It does the following:
        #   1. It splits both strings into words.
        #   2. Pairs all words by using `block` to score the best match.
        #   3. Gives `0` score to those words of `str1` that could not be paired
        #     (a word of `str2` is not paired twice).
        #   4. Returns, per word of `str1`, its pair and/or the `Score` of the pairing.
        # @note candidates with a `Score#ratio` of `0.05` or lower are never paired.
        # @yield [needle, item] offers a comparison algorithm between two strings (required).
        # @yieldparam needle [String] the string of reference.
        # @yieldparam item [String] one of the haystack items.
        # @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `needle` and `item`
        # @param str1 [String] the string of reference.
        # @param str2 [String] one of the haystack items.
        # @param format [Symbol, Array<Symbol>] determines the `values` of the returned `Hash`:
        #   1. `:pair` for just pair
        #   2. `:score` for just score
        #   3. `[:pair, :score]` for `Array`
        # @param normalized [Boolean] `true` when the strings are already normalized (avoids normalizing twice).
        # @return [Hash] where `keys` are the **words** of `str1` and their `values`:
        #   1. if `format` is `:pair` => the `str2` word with highest match (`nil` when unpaired).
        #   2. if `format` is `:score` => the `Score` of the highest match.
        #   3. if `format` is `[:pair, :score]` => both in an `Array`.
        #   It is `{}` when any of the strings is missing, and `{str1 => nil}` when any
        #   of the strings is shorter than 2 characters.
        def paired_words(str1, str2, format: [:pair, :score], normalized: false)
          str1, str2 = normalize_string([str1, str2]) unless normalized
          return {} if !str1 || !str2
          return {str1 => nil} if str1.length < 2 || str2.length < 2

          min_ratio = 0.05
          needles   = get_words(str1, normalized: true)
          haystack  = get_words(str2, normalized: true)

          # ranking: item (word of str2) => needles that score above the threshold
          # faceted: needle (word of str1) => all items, best score first
          ranking = {}
          faceted = needles.each_with_object({}) do |needle, facets|
            scored = haystack.map do |item|
              result = {pair: item, score: yield(needle, item)}
              ranking[item] ||= []
              if result[:score].ratio > min_ratio
                ranking[item] << {needle: needle, score: result[:score]}
              end
              result
            end
            facets[needle] = scored.sort_by { |result| result[:score].ratio }.reverse
          end

          # First pass: each item takes its best needle among those still free.
          paired = {}
          ranking.each do |item, candidates|
            free = candidates.reject { |candidate| paired.key?(candidate[:needle]) }
            best = free.sort_by { |candidate| candidate[:score].ratio }.last
            paired[best[:needle]] = {pair: item, score: best[:score]} if best
          end

          # Second pass: needles left out may only take items nobody has taken yet.
          pending_items = haystack - paired.values.map { |result| result[:pair] }
          faceted.each do |needle, results|
            next if paired.key?(needle)
            result = results.find do |candidate|
              pending_items.include?(candidate[:pair]) && candidate[:score].ratio > min_ratio
            end
            next unless result
            paired[needle] = result
            pending_items.delete(result[:pair])
          end

          # Needles without any acceptable item score 0 over their own length.
          (needles - paired.keys).each do |needle|
            paired[needle] = {pair: nil, score: Score.new(0, needle.length)}
          end

          paired.transform_values do |result|
            format.is_a?(Array) ? result.values_at(*format) : result[format]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # One scored candidate: the matched object, its compared `value` and one
      # score per algorithm. Instances sort best-first (see `<=>`).
      class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
        ALL_METHODS   = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position].freeze
        # Scores used for thresholds and sorting when no `order` has been set.
        DEFAULT_ORDER = [:words_ngrams, :dice].freeze

        # Score readers: the stored value rounded to 3 decimals (`nil` when unset).
        def dice;           super&.round(3); end
        def levenshtein;    super&.round(3); end
        def jaro_winkler;   super&.round(3); end
        def ngrams;         super&.round(3); end
        def words_ngrams;   super&.round(3); end
        def chars_position; super&.round(3); end

        # TODO: print in the order of `order`
        # Builds a one-line summary of all the scores followed by `value`.
        # @note it does not output anything.
        # @return [String] the summary.
        def print
          msg  = "(Dice: #{dice}) (Lev Dst: #{levenshtein}) "
          msg << "(Jaro: #{jaro_winkler}) "
          msg << "(Ngram: #{ngrams}) (WNgrams: #{words_ngrams}) "
          msg << "(C Pos: #{chars_position}) "
          msg << "'#{value}'"
        end

        # @param methods [Symbol, Array<Symbol>] the scores to check.
        # @param threshold [Numeric, nil] the minimum value; when falsey nothing is checked.
        # @return [Boolean] `true` if **all** the `methods` reach the `threshold`.
        def all_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.all? {|method| threshold?(method, threshold)}
        end

        # @param methods [Symbol, Array<Symbol>] the scores to check.
        # @param threshold [Numeric, nil] the minimum value; when falsey nothing is checked.
        # @return [Boolean] `true` if **any** of the `methods` reaches the `threshold`.
        def any_threshold?(methods = order, threshold = 0.15)
          return true unless threshold
          [methods].flatten.compact.any? {|method| threshold?(method, threshold)}
        end

        # @param method [Symbol] the score to check.
        # @param threshold [Numeric] the minimum value.
        # @raise [RuntimeError] when this object does not respond to `method`.
        # @return [Boolean] `true` if the score is set and reaches the `threshold`.
        def threshold?(method = :dice, threshold = 0.15)
          raise "Unknown method '#{method}'" unless respond_to?(method)
          score = send(method)
          # A score that was never computed cannot reach any threshold.
          !score.nil? && score >= threshold
        end

        # Sets the scores (by priority) used for thresholds and sorting.
        # @param values [Symbol, Array<Symbol>, nil] an empty value restores the default order.
        def order=(values)
          requested = [values].flatten.compact
          @order    = requested.empty? ? DEFAULT_ORDER.dup : requested
        end

        # @return [Array<Symbol>] the scores (by priority) used for thresholds and sorting.
        def order
          @order ||= DEFAULT_ORDER.dup
        end

        # Higher scores sort first.
        # @param result [Result] the object to compare with.
        # @return [Integer] `-1` if `self` scores higher, `1` if lower, `0` on a tie.
        def <=>(result)
          compare(result)
        end

        private

        # Compares score by score, following `order`, until one of them differs.
        # An unset (`nil`) score ranks below any value.
        def compare(other, order: self.order)
          order.each do |method|
            raise "Unknown method '#{method}'" unless respond_to?(method) && other.respond_to?(method)
            mine   = send(method)
            theirs = other.send(method)

            if mine.nil? || theirs.nil?
              next if mine.nil? && theirs.nil?
              return mine.nil? ? 1 : -1
            end

            return -1 if mine > theirs
            return  1 if mine < theirs
          end
          0
        end
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # The candidates (`raw_results`) found for a `needle` (compared by its `value`).
      # Each element of `raw_results` is expected to respond to `all_threshold?`,
      # `order=`, `<=>` and `print`.
      class Results < Struct.new(:needle, :value, :raw_results)
        # @return [Results] the candidates whose `:jaro_winkler` score is at least `0.5`.
        def results_with_false_positives
          relevant_results(methods: :jaro_winkler, threshold: 0.5)
        end

        # @param methods [Symbol, Array<Symbol>] the scores that must reach the `threshold`.
        # @param threshold [Numeric] the minimum value of each of the `methods`.
        # @return [Results] a new object with the same `needle` and `value`, only the
        #   candidates that pass the threshold, and `methods` as its `order`.
        def relevant_results(methods: order, threshold: 0.5)
          filtered = raw_results.select do |result|
            result.all_threshold?(methods, threshold)
          end
          self.class.new(needle, value, filtered).tap do |subset|
            subset.order = methods
          end
        end

        # Sets the scores (by priority) used for sorting, here and on every candidate.
        # @param values [Symbol, Array<Symbol>, nil] an empty value restores the default order.
        def order=(values)
          requested = [values].flatten.compact
          @order    = requested.empty? ? [:words_ngrams, :dice] : requested
          raw_results.each {|result| result.order = @order}
        end

        # @return [Array<Symbol>] the scores (by priority) used for sorting.
        def order
          @order ||= [:words_ngrams, :dice]
        end

        # @return [Array] the candidates sorted by their own `<=>`.
        def results
          raw_results.sort
        end

        # Outputs `value` followed by one line per candidate (sorted).
        def print
          msg = results.map do |result|
            result.print
          end.join("\n  ")

          puts "'#{value}':\n  " + msg
        end

        private

        # @param item [Object] a `String` or an object that exposes the string via `attr`.
        # @param attr [Symbol, String, nil] the reader to call on `item`.
        # @return [Object, nil] `item` itself if it is falsey, a `String` or no `attr`
        #   is given; the value of `attr` if `item` responds to it; `nil` otherwise.
        def item_string(item, attr = nil)
          return item if !item || item.is_a?(String) || !attr
          attr = attr.to_sym
          return item.send(attr) if item.respond_to?(attr)
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # Accumulator of a `score` obtained over a maximum `total`.
      class Score < Struct.new(:score, :total)
        # @param decimals [Integer] the number of decimals of the result.
        # @return [Float] `score / total`; `0.0` when `total` is `0`
        #   (a missing `score` counts as `0` and a missing `total` as `1`).
        def ratio(decimals = 6)
          denominator = total || 1
          # Nothing can be scored over an empty total (and it avoids `NaN`/`Infinity`).
          return 0.0 if denominator.zero?
          ((score || 0).to_f / denominator).round(decimals)
        end

        # @param decimals [Integer] the number of decimals of the result.
        # @return [Float] the `ratio` expressed as a percentage.
        def percent(decimals = 3)
          (100 * ratio).round(decimals)
        end

        # Adds `value` to `score` (an unset `score` counts as `0`).
        # @return [Numeric] the new `score`.
        def increase(value = 1)
          self.score = (score || 0) + value
        end

        # Adds `value` to `total` (an unset `total` counts as `0`).
        # @return [Numeric] the new `total`.
        def increase_total(value)
          self.total = (total || 0) + value
        end

        # @note it overrides `Struct#values_at`: it takes member/method **names**, not indexes.
        # @param keys [Array<Symbol, String>] the names of the methods to read.
        # @return [Array] the value of each key (`nil` for the keys this object does not respond to).
        def values_at(*keys)
          keys.map do |key|
            send(key) if respond_to?(key)
          end
        end

        # Merges 2 Score instance objects
        # @param value [Score] the score to be added.
        # @raise [RuntimeError] when `value` is not a `Score`.
        # @return [Score] a new object with the addition; `self` is left untouched.
        def merge(value)
          Score.new(score, total).merge!(value)
        end

        # Adds the `score` and the `total` of `value` to this object.
        # @param value [Score] the score to be added.
        # @raise [RuntimeError] when `value` is not a `Score`.
        # @return [Score] `self`.
        def merge!(value)
          raise "Expecting Score object. Given: #{value.class}" unless value.is_a?(Score)
          increase(value.score)
          increase_total(value.total)
          self
        end
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # Lists of English function words, grouped by grammatical category.
      # The lists are frozen: build a new `Array` to extend any of them.
      module StopWords
        # English prepositions (lowercase).
        PREPOSITIONS = [
          "aboard", "about", "above", "across", "after", "against", "along", "amid", "among", "around", "as", "at",
          "before", "behind", "below", "beneath", "beside", "between", "beyond", "but", "by",
          "concerning", "considering", "despite", "down", "during", "except", "following", "for", "from",
          "in", "inside", "into", "like", "minus", "near", "next",
          "of", "off", "on", "onto", "opposite", "out", "outside", "over", "past", "per", "plus",
          "regarding", "round", "save", "since", "than", "through", "till", "to", "toward",
          "under", "underneath", "unlike", "until", "up", "upon", "versus", "via",
          "with", "within", "without"
        ].freeze

        # English pronouns. Some entries have more than one word (i.e. "each other"),
        # and "I" is the only entry that is not lowercase.
        PRONOUNS = [
          "all", "another", "any", "anybody", "anyone", "anything", "as", "aught",
          "both", "each", "each other", "either", "enough", "everybody", "everyone", "everything",
          "few", "he", "her", "hers", "herself", "him", "himself", "his", "I", "idem", "it", "its", "itself",
          "many", "me", "mine", "most", "my", "myself", "naught", "neither", "no one", "nobody", "none", "nothing", "nought",
          "one", "one another", "other", "others", "ought", "our", "ours", "ourself", "ourselves",
          "several", "she", "some", "somebody", "someone", "something", "somewhat", "such", "suchlike",
          "that", "thee", "their", "theirs", "theirself", "theirselves", "them", "themself", "themselves", "there",
          "these", "they", "thine", "this", "those", "thou", "thy", "thyself", "us",
          "we", "what", "whatever", "whatnot", "whatsoever", "whence", "where", "whereby", "wherefrom",
          "wherein", "whereinto", "whereof", "whereon", "wherever", "wheresoever", "whereto", "whereunto",
          "wherewith", "wherewithal", "whether", "which", "whichever", "whichsoever", "who", "whoever", "whom",
          "whomever", "whomso", "whomsoever", "whose", "whosever", "whosesoever", "whoso", "whosoever",
          "ye", "yon", "yonder", "you", "your", "yours", "yourself", "yourselves"
        ].freeze

        # English articles.
        ARTICLES = ["a", "an", "the"].freeze
      end
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Eco
  module Data
    module FuzzyMatch
      # String utilities for fuzzy matching.
      #
      # The `string_*` and `word_ngrams` methods call `ngrams`, `combinations` and
      # `permutations`, which are not defined in this module: the object that mixes
      # it in is expected to provide them.
      module StringHelpers
        # Downcases and trims
        # @param value [String, Symbol, Array] the value(s) to normalize.
        # @return [String, Array, nil] the normalized `String` (an `Array` of them for an
        #   `Array`), or `nil` for any other type of `value`.
        def normalize_string(value)
          case value
          when Array
            value.map {|val| normalize_string(val)}
          when Symbol
            # `to_s` (a `Symbol` converted `to_sym` would recurse forever)
            normalize_string(value.to_s)
          when String
            value.downcase.strip
          end
        end

        # @note only the runs of ASCII letters, `'` and `-` are considered words.
        # @param str [String] the input string with the words.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array<String>] the words of `str` (empty when there is no string to scan).
        def get_words(str, normalized: false)
          return [] unless str
          str = normalize_string(str) unless normalized
          # `normalize_string` gives `nil` for the types it does not support
          return [] unless str
          str.scan(/[a-zA-Z'-]+/)
        end

        # Keeps the start order of the `words` and consecutive `words` together/consecutive.
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array<String>] combinations of `range` length of `words`.
        def string_ngrams(str, range=2..3, normalized: false)
          ngrams(get_words(str, normalized: normalized), range)
        end

        # Keeps the start order of the `words` of the input `Array` `words`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array<String>] combinations of `range` length of `words`
        def string_combinations(str, range=2..3, normalized: false)
          words = get_words(str, normalized: normalized)
          combinations(words, range).map {|comb| comb.join(' ')}
        end

        # It includes `combinations` that break the initial order of the `Array`.
        # It does **not** keep consecutive `words` together (it can jump/skip items).
        # @param str [String] the input string with the words.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array<String>] permutations of `range` length of `words`
        def string_permutations(str, range=2..3, normalized: false)
          words = get_words(str, normalized: normalized)
          permutations(words, range).map {|comb| comb.join(' ')}
        end

        # Keeps the start order of the `chars` and consecutive `chars` together/consecutive.
        # @param str [String] the input `word` string.
        # @param range [Integer, Range] determine the length of the generated values.
        # @param normalized [Boolean] `true` when `str` is already normalized.
        # @return [Array<String>] combinations of `range` length of `chars`, with the
        #   blanks removed (see `no_blanks`).
        def word_ngrams(str, range=2..3, normalized: false)
          str = normalize_string(str) unless normalized
          ngrams(str.to_s.chars, range).map {|comb| no_blanks(comb)}
        end

        # @param str [String] the input string.
        # @return [String, nil] `str` without space characters; `nil` if `str` is not a `String`.
        def no_blanks(str)
          return nil unless str.is_a?(String)
          str.delete(' ')
        end
      end
    end
  end
end
|