eco-helpers 2.0.12 → 2.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +80 -73
- data/eco-helpers.gemspec +6 -4
- data/lib/eco-helpers.rb +1 -0
- data/lib/eco/api/common/base_loader.rb +14 -0
- data/lib/eco/api/common/loaders/use_case.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
- data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
- data/lib/eco/api/common/people/person_entry.rb +9 -2
- data/lib/eco/api/common/people/supervisor_helpers.rb +27 -0
- data/lib/eco/api/common/session/file_manager.rb +2 -2
- data/lib/eco/api/common/session/mailer.rb +0 -1
- data/lib/eco/api/common/session/s3_uploader.rb +0 -1
- data/lib/eco/api/common/session/sftp.rb +0 -1
- data/lib/eco/api/error.rb +5 -3
- data/lib/eco/api/microcases.rb +3 -1
- data/lib/eco/api/microcases/append_usergroups.rb +0 -1
- data/lib/eco/api/microcases/people_cache.rb +2 -2
- data/lib/eco/api/microcases/people_load.rb +2 -2
- data/lib/eco/api/microcases/people_refresh.rb +2 -2
- data/lib/eco/api/microcases/people_search.rb +6 -6
- data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
- data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
- data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
- data/lib/eco/api/microcases/set_account.rb +0 -1
- data/lib/eco/api/organization.rb +1 -0
- data/lib/eco/api/organization/people.rb +7 -0
- data/lib/eco/api/organization/people_analytics.rb +60 -0
- data/lib/eco/api/organization/presets_factory.rb +116 -93
- data/lib/eco/api/organization/presets_integrity.json +58 -0
- data/lib/eco/api/organization/presets_values.json +5 -4
- data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
- data/lib/eco/api/session.rb +1 -20
- data/lib/eco/api/session/batch.rb +42 -10
- data/lib/eco/api/session/batch/job.rb +3 -0
- data/lib/eco/api/session/config.rb +16 -15
- data/lib/eco/api/session/config/api.rb +4 -0
- data/lib/eco/api/session/config/apis.rb +14 -0
- data/lib/eco/api/session/config/files.rb +7 -0
- data/lib/eco/api/session/config/people.rb +3 -19
- data/lib/eco/api/usecases.rb +2 -0
- data/lib/eco/api/usecases/default_cases.rb +4 -1
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +161 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +76 -0
- data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
- data/lib/eco/api/usecases/default_cases/hris_case.rb +14 -8
- data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
- data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
- data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +72 -0
- data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +59 -0
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +104 -26
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +62 -36
- data/lib/eco/cli.rb +0 -10
- data/lib/eco/cli/config/default/options.rb +19 -17
- data/lib/eco/cli/config/default/people_filters.rb +3 -3
- data/lib/eco/cli/config/default/usecases.rb +77 -25
- data/lib/eco/cli/config/default/workflow.rb +6 -1
- data/lib/eco/cli/config/help.rb +1 -0
- data/lib/eco/cli/config/options_set.rb +106 -13
- data/lib/eco/cli/config/use_cases.rb +33 -33
- data/lib/eco/cli/scripting/args_helpers.rb +30 -3
- data/lib/eco/data.rb +1 -0
- data/lib/eco/data/crypto/encryption.rb +3 -3
- data/lib/eco/data/files/directory.rb +28 -20
- data/lib/eco/data/files/helpers.rb +6 -4
- data/lib/eco/data/fuzzy_match.rb +119 -0
- data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +37 -0
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +73 -0
- data/lib/eco/data/fuzzy_match/pairing.rb +102 -0
- data/lib/eco/data/fuzzy_match/result.rb +67 -0
- data/lib/eco/data/fuzzy_match/results.rb +53 -0
- data/lib/eco/data/fuzzy_match/score.rb +44 -0
- data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
- data/lib/eco/data/fuzzy_match/string_helpers.rb +69 -0
- data/lib/eco/version.rb +1 -1
- metadata +86 -10
- data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
- data/lib/eco/api/organization/presets_reference.json +0 -59
- data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
@@ -5,18 +5,35 @@ module Eco
|
|
5
5
|
include Eco::CLI::Config::Help
|
6
6
|
attr_reader :core_config
|
7
7
|
|
8
|
+
class CaseConfig < Struct.new(:cases_config, :option, :type, :description, :casename, :callback)
|
9
|
+
|
10
|
+
def add_option(arg, desc = nil, &block)
|
11
|
+
core_config.options_set.add(arg, desc, namespace: option, &block)
|
12
|
+
self
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def core_config
|
18
|
+
cases_config.core_config
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class ActiveCase < Struct.new(:index, :option, :callback)
|
23
|
+
|
24
|
+
end
|
25
|
+
|
8
26
|
def initialize(core_config:)
|
9
27
|
@core_config = core_config
|
10
28
|
@linked_cases = {}
|
11
|
-
@description = {}
|
12
29
|
end
|
13
30
|
|
14
31
|
# @return [String] summary of the use cases.
|
15
32
|
def help
|
16
33
|
["The following are the available use cases:"].yield_self do |lines|
|
17
34
|
max_len = keys_max_len(@linked_cases.keys)
|
18
|
-
@linked_cases.keys.sort.each do |
|
19
|
-
lines << help_line(
|
35
|
+
@linked_cases.keys.sort.each do |option_case|
|
36
|
+
lines << help_line(option_case, @linked_cases[option_case].description, max_len)
|
20
37
|
end
|
21
38
|
lines
|
22
39
|
end.join("\n")
|
@@ -33,18 +50,8 @@ module Eco
|
|
33
50
|
raise "You must specify a valid 'case_name' when no block is provided" unless case_name
|
34
51
|
raise "'case_name' expected to be a String. Given: #{case_name.class}" unless case_name.is_a?(String)
|
35
52
|
end
|
36
|
-
|
37
|
-
@linked_cases[option_case] =
|
38
|
-
type => {
|
39
|
-
option: option_case,
|
40
|
-
type: type,
|
41
|
-
casename: case_name,
|
42
|
-
callback: callback
|
43
|
-
}
|
44
|
-
}
|
45
|
-
@description[option_case] = desc
|
46
|
-
|
47
|
-
self
|
53
|
+
puts "Overriding case config '#{option_case}'" if @linked_cases.key?(option_case)
|
54
|
+
@linked_cases[option_case] = CaseConfig.new(self, option_case, type, desc, case_name, callback)
|
48
55
|
end
|
49
56
|
|
50
57
|
# Scopes/identifies which usecases are being invoked from the command line
|
@@ -55,20 +62,13 @@ module Eco
|
|
55
62
|
def active(io:)
|
56
63
|
validate_io!(io)
|
57
64
|
return @active_cases unless !@active_cases
|
58
|
-
active_cases = {}
|
59
|
-
@linked_cases.each do |option_case, types|
|
65
|
+
@active_cases = @linked_cases.each_with_object({}) do |(option_case, data), active_cases|
|
60
66
|
next nil unless SCR.get_arg(option_case)
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
index: SCR.get_arg_index(option_case),
|
65
|
-
option: option_case,
|
66
|
-
callback: data[:callback]
|
67
|
-
}
|
68
|
-
end
|
67
|
+
if usecase = get_usecase(io: io, data: data)
|
68
|
+
index = SCR.get_arg_index(option_case)
|
69
|
+
active_cases[usecase] = ActiveCase.new(index, option_case, data.callback)
|
69
70
|
end
|
70
|
-
end
|
71
|
-
@active_cases = active_cases.sort_by {|c, d| d[:index]}.to_h
|
71
|
+
end.sort_by {|c, d| d.index}.to_h
|
72
72
|
end
|
73
73
|
|
74
74
|
def process(io:)
|
@@ -79,7 +79,7 @@ module Eco
|
|
79
79
|
processed = true
|
80
80
|
io = case_io(io: io, usecase: usecase)
|
81
81
|
# some usecases have a callback to collect the parameters
|
82
|
-
data
|
82
|
+
data.callback&.call(*io.params)
|
83
83
|
io = usecase.launch(io: io)
|
84
84
|
end
|
85
85
|
processed
|
@@ -100,17 +100,17 @@ module Eco
|
|
100
100
|
end
|
101
101
|
|
102
102
|
def get_usecase(io:, data:)
|
103
|
-
usecase = if case_name = data
|
104
|
-
io.session.usecases.case(case_name, type: data
|
103
|
+
usecase = if case_name = data.casename
|
104
|
+
io.session.usecases.case(case_name, type: data.type)
|
105
105
|
end
|
106
|
-
usecase ||= if callback = data
|
106
|
+
usecase ||= if callback = data.callback
|
107
107
|
# identify/retrieve usecase via callback
|
108
|
-
params = io.params(keyed: true).merge(type: data
|
108
|
+
params = io.params(keyed: true).merge(type: data.type)
|
109
109
|
io = io.new(**params, validate: false)
|
110
110
|
callback.call(*io.params).tap do |usecase|
|
111
111
|
unless usecase.is_a?(Eco::API::UseCases::UseCase)
|
112
112
|
msg = "When adding a usecase, without specifying 'case_name:', "
|
113
|
-
msg += "the block that integrates usecase for cli option '#{data
|
113
|
+
msg += "the block that integrates usecase for cli option '#{data.option}'"
|
114
114
|
msg += " must return an Eco::API::UseCases::UseCase object. It returns #{usecase.class}"
|
115
115
|
raise msg
|
116
116
|
end
|
@@ -3,6 +3,7 @@ module Eco
|
|
3
3
|
class Scripting
|
4
4
|
module ArgsHelpers
|
5
5
|
|
6
|
+
# @return [Array<String>] the command line arguments.
|
6
7
|
def argv
|
7
8
|
@argv || ARGV
|
8
9
|
end
|
@@ -11,10 +12,18 @@ module Eco
|
|
11
12
|
Argument.is_modifier?(value)
|
12
13
|
end
|
13
14
|
|
15
|
+
# @return [Arguments] supported known arguments.
|
14
16
|
def arguments
|
15
17
|
@arguments ||= Arguments.new(argv)
|
16
18
|
end
|
17
19
|
|
20
|
+
# Registers an argument as a known one.
|
21
|
+
def known_argument(key, with_param: false)
|
22
|
+
arguments.add(key, with_param: with_param)
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
# Validation to stop the `script` if among `argv` there's any **unknown** argument.
|
18
27
|
def stop_on_unknown!(exclude: [], only_options: false)
|
19
28
|
# validate only those that are options
|
20
29
|
unknown = arguments.unknown(exclude: exclude)
|
@@ -23,18 +32,35 @@ module Eco
|
|
23
32
|
end
|
24
33
|
|
25
34
|
unless unknown.empty?
|
26
|
-
|
35
|
+
msg = "There are unknown options in your command line arguments:\n"
|
36
|
+
msg += "#{unknown}\n"
|
37
|
+
msg += "Please, remember that use case specific options should come after the use case in the command line.\n"
|
38
|
+
msg += "Use 'ruby main.rb -org [-usecase] --help -options' for more information"
|
39
|
+
raise msg
|
27
40
|
end
|
28
41
|
end
|
29
42
|
|
43
|
+
# @return [Boolean] if `key` is in the command line.
|
44
|
+
def arg?(key)
|
45
|
+
argv.include?(key)
|
46
|
+
end
|
47
|
+
|
48
|
+
# @return [Integer, nil] the position of `key` in the command line.
|
30
49
|
def get_arg_index(key)
|
31
|
-
return nil if !
|
50
|
+
return nil if !arg?(key)
|
32
51
|
argv.index(key)
|
33
52
|
end
|
34
53
|
|
54
|
+
# @return [Boolean] if `key1` precedes `key2` in the command line.
|
55
|
+
def arg_order?(key1, key2)
|
56
|
+
return false unless (k1 = get_arg_index(key1)) && k2 = get_arg_index(key2)
|
57
|
+
k1 < k2
|
58
|
+
end
|
59
|
+
|
60
|
+
# @return [String, Boolean] the argument value if `with_param` or a `Boolean` if not.
|
35
61
|
def get_arg(key, with_param: false, valid: true)
|
36
62
|
# track what a known option looks like
|
37
|
-
|
63
|
+
known_argument(key, with_param: with_param)
|
38
64
|
return nil unless index = get_arg_index(key)
|
39
65
|
value = true
|
40
66
|
if with_param
|
@@ -45,6 +71,7 @@ module Eco
|
|
45
71
|
return value
|
46
72
|
end
|
47
73
|
|
74
|
+
# @return [String] the filename.
|
48
75
|
def get_file(key, required: false, should_exist: true)
|
49
76
|
filename = get_arg(key, with_param: true)
|
50
77
|
if !filename && required
|
data/lib/eco/data.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'openssl'
|
2
|
-
|
2
|
+
# 'json'
|
3
3
|
require 'base64'
|
4
|
-
require 'pp'
|
4
|
+
#require 'pp'
|
5
5
|
|
6
6
|
require_relative '../../cli/scripting'
|
7
7
|
|
@@ -147,7 +147,7 @@ module Eco
|
|
147
147
|
return str_c
|
148
148
|
#EncryptedData.new({content: str_c, key: key, iv: iv})
|
149
149
|
|
150
|
-
|
150
|
+
|
151
151
|
end
|
152
152
|
def aes256_decrypt(data, key: , iv: , block_octets: BLOCK_OCTETS)
|
153
153
|
block_bits = block_bits * 8
|
@@ -5,6 +5,29 @@ module Eco
|
|
5
5
|
module Files
|
6
6
|
class Directory
|
7
7
|
|
8
|
+
class << self
  # Creates the directory `path`, including any missing parent directory.
  # @param path [String] the directory to create (or a file path when `includes_file` is `true`).
  # @param includes_file [Boolean] when `true`, the last segment of `path` is dropped
  #   before creating the directories.
  # @return [Boolean] `true` if the path already existed or was created,
  #   `false` if the creation failed.
  def create(path, includes_file: false)
    return true if Files.file_exists?(path)

    parts = Files.split(File.expand_path(path))
    # the last segment is not a folder to create
    parts.pop if includes_file

    return true if Files.dir_exists?(File.join(*parts))

    subpath = nil
    begin
      # create one level at a time, skipping those that already exist
      parts.each do |curr|
        subpath = subpath ? File.join(subpath, curr) : curr
        Dir.mkdir(subpath) unless Files.dir_exists?(subpath)
      end
    rescue StandardError => e
      # only failures of the creation itself are reported as `false`;
      # interrupts and exit requests are left to propagate
      pp e
      return false
    end
    true
  end
end
|
30
|
+
|
8
31
|
attr_reader :dir_path
|
9
32
|
|
10
33
|
def initialize(dir_path = Dir.pwd)
|
@@ -14,12 +37,14 @@ module Eco
|
|
14
37
|
end
|
15
38
|
|
16
39
|
def exists?
|
17
|
-
Files.dir_exists(@dir_path)
|
40
|
+
Files.dir_exists?(@dir_path)
|
18
41
|
end
|
19
42
|
|
20
43
|
def create
|
21
|
-
|
22
|
-
|
44
|
+
return self.full_path if self.exists?
|
45
|
+
if succeed = Directory.create(File.expand_path(@dir_path))
|
46
|
+
return self.full_path
|
47
|
+
end
|
23
48
|
end
|
24
49
|
|
25
50
|
def full_path
|
@@ -57,23 +82,6 @@ module Eco
|
|
57
82
|
File.join(*args)
|
58
83
|
end
|
59
84
|
|
60
|
-
def self.create(path, includes_file: false)
|
61
|
-
return true if Files.file_exists?(path)
|
62
|
-
parts = Files.split(File.expand_path(path))
|
63
|
-
filename = parts.pop if includes_file
|
64
|
-
return true if Files.dir_exists?(File.join(*parts))
|
65
|
-
subpath = nil
|
66
|
-
begin
|
67
|
-
parts.each do |curr|
|
68
|
-
subpath = subpath ? File.join(subpath, curr) : curr
|
69
|
-
Dir.mkdir(subpath) unless Files.dir_exists?(subpath)
|
70
|
-
end
|
71
|
-
rescue Exception => e
|
72
|
-
pp e
|
73
|
-
end
|
74
|
-
false
|
75
|
-
end
|
76
|
-
|
77
85
|
private
|
78
86
|
|
79
87
|
def file_pattern(value)
|
@@ -3,11 +3,13 @@ module Eco
|
|
3
3
|
module Files
|
4
4
|
DEFAULT_TIMESTAMP_PATTERN = '%Y-%m-%dT%H%M%S'
|
5
5
|
|
6
|
-
|
7
|
-
base
|
8
|
-
|
6
|
+
class << self
|
7
|
+
def included(base)
|
8
|
+
base.send(:include, InstanceMethods)
|
9
|
+
base.extend(ClassMethods)
|
10
|
+
end
|
9
11
|
end
|
10
|
-
|
12
|
+
|
11
13
|
module InstanceMethods
|
12
14
|
|
13
15
|
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'fuzzy_match'
|
2
|
+
require 'amatch'
|
3
|
+
require 'jaro_winkler'
|
4
|
+
|
5
|
+
require_relative 'fuzzy_match/stop_words'
|
6
|
+
require_relative 'fuzzy_match/array_helpers'
|
7
|
+
require_relative 'fuzzy_match/string_helpers'
|
8
|
+
require_relative 'fuzzy_match/pairing'
|
9
|
+
require_relative 'fuzzy_match/chars_position_score'
|
10
|
+
require_relative 'fuzzy_match/ngrams_score'
|
11
|
+
|
12
|
+
module Eco
|
13
|
+
module Data
|
14
|
+
module FuzzyMatch
|
15
|
+
|
16
|
+
class << self
|
17
|
+
def included(base)
|
18
|
+
base.send(:include, InstanceMethods)
|
19
|
+
base.extend(ClassMethods)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
module ClassMethods
|
24
|
+
include ArrayHelpers
|
25
|
+
include StringHelpers
|
26
|
+
include Pairing
|
27
|
+
include CharsPositionScore
|
28
|
+
include NGramsScore
|
29
|
+
|
30
|
+
def jaro_winkler(str1, str2)
|
31
|
+
options = {
|
32
|
+
ignore_case: true,
|
33
|
+
weight: 0.25
|
34
|
+
}
|
35
|
+
JaroWinkler.distance(str1, str2, **options)
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
|
40
|
+
module InstanceMethods
|
41
|
+
include StopWords
|
42
|
+
|
43
|
+
attr_accessor :fuzzy_options
|
44
|
+
|
45
|
+
def fuzzy_options
|
46
|
+
@fuzzy_options ||= {}
|
47
|
+
end
|
48
|
+
|
49
|
+
def fuzzy_match(haystack = nil, **options)
|
50
|
+
return @fuzzy_match if instance_variable_defined?(:@fuzzy_match)
|
51
|
+
@fuzzy_options = options.merge({
|
52
|
+
stop_words: PREPOSITIONS + PRONOUNS + ARTICLES
|
53
|
+
})
|
54
|
+
# make it run with a native C extension (for better performance: ~130 % increase of performance)
|
55
|
+
::FuzzyMatch.engine = :amatch
|
56
|
+
haystack = obtain_haystack(haystack).tap do |items|
|
57
|
+
if !fuzzy_read_method && found = items.find {|item| !item.is_a?(String)}
|
58
|
+
raise "To use non String objects as 'haystack' you should provide `read:` or `options[:read]`. Given element: #{found.class}"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
@fuzzy_match = ::FuzzyMatch.new(haystack, fuzzy_options)
|
62
|
+
end
|
63
|
+
|
64
|
+
# @note
|
65
|
+
# - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
|
66
|
+
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
|
67
|
+
# @return [Eco::Data::FuzzyMatch::Results]
|
68
|
+
def find_all_with_score(needle, **options)
|
69
|
+
results = fuzzy_match(**options).find_all_with_score(needle).each_with_object([]) do |fuzzy_results, results|
|
70
|
+
item, dice, lev = fuzzy_results
|
71
|
+
unless item == needle
|
72
|
+
needle_str = item_string(needle)
|
73
|
+
item_str = item_string(item)
|
74
|
+
jaro_res = self.class.jaro_winkler(needle_str, item_str)
|
75
|
+
ngram_res = self.class.ngrams_score(needle_str, item_str, range: 3..5).ratio
|
76
|
+
wngram_res = self.class.words_ngrams_score(needle_str, item_str, range: 3..7).ratio
|
77
|
+
pos_res = self.class.chars_position_score(needle_str, item_str).ratio
|
78
|
+
results << Result.new(item, item_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
Results.new(needle, item_string(needle), results)
|
82
|
+
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
# @note
#   - When `data` is omitted and the including object is `Enumerable`, the object itself is used.
#   - It uses `values` when the source is a `Hash`, and `to_a` otherwise (nested arrays are flattened).
# @param data [Enumerable, nil]
# @return [Array<Object>] the non-repeated, non-`nil` values of `data`
# @raise [RuntimeError] when there is no `Enumerable` source to read from.
def obtain_haystack(data = nil)
  data = self if self.is_a?(Enumerable) && !data
  raise "'data' should be an Enumerable. Given: #{data.class}" unless data.is_a?(Enumerable)
  # read from `data` (not from `self`), so an explicitly given haystack is honoured
  items = data.is_a?(Hash) ? data.values.flatten : data.to_a.flatten
  items.uniq.compact
end
|
96
|
+
|
97
|
+
def item_string(item, attr = fuzzy_read_method)
|
98
|
+
return item if !item || item.is_a?(String) || !attr
|
99
|
+
attr = attr.to_sym
|
100
|
+
return item.send(attr) if item.respond_to?(attr)
|
101
|
+
end
|
102
|
+
|
103
|
+
def fuzzy_read_method
|
104
|
+
fuzzy_options[:read]
|
105
|
+
end
|
106
|
+
|
107
|
+
end
|
108
|
+
|
109
|
+
class << self
|
110
|
+
include FuzzyMatch::ClassMethods
|
111
|
+
end
|
112
|
+
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
require_relative 'fuzzy_match/score'
|
118
|
+
require_relative 'fuzzy_match/result'
|
119
|
+
require_relative 'fuzzy_match/results'
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Eco
|
2
|
+
module Data
|
3
|
+
module FuzzyMatch
|
4
|
+
module ArrayHelpers
|
5
|
+
# Builds n-grams: runs of consecutive `values`, each run joined with a single space.
# Keeps the start order of the `values` and consecutive `values` together/consecutive.
# @param values [Array] the input array with the values.
# @param range [Integer, Range] determines the length (or lengths) of the generated n-grams.
#   Lengths lower than `1` generate nothing.
# @return [Array<String>] the unique n-grams of `range` length of `values`,
#   in the order the lengths of `range` are iterated.
def ngrams(values, range=2..3)
  sizes = range.is_a?(Integer) ? [range] : range
  sizes.each_with_object([]) do |size, out|
    # a window of less than 1 element is meaningless (`each_cons` would raise)
    next if size < 1
    values.each_cons(size) {|chunk| out << chunk.join(' ')}
  end.uniq
end
|
27
|
+
|
28
|
+
# Keeps the start order of the `values` of the input `Array` `values`.
|
29
|
+
# It does **not** keep consecutive `values` together (it can jump/skip items).
|
30
|
+
# @param values [Array] the input array with the values.
|
31
|
+
# @param range [Integer, Range] determines the length of the generated values.
|
32
|
+
# @return [Array<Array<Value>>] combinations of `range` length of `values`
|
33
|
+
def combinations(values, range=2..3)
|
34
|
+
if range.is_a?(Integer)
|
35
|
+
values.combination(range).to_a
|
36
|
+
else
|
37
|
+
range.flat_map {|size| values.combination(size).to_a}
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# It includes `combinations` that break the initial order of the `Array`.
|
42
|
+
# It does **not** keep consecutive `values` together (it can jump/skip items).
|
43
|
+
# @param values [Array] the input array with the values.
|
44
|
+
# @param range [Integer, Range] determines the length of the generated values.
|
45
|
+
# @return [Array<Array<Value>>] permutations of `range` length of `values`
|
46
|
+
def permutations(values, range=2..3)
|
47
|
+
combinations(values, range).tap do |out|
|
48
|
+
range = range.is_a?(Integer)? (range..range) : range
|
49
|
+
out.dup.select do |item|
|
50
|
+
range.include?(item.length)
|
51
|
+
end.each do |comb|
|
52
|
+
comb.permutation.to_a.tap do |perms|
|
53
|
+
perms.each {|perm| out << perm}
|
54
|
+
end
|
55
|
+
end
|
56
|
+
out.uniq!
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# Helper to prepare the facet structure
|
61
|
+
# @param values1 [Array] the input array with the values to have their facet against.
|
62
|
+
# @param values2 [Array] the input array with the values to facet against.
|
63
|
+
# @return [Hash] where `keys` are `values1` and `value` of each `key` all `values2`
|
64
|
+
def facet(values1, values2)
|
65
|
+
{}.tap do |out|
|
66
|
+
next unless values1.is_a?(Enumerable)
|
67
|
+
values1 = values1.is_a?(Hash) ? values1.values : values1.to_a
|
68
|
+
values1.each {|val| out[val] = values2.dup}
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|