eco-helpers 2.0.12 → 2.0.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +80 -73
- data/eco-helpers.gemspec +6 -4
- data/lib/eco-helpers.rb +1 -0
- data/lib/eco/api/common/base_loader.rb +14 -0
- data/lib/eco/api/common/loaders/use_case.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
- data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
- data/lib/eco/api/common/people/person_entry.rb +9 -2
- data/lib/eco/api/common/people/supervisor_helpers.rb +27 -0
- data/lib/eco/api/common/session/file_manager.rb +2 -2
- data/lib/eco/api/common/session/mailer.rb +0 -1
- data/lib/eco/api/common/session/s3_uploader.rb +0 -1
- data/lib/eco/api/common/session/sftp.rb +0 -1
- data/lib/eco/api/error.rb +5 -3
- data/lib/eco/api/microcases.rb +3 -1
- data/lib/eco/api/microcases/append_usergroups.rb +0 -1
- data/lib/eco/api/microcases/people_cache.rb +2 -2
- data/lib/eco/api/microcases/people_load.rb +2 -2
- data/lib/eco/api/microcases/people_refresh.rb +2 -2
- data/lib/eco/api/microcases/people_search.rb +6 -6
- data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
- data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
- data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
- data/lib/eco/api/microcases/set_account.rb +0 -1
- data/lib/eco/api/organization.rb +1 -0
- data/lib/eco/api/organization/people.rb +7 -0
- data/lib/eco/api/organization/people_analytics.rb +60 -0
- data/lib/eco/api/organization/presets_factory.rb +116 -93
- data/lib/eco/api/organization/presets_integrity.json +58 -0
- data/lib/eco/api/organization/presets_values.json +5 -4
- data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
- data/lib/eco/api/session.rb +1 -20
- data/lib/eco/api/session/batch.rb +42 -10
- data/lib/eco/api/session/batch/job.rb +3 -0
- data/lib/eco/api/session/config.rb +16 -15
- data/lib/eco/api/session/config/api.rb +4 -0
- data/lib/eco/api/session/config/apis.rb +14 -0
- data/lib/eco/api/session/config/files.rb +7 -0
- data/lib/eco/api/session/config/people.rb +3 -19
- data/lib/eco/api/usecases.rb +2 -0
- data/lib/eco/api/usecases/default_cases.rb +4 -1
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +161 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +76 -0
- data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
- data/lib/eco/api/usecases/default_cases/hris_case.rb +14 -8
- data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
- data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
- data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +72 -0
- data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +59 -0
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +104 -26
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +62 -36
- data/lib/eco/cli.rb +0 -10
- data/lib/eco/cli/config/default/options.rb +19 -17
- data/lib/eco/cli/config/default/people_filters.rb +3 -3
- data/lib/eco/cli/config/default/usecases.rb +77 -25
- data/lib/eco/cli/config/default/workflow.rb +6 -1
- data/lib/eco/cli/config/help.rb +1 -0
- data/lib/eco/cli/config/options_set.rb +106 -13
- data/lib/eco/cli/config/use_cases.rb +33 -33
- data/lib/eco/cli/scripting/args_helpers.rb +30 -3
- data/lib/eco/data.rb +1 -0
- data/lib/eco/data/crypto/encryption.rb +3 -3
- data/lib/eco/data/files/directory.rb +28 -20
- data/lib/eco/data/files/helpers.rb +6 -4
- data/lib/eco/data/fuzzy_match.rb +119 -0
- data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +37 -0
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +73 -0
- data/lib/eco/data/fuzzy_match/pairing.rb +102 -0
- data/lib/eco/data/fuzzy_match/result.rb +67 -0
- data/lib/eco/data/fuzzy_match/results.rb +53 -0
- data/lib/eco/data/fuzzy_match/score.rb +44 -0
- data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
- data/lib/eco/data/fuzzy_match/string_helpers.rb +69 -0
- data/lib/eco/version.rb +1 -1
- metadata +86 -10
- data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
- data/lib/eco/api/organization/presets_reference.json +0 -59
- data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
@@ -5,18 +5,35 @@ module Eco
|
|
5
5
|
include Eco::CLI::Config::Help
|
6
6
|
attr_reader :core_config
|
7
7
|
|
8
|
+
class CaseConfig < Struct.new(:cases_config, :option, :type, :description, :casename, :callback)
|
9
|
+
|
10
|
+
def add_option(arg, desc = nil, &block)
|
11
|
+
core_config.options_set.add(arg, desc, namespace: option, &block)
|
12
|
+
self
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def core_config
|
18
|
+
cases_config.core_config
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class ActiveCase < Struct.new(:index, :option, :callback)
|
23
|
+
|
24
|
+
end
|
25
|
+
|
8
26
|
def initialize(core_config:)
|
9
27
|
@core_config = core_config
|
10
28
|
@linked_cases = {}
|
11
|
-
@description = {}
|
12
29
|
end
|
13
30
|
|
14
31
|
# @return [String] summary of the use cases.
|
15
32
|
def help
|
16
33
|
["The following are the available use cases:"].yield_self do |lines|
|
17
34
|
max_len = keys_max_len(@linked_cases.keys)
|
18
|
-
@linked_cases.keys.sort.each do |
|
19
|
-
lines << help_line(
|
35
|
+
@linked_cases.keys.sort.each do |option_case|
|
36
|
+
lines << help_line(option_case, @linked_cases[option_case].description, max_len)
|
20
37
|
end
|
21
38
|
lines
|
22
39
|
end.join("\n")
|
@@ -33,18 +50,8 @@ module Eco
|
|
33
50
|
raise "You must specify a valid 'case_name' when no block is provided" unless case_name
|
34
51
|
raise "'case_name' expected to be a String. Given: #{case_name.class}" unless case_name.is_a?(String)
|
35
52
|
end
|
36
|
-
|
37
|
-
@linked_cases[option_case] =
|
38
|
-
type => {
|
39
|
-
option: option_case,
|
40
|
-
type: type,
|
41
|
-
casename: case_name,
|
42
|
-
callback: callback
|
43
|
-
}
|
44
|
-
}
|
45
|
-
@description[option_case] = desc
|
46
|
-
|
47
|
-
self
|
53
|
+
puts "Overriding case config '#{option_case}'" if @linked_cases.key?(option_case)
|
54
|
+
@linked_cases[option_case] = CaseConfig.new(self, option_case, type, desc, case_name, callback)
|
48
55
|
end
|
49
56
|
|
50
57
|
# Scopes/identifies which usecases are being invoked from the command line
|
@@ -55,20 +62,13 @@ module Eco
|
|
55
62
|
def active(io:)
|
56
63
|
validate_io!(io)
|
57
64
|
return @active_cases unless !@active_cases
|
58
|
-
active_cases = {}
|
59
|
-
@linked_cases.each do |option_case, types|
|
65
|
+
@active_cases = @linked_cases.each_with_object({}) do |(option_case, data), active_cases|
|
60
66
|
next nil unless SCR.get_arg(option_case)
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
index: SCR.get_arg_index(option_case),
|
65
|
-
option: option_case,
|
66
|
-
callback: data[:callback]
|
67
|
-
}
|
68
|
-
end
|
67
|
+
if usecase = get_usecase(io: io, data: data)
|
68
|
+
index = SCR.get_arg_index(option_case)
|
69
|
+
active_cases[usecase] = ActiveCase.new(index, option_case, data.callback)
|
69
70
|
end
|
70
|
-
end
|
71
|
-
@active_cases = active_cases.sort_by {|c, d| d[:index]}.to_h
|
71
|
+
end.sort_by {|c, d| d.index}.to_h
|
72
72
|
end
|
73
73
|
|
74
74
|
def process(io:)
|
@@ -79,7 +79,7 @@ module Eco
|
|
79
79
|
processed = true
|
80
80
|
io = case_io(io: io, usecase: usecase)
|
81
81
|
# some usecases have a callback to collect the parameters
|
82
|
-
data
|
82
|
+
data.callback&.call(*io.params)
|
83
83
|
io = usecase.launch(io: io)
|
84
84
|
end
|
85
85
|
processed
|
@@ -100,17 +100,17 @@ module Eco
|
|
100
100
|
end
|
101
101
|
|
102
102
|
def get_usecase(io:, data:)
|
103
|
-
usecase = if case_name = data
|
104
|
-
io.session.usecases.case(case_name, type: data
|
103
|
+
usecase = if case_name = data.casename
|
104
|
+
io.session.usecases.case(case_name, type: data.type)
|
105
105
|
end
|
106
|
-
usecase ||= if callback = data
|
106
|
+
usecase ||= if callback = data.callback
|
107
107
|
# identify/retrieve usecase via callback
|
108
|
-
params = io.params(keyed: true).merge(type: data
|
108
|
+
params = io.params(keyed: true).merge(type: data.type)
|
109
109
|
io = io.new(**params, validate: false)
|
110
110
|
callback.call(*io.params).tap do |usecase|
|
111
111
|
unless usecase.is_a?(Eco::API::UseCases::UseCase)
|
112
112
|
msg = "When adding a usecase, without specifying 'case_name:', "
|
113
|
-
msg += "the block that integrates usecase for cli option '#{data
|
113
|
+
msg += "the block that integrates usecase for cli option '#{data.option}'"
|
114
114
|
msg += " must return an Eco::API::UseCases::UseCase object. It returns #{usecase.class}"
|
115
115
|
raise msg
|
116
116
|
end
|
@@ -3,6 +3,7 @@ module Eco
|
|
3
3
|
class Scripting
|
4
4
|
module ArgsHelpers
|
5
5
|
|
6
|
+
# @return [Array<String>] the command line arguments.
|
6
7
|
def argv
|
7
8
|
@argv || ARGV
|
8
9
|
end
|
@@ -11,10 +12,18 @@ module Eco
|
|
11
12
|
Argument.is_modifier?(value)
|
12
13
|
end
|
13
14
|
|
15
|
+
# @return [Arguments] supported known arguments.
|
14
16
|
def arguments
|
15
17
|
@arguments ||= Arguments.new(argv)
|
16
18
|
end
|
17
19
|
|
20
|
+
# Registers an argument as a known one.
|
21
|
+
def known_argument(key, with_param: false)
|
22
|
+
arguments.add(key, with_param: with_param)
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
# Validation to stop the `script` if among `argv` there's any **unknown** argument.
|
18
27
|
def stop_on_unknown!(exclude: [], only_options: false)
|
19
28
|
# validate only those that are options
|
20
29
|
unknown = arguments.unknown(exclude: exclude)
|
@@ -23,18 +32,35 @@ module Eco
|
|
23
32
|
end
|
24
33
|
|
25
34
|
unless unknown.empty?
|
26
|
-
|
35
|
+
msg = "There are unknown options in your command line arguments:\n"
|
36
|
+
msg += "#{unknown}\n"
|
37
|
+
msg += "Please, remember that use case specific options should come after the use case in the command line.\n"
|
38
|
+
msg += "Use 'ruby main.rb -org [-usecase] --help -options' for more information"
|
39
|
+
raise msg
|
27
40
|
end
|
28
41
|
end
|
29
42
|
|
43
|
+
# @return [Boolean] if `key` is in the command line.
|
44
|
+
def arg?(key)
|
45
|
+
argv.include?(key)
|
46
|
+
end
|
47
|
+
|
48
|
+
# @return [Integer, nil] the position of `key` in the command line.
|
30
49
|
def get_arg_index(key)
|
31
|
-
return nil if !
|
50
|
+
return nil if !arg?(key)
|
32
51
|
argv.index(key)
|
33
52
|
end
|
34
53
|
|
54
|
+
# @return [Boolean] if `key1` precedes `key2` in the command line.
|
55
|
+
def arg_order?(key1, key2)
|
56
|
+
return false unless (k1 = get_arg_index(key1)) && k2 = get_arg_index(key2)
|
57
|
+
k1 < k2
|
58
|
+
end
|
59
|
+
|
60
|
+
# @return [String, Boolean] the argument value if `with_param` or a `Boolean` if not.
|
35
61
|
def get_arg(key, with_param: false, valid: true)
|
36
62
|
# track what a known option looks like
|
37
|
-
|
63
|
+
known_argument(key, with_param: with_param)
|
38
64
|
return nil unless index = get_arg_index(key)
|
39
65
|
value = true
|
40
66
|
if with_param
|
@@ -45,6 +71,7 @@ module Eco
|
|
45
71
|
return value
|
46
72
|
end
|
47
73
|
|
74
|
+
# @return [String] the filename.
|
48
75
|
def get_file(key, required: false, should_exist: true)
|
49
76
|
filename = get_arg(key, with_param: true)
|
50
77
|
if !filename && required
|
data/lib/eco/data.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'openssl'
|
2
|
-
|
2
|
+
# 'json'
|
3
3
|
require 'base64'
|
4
|
-
require 'pp'
|
4
|
+
#require 'pp'
|
5
5
|
|
6
6
|
require_relative '../../cli/scripting'
|
7
7
|
|
@@ -147,7 +147,7 @@ module Eco
|
|
147
147
|
return str_c
|
148
148
|
#EncryptedData.new({content: str_c, key: key, iv: iv})
|
149
149
|
|
150
|
-
|
150
|
+
|
151
151
|
end
|
152
152
|
def aes256_decrypt(data, key: , iv: , block_octets: BLOCK_OCTETS)
|
153
153
|
block_bits = block_bits * 8
|
@@ -5,6 +5,29 @@ module Eco
|
|
5
5
|
module Files
|
6
6
|
class Directory
|
7
7
|
|
8
|
+
class << self
|
9
|
+
def create(path, includes_file: false)
|
10
|
+
return true if Files.file_exists?(path)
|
11
|
+
|
12
|
+
parts = Files.split(File.expand_path(path))
|
13
|
+
filename = parts.pop if includes_file
|
14
|
+
|
15
|
+
return true if Files.dir_exists?(File.join(*parts))
|
16
|
+
|
17
|
+
subpath = nil
|
18
|
+
begin
|
19
|
+
parts.each do |curr|
|
20
|
+
subpath = subpath ? File.join(subpath, curr) : curr
|
21
|
+
Dir.mkdir(subpath) unless Files.dir_exists?(subpath)
|
22
|
+
end
|
23
|
+
rescue Exception => e
|
24
|
+
pp e
|
25
|
+
return false
|
26
|
+
end
|
27
|
+
true
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
8
31
|
attr_reader :dir_path
|
9
32
|
|
10
33
|
def initialize(dir_path = Dir.pwd)
|
@@ -14,12 +37,14 @@ module Eco
|
|
14
37
|
end
|
15
38
|
|
16
39
|
def exists?
|
17
|
-
Files.dir_exists(@dir_path)
|
40
|
+
Files.dir_exists?(@dir_path)
|
18
41
|
end
|
19
42
|
|
20
43
|
def create
|
21
|
-
|
22
|
-
|
44
|
+
return self.full_path if self.exists?
|
45
|
+
if succeed = Directory.create(File.expand_path(@dir_path))
|
46
|
+
return self.full_path
|
47
|
+
end
|
23
48
|
end
|
24
49
|
|
25
50
|
def full_path
|
@@ -57,23 +82,6 @@ module Eco
|
|
57
82
|
File.join(*args)
|
58
83
|
end
|
59
84
|
|
60
|
-
def self.create(path, includes_file: false)
|
61
|
-
return true if Files.file_exists?(path)
|
62
|
-
parts = Files.split(File.expand_path(path))
|
63
|
-
filename = parts.pop if includes_file
|
64
|
-
return true if Files.dir_exists?(File.join(*parts))
|
65
|
-
subpath = nil
|
66
|
-
begin
|
67
|
-
parts.each do |curr|
|
68
|
-
subpath = subpath ? File.join(subpath, curr) : curr
|
69
|
-
Dir.mkdir(subpath) unless Files.dir_exists?(subpath)
|
70
|
-
end
|
71
|
-
rescue Exception => e
|
72
|
-
pp e
|
73
|
-
end
|
74
|
-
false
|
75
|
-
end
|
76
|
-
|
77
85
|
private
|
78
86
|
|
79
87
|
def file_pattern(value)
|
@@ -3,11 +3,13 @@ module Eco
|
|
3
3
|
module Files
|
4
4
|
DEFAULT_TIMESTAMP_PATTERN = '%Y-%m-%dT%H%M%S'
|
5
5
|
|
6
|
-
|
7
|
-
base
|
8
|
-
|
6
|
+
class << self
|
7
|
+
def included(base)
|
8
|
+
base.send(:include, InstanceMethods)
|
9
|
+
base.extend(ClassMethods)
|
10
|
+
end
|
9
11
|
end
|
10
|
-
|
12
|
+
|
11
13
|
module InstanceMethods
|
12
14
|
|
13
15
|
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'fuzzy_match'
|
2
|
+
require 'amatch'
|
3
|
+
require 'jaro_winkler'
|
4
|
+
|
5
|
+
require_relative 'fuzzy_match/stop_words'
|
6
|
+
require_relative 'fuzzy_match/array_helpers'
|
7
|
+
require_relative 'fuzzy_match/string_helpers'
|
8
|
+
require_relative 'fuzzy_match/pairing'
|
9
|
+
require_relative 'fuzzy_match/chars_position_score'
|
10
|
+
require_relative 'fuzzy_match/ngrams_score'
|
11
|
+
|
12
|
+
module Eco
|
13
|
+
module Data
|
14
|
+
module FuzzyMatch
|
15
|
+
|
16
|
+
class << self
|
17
|
+
def included(base)
|
18
|
+
base.send(:include, InstanceMethods)
|
19
|
+
base.extend(ClassMethods)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
module ClassMethods
|
24
|
+
include ArrayHelpers
|
25
|
+
include StringHelpers
|
26
|
+
include Pairing
|
27
|
+
include CharsPositionScore
|
28
|
+
include NGramsScore
|
29
|
+
|
30
|
+
def jaro_winkler(str1, str2)
|
31
|
+
options = {
|
32
|
+
ignore_case: true,
|
33
|
+
weight: 0.25
|
34
|
+
}
|
35
|
+
JaroWinkler.distance(str1, str2, **options)
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
|
40
|
+
module InstanceMethods
|
41
|
+
include StopWords
|
42
|
+
|
43
|
+
attr_accessor :fuzzy_options
|
44
|
+
|
45
|
+
def fuzzy_options
|
46
|
+
@fuzzy_options ||= {}
|
47
|
+
end
|
48
|
+
|
49
|
+
def fuzzy_match(haystack = nil, **options)
|
50
|
+
return @fuzzy_match if instance_variable_defined?(:@fuzzy_match)
|
51
|
+
@fuzzy_options = options.merge({
|
52
|
+
stop_words: PREPOSITIONS + PRONOUNS + ARTICLES
|
53
|
+
})
|
54
|
+
# make it run with a native C extension (for better performance: ~130 % increase of performance)
|
55
|
+
::FuzzyMatch.engine = :amatch
|
56
|
+
haystack = obtain_haystack(haystack).tap do |items|
|
57
|
+
if !fuzzy_read_method && found = items.find {|item| !item.is_a?(String)}
|
58
|
+
raise "To use non String objects as 'haystack' you should provide `read:` or `options[:read]`. Given element: #{found.class}"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
@fuzzy_match = ::FuzzyMatch.new(haystack, fuzzy_options)
|
62
|
+
end
|
63
|
+
|
64
|
+
# @note
|
65
|
+
# - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
|
66
|
+
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
|
67
|
+
# @return [Eco::Data::FuzzyMatch::Results]
|
68
|
+
def find_all_with_score(needle, **options)
|
69
|
+
results = fuzzy_match(**options).find_all_with_score(needle).each_with_object([]) do |fuzzy_results, results|
|
70
|
+
item, dice, lev = fuzzy_results
|
71
|
+
unless item == needle
|
72
|
+
needle_str = item_string(needle)
|
73
|
+
item_str = item_string(item)
|
74
|
+
jaro_res = self.class.jaro_winkler(needle_str, item_str)
|
75
|
+
ngram_res = self.class.ngrams_score(needle_str, item_str, range: 3..5).ratio
|
76
|
+
wngram_res = self.class.words_ngrams_score(needle_str, item_str, range: 3..7).ratio
|
77
|
+
pos_res = self.class.chars_position_score(needle_str, item_str).ratio
|
78
|
+
results << Result.new(item, item_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
Results.new(needle, item_string(needle), results)
|
82
|
+
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
# @note
|
87
|
+
# - When used in an `Enumerable` it will use `to_a`, or `values` if it's a `Hash`
|
88
|
+
# @param data [Enumerable, nil]
|
89
|
+
# @return [Array<Object>] the non-repeated values of `data`
|
90
|
+
def obtain_haystack(data = nil)
|
91
|
+
data = self if self.is_a?(Enumerable) && !data
|
92
|
+
raise "'data' should be an Enumerable. Given: #{data.class}" unless data.is_a?(Enumerable)
|
93
|
+
data = self.is_a?(Hash) ? self.values.flatten : to_a.flatten
|
94
|
+
data.uniq.compact
|
95
|
+
end
|
96
|
+
|
97
|
+
def item_string(item, attr = fuzzy_read_method)
|
98
|
+
return item if !item || item.is_a?(String) || !attr
|
99
|
+
attr = attr.to_sym
|
100
|
+
return item.send(attr) if item.respond_to?(attr)
|
101
|
+
end
|
102
|
+
|
103
|
+
def fuzzy_read_method
|
104
|
+
fuzzy_options[:read]
|
105
|
+
end
|
106
|
+
|
107
|
+
end
|
108
|
+
|
109
|
+
class << self
|
110
|
+
include FuzzyMatch::ClassMethods
|
111
|
+
end
|
112
|
+
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
require_relative 'fuzzy_match/score'
|
118
|
+
require_relative 'fuzzy_match/result'
|
119
|
+
require_relative 'fuzzy_match/results'
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Eco
|
2
|
+
module Data
|
3
|
+
module FuzzyMatch
|
4
|
+
module ArrayHelpers
|
5
|
+
# Keeps the start order of the `values` and consecutive `values` together/consecutive.
|
6
|
+
# @param values [Array] the input array with the values.
|
7
|
+
# @param range [Integer, Range] determines the length of the generated values.
|
8
|
+
# @return [Array<String>] n-grams of `range` length of `values`, each joined with a single space.
|
9
|
+
def ngrams(values, range=2..3)
|
10
|
+
[].tap do |out|
|
11
|
+
if range.is_a?(Integer)
|
12
|
+
n = range
|
13
|
+
values_count = values.length
|
14
|
+
values.each_with_index do |word, i|
|
15
|
+
min = i
|
16
|
+
max = i + (n - 1)
|
17
|
+
break if values_count <= max
|
18
|
+
out << values[min..max].join(' ')
|
19
|
+
end
|
20
|
+
out.uniq!
|
21
|
+
else
|
22
|
+
range.each {|n| out.concat(ngrams(values, n))}
|
23
|
+
out.uniq!
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Keeps the start order of the `values` of the input `Array` `values`.
|
29
|
+
# It does **not** keep consecutive `values` together (it can jump/skip items).
|
30
|
+
# @param values [Array] the input array with the values.
|
31
|
+
# @param range [Integer, Range] determines the length of the generated values.
|
32
|
+
# @return [Array<Array<Value>>] combinations of `range` length of `values`
|
33
|
+
def combinations(values, range=2..3)
|
34
|
+
if range.is_a?(Integer)
|
35
|
+
values.combination(range).to_a
|
36
|
+
else
|
37
|
+
range.flat_map {|size| values.combination(size).to_a}
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# It includes `combinations` that break the initial order of the `Array`.
|
42
|
+
# It does **not** keep consecutive `values` together (it can jump/skip items).
|
43
|
+
# @param values [Array] the input array with the values.
|
44
|
+
# @param range [Integer, Range] determines the length of the generated values.
|
45
|
+
# @return [Array<Array<Value>>] permutations of `range` length of `values`
|
46
|
+
def permutations(values, range=2..3)
|
47
|
+
combinations(values, range).tap do |out|
|
48
|
+
range = range.is_a?(Integer)? (range..range) : range
|
49
|
+
out.dup.select do |item|
|
50
|
+
range.include?(item.length)
|
51
|
+
end.each do |comb|
|
52
|
+
comb.permutation.to_a.tap do |perms|
|
53
|
+
perms.each {|perm| out << perm}
|
54
|
+
end
|
55
|
+
end
|
56
|
+
out.uniq!
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# Helper to prepare facet structure
|
61
|
+
# @param values1 [Array] the input array with the values to have their facet against.
|
62
|
+
# @param values2 [Array] the input array with the values to facet against.
|
63
|
+
# @return [Hash] where `keys` are `values1` and `value` of each `key` all `values2`
|
64
|
+
def facet(values1, values2)
|
65
|
+
{}.tap do |out|
|
66
|
+
next unless values1.is_a?(Enumerable)
|
67
|
+
values1 = values1.is_a?(Hash) ? values1.values : values1.to_a
|
68
|
+
values1.each {|val| out[val] = values2.dup}
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|