eco-helpers 2.0.17 → 2.0.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +85 -1
- data/eco-helpers.gemspec +4 -1
- data/lib/eco-helpers.rb +1 -0
- data/lib/eco/api/common/base_loader.rb +9 -5
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +88 -23
- data/lib/eco/api/common/people/person_entry.rb +1 -0
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/session.rb +1 -0
- data/lib/eco/api/common/session/base_session.rb +2 -0
- data/lib/eco/api/common/session/helpers.rb +30 -0
- data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
- data/lib/eco/api/common/version_patches/exception.rb +11 -4
- data/lib/eco/api/microcases/with_each.rb +67 -6
- data/lib/eco/api/microcases/with_each_present.rb +4 -2
- data/lib/eco/api/microcases/with_each_starter.rb +4 -2
- data/lib/eco/api/organization.rb +1 -1
- data/lib/eco/api/organization/people.rb +94 -25
- data/lib/eco/api/organization/people_similarity.rb +272 -0
- data/lib/eco/api/organization/person_schemas.rb +5 -1
- data/lib/eco/api/organization/policy_groups.rb +5 -1
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +19 -8
- data/lib/eco/api/session/batch.rb +7 -5
- data/lib/eco/api/session/batch/job.rb +27 -8
- data/lib/eco/api/session/config/apis.rb +80 -14
- data/lib/eco/api/usecases.rb +2 -2
- data/lib/eco/api/usecases/base_case.rb +2 -2
- data/lib/eco/api/usecases/base_io.rb +17 -4
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +3 -3
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +179 -32
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +81 -36
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +3 -4
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
- data/lib/eco/cli/config/default/input.rb +61 -8
- data/lib/eco/cli/config/default/options.rb +36 -2
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +33 -2
- data/lib/eco/cli/config/default/workflow.rb +21 -12
- data/lib/eco/cli/scripting/args_helpers.rb +2 -2
- data/lib/eco/csv.rb +4 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +109 -27
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +19 -10
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -19
- data/lib/eco/data/fuzzy_match/result.rb +22 -2
- data/lib/eco/data/fuzzy_match/results.rb +30 -6
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +67 -3
- data/lib/eco/api/organization/people_analytics.rb +0 -60
@@ -6,11 +6,15 @@ ASSETS.cli.config do |config|
|
|
6
6
|
|
7
7
|
# default rescue
|
8
8
|
wf.rescue do |exception, io|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
begin
|
10
|
+
next io if rescued
|
11
|
+
rescued = true
|
12
|
+
|
13
|
+
io.session.logger.debug(exception.patch_full_message)
|
14
|
+
wf.run(:close, io: io)
|
15
|
+
rescue Exception => e
|
16
|
+
puts "Some problem in workflow.rescue: #{e}"
|
17
|
+
end
|
14
18
|
io
|
15
19
|
end
|
16
20
|
|
@@ -24,7 +28,10 @@ ASSETS.cli.config do |config|
|
|
24
28
|
cases_with_input = config.usecases.active(io: io).select do |usecase, data|
|
25
29
|
io.class.input_required?(usecase.type)
|
26
30
|
end
|
27
|
-
|
31
|
+
|
32
|
+
input_is_required = !cases_with_input.empty? || io.options.dig(:input, :entries_from)
|
33
|
+
missing_input = !io.input || io.input.empty?
|
34
|
+
next io unless missing_input && input_is_required
|
28
35
|
|
29
36
|
if io.options.dig(:input, :entries_from)
|
30
37
|
io = io.new(input: config.input.get(io: io))
|
@@ -46,8 +53,7 @@ ASSETS.cli.config do |config|
|
|
46
53
|
cases_with_people = config.usecases.active(io: io).select do |usecase, data|
|
47
54
|
io.class.people_required?(usecase.type)
|
48
55
|
end
|
49
|
-
|
50
|
-
next io unless !cases_with_people.empty? || get_people
|
56
|
+
next io if cases_with_people.empty? && !io.options.dig(:people, :get)
|
51
57
|
io = io.new(people: config.people(io: io))
|
52
58
|
end
|
53
59
|
|
@@ -60,7 +66,8 @@ ASSETS.cli.config do |config|
|
|
60
66
|
|
61
67
|
wf.before(:usecases) do |wf_cases, io|
|
62
68
|
# save partial entries -> should be native to session.workflow
|
63
|
-
|
69
|
+
get_people = io.options.dig(:people, :get)
|
70
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
64
71
|
if !io.options[:dry_run] && partial_update
|
65
72
|
partial_file = io.session.config.people.partial_cache
|
66
73
|
io.session.file_manager.save_json(io.people, partial_file, :timestamp)
|
@@ -91,11 +98,12 @@ ASSETS.cli.config do |config|
|
|
91
98
|
if io.session.post_launch.empty?
|
92
99
|
wf_post.skip!
|
93
100
|
else
|
94
|
-
|
101
|
+
get_people = io.options.dig(:people, :get)
|
102
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
95
103
|
if !io.options[:dry_run] && partial_update
|
96
104
|
# get target people afresh
|
97
105
|
people = io.session.micro.people_refresh(people: io.people, include_created: true)
|
98
|
-
io = io.new(people: people)
|
106
|
+
io = io.base.new(people: people)
|
99
107
|
else
|
100
108
|
wf_post.skip!
|
101
109
|
msg = "Although there are post_launch cases, they will NOT be RUN"
|
@@ -132,7 +140,8 @@ ASSETS.cli.config do |config|
|
|
132
140
|
end
|
133
141
|
|
134
142
|
wf.on(:end) do |wf_end, io|
|
135
|
-
|
143
|
+
get_people = io.options.dig(:people, :get)
|
144
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
136
145
|
unless !io.options[:end_get] || io.options[:dry_run] || partial_update
|
137
146
|
people = io.session.micro.people_cache
|
138
147
|
io = io.new(people: people)
|
@@ -75,10 +75,10 @@ module Eco
|
|
75
75
|
def get_file(key, required: false, should_exist: true)
|
76
76
|
filename = get_arg(key, with_param: true)
|
77
77
|
if !filename && required
|
78
|
-
puts "You need to specify a file '#{key}
|
78
|
+
puts "You need to specify a file or folder '#{key} file_or_folder'"
|
79
79
|
exit(1)
|
80
80
|
elsif !file_exists?(filename) && should_exist && required
|
81
|
-
puts "This file doesn't exist '#{filename}'"
|
81
|
+
puts "This file/folder doesn't exist '#{filename}'"
|
82
82
|
exit(1)
|
83
83
|
end
|
84
84
|
|
data/lib/eco/csv.rb
CHANGED
@@ -18,8 +18,10 @@ module Eco
|
|
18
18
|
kargs = {headers: true, skip_blanks: true}.merge(kargs)
|
19
19
|
|
20
20
|
args = [file].tap do |arg|
|
21
|
-
|
22
|
-
|
21
|
+
encoding = Eco::API::Common::Session::FileManager.encoding(file)
|
22
|
+
#encoding = (encoding != "utf-8")? "#{encoding}|utf-8": encoding
|
23
|
+
#arg.push(encoding)
|
24
|
+
arg.push("rb:bom|utf-8") if encoding == "bom"
|
23
25
|
end
|
24
26
|
|
25
27
|
out = super(*args, **kargs).reject do |row|
|
data/lib/eco/csv/table.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
module Eco
|
3
2
|
class CSV
|
4
3
|
class Table < ::CSV::Table
|
@@ -9,6 +8,70 @@ module Eco
|
|
9
8
|
super(to_rows_array(input))
|
10
9
|
end
|
11
10
|
|
11
|
+
# @return [Hash] where keys are the groups and the values a `Eco::CSV::Table`
|
12
|
+
def group_by(&block)
|
13
|
+
rows.group_by(&block).transform_values do |rows|
|
14
|
+
self.class.new(rows)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# @return [Eco::CSV::Table]
|
19
|
+
def transform_values
|
20
|
+
transformed_rows = rows.map do |row|
|
21
|
+
res = yield(row)
|
22
|
+
case res
|
23
|
+
when Array
|
24
|
+
::CSV::Row.new(row.headers, res)
|
25
|
+
when ::CSV::Row
|
26
|
+
res
|
27
|
+
end
|
28
|
+
end
|
29
|
+
self.class.new(transformed_rows)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Slices the selected rows
|
33
|
+
# @return [Eco::CSV::Table]
|
34
|
+
def slice(*index)
|
35
|
+
case index.first
|
36
|
+
when Range, Numeric
|
37
|
+
self.class.new(rows.slice(index.first))
|
38
|
+
else
|
39
|
+
self
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# @return [Eco::CSV::Table]
|
44
|
+
def slice_columns(*index)
|
45
|
+
case index.first
|
46
|
+
when Range, Numeric
|
47
|
+
columns_to_table(columns.slice(index.first))
|
48
|
+
when String
|
49
|
+
csv_cols = columns
|
50
|
+
csv_cols = index.each_with_object([]) do |name, cols|
|
51
|
+
col = csv_cols.find {|col| col.first == name}
|
52
|
+
cols << col if col
|
53
|
+
end
|
54
|
+
columns_to_table(csv_cols)
|
55
|
+
else
|
56
|
+
self
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# @return [Eco::CSV::Table]
|
61
|
+
def delete_column(i)
|
62
|
+
csv_cols = columns
|
63
|
+
csv_cols.delete(i)
|
64
|
+
columns_to_table(csv_cols)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Adds a new column at the end
|
68
|
+
# @param header_name [String] header of the new column
|
69
|
+
# @return [Eco::CSV::Table] with a new empty column
|
70
|
+
def add_column(header_name)
|
71
|
+
new_col = Array.new(length).unshift(header_name)
|
72
|
+
columns_to_table(columns.push(new_col))
|
73
|
+
end
|
74
|
+
|
12
75
|
# @return [Array<::CSV::Row>]
|
13
76
|
def rows
|
14
77
|
[].tap do |out|
|
@@ -16,24 +79,40 @@ module Eco
|
|
16
79
|
end
|
17
80
|
end
|
18
81
|
|
82
|
+
# It removes all rows where all columns' values are the same
|
83
|
+
def delete_duplicates!
|
84
|
+
unique_rows = []
|
85
|
+
self.by_row!.delete_if do |row|
|
86
|
+
unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
|
87
|
+
unique_rows << row unless found
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# @param row1 [CSV:Row] row to be compared
|
93
|
+
# @param row2 [CSV:Row] row to be compared
|
94
|
+
# @param [Boolean] `true` if all values of `row1` are as of `row2`
|
95
|
+
def equal_rows?(row1, row2)
|
96
|
+
row1.fields.zip(row2.fields).all? do |(v1, v2)|
|
97
|
+
v1 == v2
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
19
101
|
# @return [Integer] total number of rows not including the header
|
20
102
|
def length
|
21
103
|
to_a.length - 1
|
22
104
|
end
|
23
105
|
|
106
|
+
def empty?
|
107
|
+
length < 1
|
108
|
+
end
|
109
|
+
|
24
110
|
# @return [Array<Array>] each array is the column header followed by its values
|
25
111
|
def columns
|
26
112
|
to_a.transpose
|
27
113
|
end
|
28
114
|
|
29
|
-
#
|
30
|
-
# @param header_name [String] header of the new column
|
31
|
-
# @return [Eco::CSV::Table] with a new empty column
|
32
|
-
def add_column(header_name)
|
33
|
-
new_col = Array.new(length).unshift(header_name)
|
34
|
-
columns_to_table(columns.push(new_col))
|
35
|
-
end
|
36
|
-
|
115
|
+
# Creates a single `Hash` where each key, value is a column (header + values)
|
37
116
|
# @note it will override columns with same header name
|
38
117
|
# @return [Hash] keys are headers, values are arrays
|
39
118
|
def columns_hash
|
@@ -42,6 +121,17 @@ module Eco
|
|
42
121
|
end.to_h
|
43
122
|
end
|
44
123
|
|
124
|
+
# Returns an array of row hashes
|
125
|
+
# @note it will override columns with same header
|
126
|
+
def to_a_h
|
127
|
+
rows.map(&:to_h)
|
128
|
+
end
|
129
|
+
|
130
|
+
# @see #to_a_h
|
131
|
+
def to_array_of_hashes
|
132
|
+
to_a_h
|
133
|
+
end
|
134
|
+
|
45
135
|
private
|
46
136
|
|
47
137
|
def columns_to_table(columns_array)
|
@@ -51,24 +141,34 @@ module Eco
|
|
51
141
|
|
52
142
|
def to_rows_array(data)
|
53
143
|
case data
|
54
|
-
when Array
|
55
|
-
return data unless data.length > 0
|
56
|
-
if data.first.is_a?(::CSV::Row)
|
57
|
-
data
|
58
|
-
elsif data.first.is_a?(Array)
|
59
|
-
headers = data.shift
|
60
|
-
data.map do |arr_row|
|
61
|
-
CSV::Row.new(headers, arr_row)
|
62
|
-
end.compact
|
63
|
-
else
|
64
|
-
raise "Expected data that can be transformed into Array<Array>"
|
65
|
-
end
|
66
144
|
when ::CSV::Table
|
67
145
|
to_rows_array(data.to_a)
|
68
146
|
when Hash
|
69
147
|
# hash of columns header as key and column array as value
|
70
148
|
rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
|
71
149
|
to_rows_array(data.keys)
|
150
|
+
when Enumerable
|
151
|
+
data = data.dup.compact
|
152
|
+
return data unless data.count > 0
|
153
|
+
sample = data.first
|
154
|
+
|
155
|
+
case sample
|
156
|
+
when ::CSV::Row
|
157
|
+
data
|
158
|
+
when Array
|
159
|
+
headers = data.shift
|
160
|
+
data.map do |arr_row|
|
161
|
+
::CSV::Row.new(headers, arr_row)
|
162
|
+
end.compact
|
163
|
+
when Hash
|
164
|
+
headers = sample.keys
|
165
|
+
headers_str = headers.map(&:to_s)
|
166
|
+
data.map do |hash|
|
167
|
+
::CSV::Row.new(headers_str, hash.values_at(*headers))
|
168
|
+
end.compact
|
169
|
+
else
|
170
|
+
raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
|
171
|
+
end
|
72
172
|
else
|
73
173
|
raise "Input type not supported. Given: #{data.class}"
|
74
174
|
end
|
data/lib/eco/data/fuzzy_match.rb
CHANGED
@@ -27,17 +27,29 @@ module Eco
|
|
27
27
|
include CharsPositionScore
|
28
28
|
include NGramsScore
|
29
29
|
|
30
|
-
def jaro_winkler(str1, str2)
|
30
|
+
def jaro_winkler(str1, str2, **options)
|
31
|
+
return 0 if !str1 || !str2
|
31
32
|
options = {
|
32
33
|
ignore_case: true,
|
33
34
|
weight: 0.25
|
34
|
-
}
|
35
|
+
}.merge(options)
|
35
36
|
JaroWinkler.distance(str1, str2, **options)
|
36
37
|
end
|
37
38
|
|
38
39
|
end
|
39
40
|
|
40
41
|
module InstanceMethods
|
42
|
+
FUZZY_MATCH_OPTIONS = [
|
43
|
+
:identities, :groupings, :stop_words, :read,
|
44
|
+
:must_match_grouping, :must_match_at_least_one_word,
|
45
|
+
:gather_last_result, :threshold
|
46
|
+
]
|
47
|
+
|
48
|
+
JARO_OPTIONS = [:ignore_case, :weight]
|
49
|
+
NGRAMS_OPTIONS = [:range]
|
50
|
+
POSITION_OPTIONS = [:max_distance]
|
51
|
+
RESULTS_OPTIONS = [:order, :threshold]
|
52
|
+
|
41
53
|
include StopWords
|
42
54
|
|
43
55
|
attr_accessor :fuzzy_options
|
@@ -46,62 +58,132 @@ module Eco
|
|
46
58
|
@fuzzy_options ||= {}
|
47
59
|
end
|
48
60
|
|
49
|
-
def fuzzy_match(
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
61
|
+
def fuzzy_match(haystack_data = nil, **options)
|
62
|
+
if instance_variable_defined?(:@fuzzy_match) && !haystack_data
|
63
|
+
return @fuzzy_match if fuzzy_match_options == fuzzy_match_options(options)
|
64
|
+
end
|
65
|
+
@fuzzy_options = options
|
54
66
|
# make it run with a native C extension (for better performance: ~130 % increase of performance)
|
55
67
|
::FuzzyMatch.engine = :amatch
|
56
|
-
|
57
|
-
if !fuzzy_read_method && found = items.find {|item| !item.is_a?(String)}
|
58
|
-
raise "To use non String objects as 'haystack' you should provide `read:` or `options[:read]`. Given element: #{found.class}"
|
59
|
-
end
|
60
|
-
end
|
61
|
-
@fuzzy_match = ::FuzzyMatch.new(haystack, fuzzy_options)
|
68
|
+
@fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
|
62
69
|
end
|
63
70
|
|
71
|
+
# TODO: integration for options[:unique_words] => to ensure repeated words do not bring down the score are cut by threshold
|
64
72
|
# @note
|
65
73
|
# - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
|
66
|
-
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
|
74
|
+
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key.
|
75
|
+
# @param needle_str [String, nil] the actual value of needle_str to be used.
|
76
|
+
# @param haystack [Enumerable] the items to find `needle` among.
|
67
77
|
# @return [Eco::Data::FuzzyMatch::Results]
|
68
|
-
def find_all_with_score(needle, **options)
|
69
|
-
|
78
|
+
def find_all_with_score(needle, needle_str: nil, haystack: nil, **options)
|
79
|
+
base_match = fuzzy_match(haystack, **options)
|
80
|
+
match_results = base_match.find_all_with_score(needle_str || needle)
|
81
|
+
needle_str ||= item_string(needle)
|
82
|
+
results = match_results.each_with_object([]) do |fuzzy_results, results|
|
70
83
|
item, dice, lev = fuzzy_results
|
71
84
|
unless item == needle
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
85
|
+
item_str = item_string(item)
|
86
|
+
|
87
|
+
if item_str.to_s.strip.empty? || needle_str.to_s.strip.empty?
|
88
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
|
89
|
+
end
|
90
|
+
|
91
|
+
jaro_res ||= jaro(needle_str, item_str)
|
92
|
+
ngram_res ||= ngram(needle_str, item_str)
|
93
|
+
wngram_res ||= words_ngram(needle_str, item_str)
|
94
|
+
pos_res ||= position(needle_str, item_str)
|
95
|
+
|
96
|
+
results << Result.new(item, item_str, needle_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
Results.new(needle, needle_str, results).tap do |res|
|
100
|
+
res.order = fuzzy_options[:order] if fuzzy_options[:order]
|
101
|
+
res.threshold = fuzzy_options[:threshold] if fuzzy_options[:threshold]
|
102
|
+
end.relevant_results
|
103
|
+
end
|
104
|
+
|
105
|
+
def recalculate_results(results, needle_str: nil, **options)
|
106
|
+
raise "You should provide a block |needle_str, item_str, needle, item|" unless block_given?
|
107
|
+
new_results = results.each_with_object([]) do |result, new_results|
|
108
|
+
nstr, istr = yield(needle_str || results.value, result.value, results.needle, result.match)
|
109
|
+
|
110
|
+
if istr.to_s.strip.empty?
|
111
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 1
|
112
|
+
elsif nstr.to_s.strip.empty?
|
113
|
+
unless istr = needle_str
|
114
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
|
115
|
+
end
|
79
116
|
end
|
117
|
+
|
118
|
+
res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
|
119
|
+
dice ||= res&.dices_coefficient_similar || 0
|
120
|
+
lev ||= res&.levenshtein_similar || 0
|
121
|
+
jaro_res ||= jaro(nstr, istr)
|
122
|
+
ngram_res ||= ngram(nstr, istr)
|
123
|
+
wngram_res ||= words_ngram(nstr, istr)
|
124
|
+
pos_res ||= position(nstr, istr)
|
125
|
+
|
126
|
+
new_results << Result.new(*result.values_at(:match, :value, :needle_str), dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
80
127
|
end
|
81
|
-
Results.new(needle,
|
128
|
+
Results.new(results.needle, results.value, new_results).tap do |res|
|
129
|
+
res.order = options[:order] if options[:order]
|
130
|
+
res.threshold = options[:threshold] if options[:threshold]
|
131
|
+
end.relevant_results
|
82
132
|
end
|
83
133
|
|
84
134
|
private
|
85
135
|
|
136
|
+
def jaro(str1, str2)
|
137
|
+
options = fuzzy_options.slice(*JARO_OPTIONS)
|
138
|
+
self.class.jaro_winkler(str1, str2, **options)
|
139
|
+
end
|
140
|
+
|
141
|
+
def ngram(str1, str2)
|
142
|
+
options = { range: 3..5 }.merge(fuzzy_options.slice(*NGRAMS_OPTIONS))
|
143
|
+
self.class.ngrams_score(str1, str2, **options).ratio
|
144
|
+
end
|
145
|
+
|
146
|
+
def words_ngram(str1, str2)
|
147
|
+
options = { range: 3..7 }.merge(fuzzy_options.slice(*NGRAMS_OPTIONS))
|
148
|
+
self.class.words_ngrams_score(str1, str2, **options).ratio
|
149
|
+
end
|
150
|
+
|
151
|
+
def position(str1, str2)
|
152
|
+
options = fuzzy_options.slice(*POSITION_OPTIONS)
|
153
|
+
self.class.chars_position_score(str1, str2, **options).ratio
|
154
|
+
end
|
155
|
+
|
86
156
|
# @note
|
87
157
|
# - When used in an `Enumerable` it will use `to_a`, or `values` if it's a `Hash`
|
88
158
|
# @param data [Enumerable, nil]
|
89
159
|
# @return [Array<Object>] the non-repeated values of `data`
|
90
|
-
def
|
160
|
+
def haystack(data = nil)
|
91
161
|
data = self if self.is_a?(Enumerable) && !data
|
92
162
|
raise "'data' should be an Enumerable. Given: #{data.class}" unless data.is_a?(Enumerable)
|
93
163
|
data = self.is_a?(Hash) ? self.values.flatten : to_a.flatten
|
94
|
-
data.uniq.compact
|
164
|
+
data.uniq.compact.tap do |items|
|
165
|
+
if !fuzzy_read_method && found = items.find {|item| !item.is_a?(String)}
|
166
|
+
raise "To use non String objects as 'haystack' you should provide `read:` or `options[:read]`. Given element: #{found.class}"
|
167
|
+
end
|
168
|
+
end
|
95
169
|
end
|
96
170
|
|
97
171
|
def item_string(item, attr = fuzzy_read_method)
|
98
172
|
return item if !item || item.is_a?(String) || !attr
|
173
|
+
return attr.call(item) if attr.is_a?(Proc)
|
99
174
|
attr = attr.to_sym
|
100
175
|
return item.send(attr) if item.respond_to?(attr)
|
101
176
|
end
|
102
177
|
|
178
|
+
def fuzzy_match_options(options = nil)
|
179
|
+
options = fuzzy_options unless options
|
180
|
+
options.slice(*FUZZY_MATCH_OPTIONS).merge({
|
181
|
+
stop_words: PREPOSITIONS + PRONOUNS + ARTICLES
|
182
|
+
})
|
183
|
+
end
|
184
|
+
|
103
185
|
def fuzzy_read_method
|
104
|
-
|
186
|
+
fuzzy_match_options[:read]
|
105
187
|
end
|
106
188
|
|
107
189
|
end
|