eco-helpers 2.0.18 → 2.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +80 -1
- data/eco-helpers.gemspec +4 -1
- data/lib/eco/api/common/base_loader.rb +9 -5
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +88 -23
- data/lib/eco/api/common/people/person_entry.rb +1 -0
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/session.rb +1 -0
- data/lib/eco/api/common/session/base_session.rb +2 -0
- data/lib/eco/api/common/session/helpers.rb +30 -0
- data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/microcases/with_each.rb +67 -6
- data/lib/eco/api/microcases/with_each_present.rb +4 -2
- data/lib/eco/api/microcases/with_each_starter.rb +4 -2
- data/lib/eco/api/organization.rb +1 -1
- data/lib/eco/api/organization/people.rb +94 -25
- data/lib/eco/api/organization/people_similarity.rb +272 -0
- data/lib/eco/api/organization/person_schemas.rb +5 -1
- data/lib/eco/api/organization/policy_groups.rb +5 -1
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +19 -8
- data/lib/eco/api/session/batch.rb +7 -5
- data/lib/eco/api/session/batch/job.rb +34 -9
- data/lib/eco/api/usecases.rb +2 -2
- data/lib/eco/api/usecases/base_case.rb +2 -2
- data/lib/eco/api/usecases/base_io.rb +17 -4
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +179 -32
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +81 -36
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +3 -4
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
- data/lib/eco/cli/config/default/input.rb +61 -8
- data/lib/eco/cli/config/default/options.rb +47 -2
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +33 -2
- data/lib/eco/cli/config/default/workflow.rb +12 -7
- data/lib/eco/cli/scripting/args_helpers.rb +2 -2
- data/lib/eco/csv.rb +4 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +109 -27
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +19 -10
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -19
- data/lib/eco/data/fuzzy_match/result.rb +22 -2
- data/lib/eco/data/fuzzy_match/results.rb +30 -6
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +67 -3
- data/lib/eco/api/organization/people_analytics.rb +0 -60
@@ -28,7 +28,10 @@ ASSETS.cli.config do |config|
|
|
28
28
|
cases_with_input = config.usecases.active(io: io).select do |usecase, data|
|
29
29
|
io.class.input_required?(usecase.type)
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
|
+
input_is_required = !cases_with_input.empty? || io.options.dig(:input, :entries_from)
|
33
|
+
missing_input = !io.input || io.input.empty?
|
34
|
+
next io unless missing_input && input_is_required
|
32
35
|
|
33
36
|
if io.options.dig(:input, :entries_from)
|
34
37
|
io = io.new(input: config.input.get(io: io))
|
@@ -50,8 +53,7 @@ ASSETS.cli.config do |config|
|
|
50
53
|
cases_with_people = config.usecases.active(io: io).select do |usecase, data|
|
51
54
|
io.class.people_required?(usecase.type)
|
52
55
|
end
|
53
|
-
|
54
|
-
next io unless !cases_with_people.empty? || get_people
|
56
|
+
next io if cases_with_people.empty? && !io.options.dig(:people, :get)
|
55
57
|
io = io.new(people: config.people(io: io))
|
56
58
|
end
|
57
59
|
|
@@ -64,7 +66,8 @@ ASSETS.cli.config do |config|
|
|
64
66
|
|
65
67
|
wf.before(:usecases) do |wf_cases, io|
|
66
68
|
# save partial entries -> should be native to session.workflow
|
67
|
-
|
69
|
+
get_people = io.options.dig(:people, :get)
|
70
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
68
71
|
if !io.options[:dry_run] && partial_update
|
69
72
|
partial_file = io.session.config.people.partial_cache
|
70
73
|
io.session.file_manager.save_json(io.people, partial_file, :timestamp)
|
@@ -95,11 +98,12 @@ ASSETS.cli.config do |config|
|
|
95
98
|
if io.session.post_launch.empty?
|
96
99
|
wf_post.skip!
|
97
100
|
else
|
98
|
-
|
101
|
+
get_people = io.options.dig(:people, :get)
|
102
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
99
103
|
if !io.options[:dry_run] && partial_update
|
100
104
|
# get target people afresh
|
101
105
|
people = io.session.micro.people_refresh(people: io.people, include_created: true)
|
102
|
-
io = io.new(people: people)
|
106
|
+
io = io.base.new(people: people)
|
103
107
|
else
|
104
108
|
wf_post.skip!
|
105
109
|
msg = "Although there are post_launch cases, they will NOT be RUN"
|
@@ -136,7 +140,8 @@ ASSETS.cli.config do |config|
|
|
136
140
|
end
|
137
141
|
|
138
142
|
wf.on(:end) do |wf_end, io|
|
139
|
-
|
143
|
+
get_people = io.options.dig(:people, :get)
|
144
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
140
145
|
unless !io.options[:end_get] || io.options[:dry_run] || partial_update
|
141
146
|
people = io.session.micro.people_cache
|
142
147
|
io = io.new(people: people)
|
@@ -75,10 +75,10 @@ module Eco
|
|
75
75
|
def get_file(key, required: false, should_exist: true)
|
76
76
|
filename = get_arg(key, with_param: true)
|
77
77
|
if !filename && required
|
78
|
-
puts "You need to specify a file '#{key}
|
78
|
+
puts "You need to specify a file or folder '#{key} file_or_folder'"
|
79
79
|
exit(1)
|
80
80
|
elsif !file_exists?(filename) && should_exist && required
|
81
|
-
puts "This file doesn't exist '#{filename}'"
|
81
|
+
puts "This file/folder doesn't exist '#{filename}'"
|
82
82
|
exit(1)
|
83
83
|
end
|
84
84
|
|
data/lib/eco/csv.rb
CHANGED
@@ -18,8 +18,10 @@ module Eco
|
|
18
18
|
kargs = {headers: true, skip_blanks: true}.merge(kargs)
|
19
19
|
|
20
20
|
args = [file].tap do |arg|
|
21
|
-
|
22
|
-
|
21
|
+
encoding = Eco::API::Common::Session::FileManager.encoding(file)
|
22
|
+
#encoding = (encoding != "utf-8")? "#{encoding}|utf-8": encoding
|
23
|
+
#arg.push(encoding)
|
24
|
+
arg.push("rb:bom|utf-8") if encoding == "bom"
|
23
25
|
end
|
24
26
|
|
25
27
|
out = super(*args, **kargs).reject do |row|
|
data/lib/eco/csv/table.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
module Eco
|
3
2
|
class CSV
|
4
3
|
class Table < ::CSV::Table
|
@@ -9,6 +8,70 @@ module Eco
|
|
9
8
|
super(to_rows_array(input))
|
10
9
|
end
|
11
10
|
|
11
|
+
# @return [Hash] where keys are the groups and the values a `Eco::CSV::Table`
|
12
|
+
def group_by(&block)
|
13
|
+
rows.group_by(&block).transform_values do |rows|
|
14
|
+
self.class.new(rows)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# @return [Eco::CSV::Table]
|
19
|
+
def transform_values
|
20
|
+
transformed_rows = rows.map do |row|
|
21
|
+
res = yield(row)
|
22
|
+
case res
|
23
|
+
when Array
|
24
|
+
::CSV::Row.new(row.headers, res)
|
25
|
+
when ::CSV::Row
|
26
|
+
res
|
27
|
+
end
|
28
|
+
end
|
29
|
+
self.class.new(transformed_rows)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Slices the selected rows
|
33
|
+
# @return [Eco::CSV::Table]
|
34
|
+
def slice(*index)
|
35
|
+
case index.first
|
36
|
+
when Range, Numeric
|
37
|
+
self.class.new(rows.slice(index.first))
|
38
|
+
else
|
39
|
+
self
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# @return [Eco::CSV::Table]
|
44
|
+
def slice_columns(*index)
|
45
|
+
case index.first
|
46
|
+
when Range, Numeric
|
47
|
+
columns_to_table(columns.slice(index.first))
|
48
|
+
when String
|
49
|
+
csv_cols = columns
|
50
|
+
csv_cols = index.each_with_object([]) do |name, cols|
|
51
|
+
col = csv_cols.find {|col| col.first == name}
|
52
|
+
cols << col if col
|
53
|
+
end
|
54
|
+
columns_to_table(csv_cols)
|
55
|
+
else
|
56
|
+
self
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# @return [Eco::CSV::Table]
|
61
|
+
def delete_column(i)
|
62
|
+
csv_cols = columns
|
63
|
+
csv_cols.delete(i)
|
64
|
+
columns_to_table(csv_cols)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Adds a new column at the end
|
68
|
+
# @param header_name [String] header of the new column
|
69
|
+
# @return [Eco::CSV::Table] with a new empty column
|
70
|
+
def add_column(header_name)
|
71
|
+
new_col = Array.new(length).unshift(header_name)
|
72
|
+
columns_to_table(columns.push(new_col))
|
73
|
+
end
|
74
|
+
|
12
75
|
# @return [Array<::CSV::Row>]
|
13
76
|
def rows
|
14
77
|
[].tap do |out|
|
@@ -16,24 +79,40 @@ module Eco
|
|
16
79
|
end
|
17
80
|
end
|
18
81
|
|
82
|
+
# Removes duplicated rows (rows whose values all equal those of an earlier row), keeping the first occurrence
|
83
|
+
def delete_duplicates!
|
84
|
+
unique_rows = []
|
85
|
+
self.by_row!.delete_if do |row|
|
86
|
+
unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
|
87
|
+
unique_rows << row unless found
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# @param row1 [::CSV::Row] row to be compared
|
93
|
+
# @param row2 [::CSV::Row] row to be compared
|
94
|
+
# @return [Boolean] `true` if every value of `row1` equals the corresponding value of `row2`
|
95
|
+
def equal_rows?(row1, row2)
|
96
|
+
row1.fields.zip(row2.fields).all? do |(v1, v2)|
|
97
|
+
v1 == v2
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
19
101
|
# @return [Integer] total number of rows not including the header
|
20
102
|
def length
|
21
103
|
to_a.length - 1
|
22
104
|
end
|
23
105
|
|
106
|
+
def empty?
|
107
|
+
length < 1
|
108
|
+
end
|
109
|
+
|
24
110
|
# @return [Array<Array>] each array is the column header followed by its values
|
25
111
|
def columns
|
26
112
|
to_a.transpose
|
27
113
|
end
|
28
114
|
|
29
|
-
#
|
30
|
-
# @param header_name [String] header of the new column
|
31
|
-
# @return [Eco::CSV::Table] with a new empty column
|
32
|
-
def add_column(header_name)
|
33
|
-
new_col = Array.new(length).unshift(header_name)
|
34
|
-
columns_to_table(columns.push(new_col))
|
35
|
-
end
|
36
|
-
|
115
|
+
# Creates a single `Hash` where each key, value is a column (header + values)
|
37
116
|
# @note it will override columns with same header name
|
38
117
|
# @return [Hash] keys are headers, values are arrays
|
39
118
|
def columns_hash
|
@@ -42,6 +121,17 @@ module Eco
|
|
42
121
|
end.to_h
|
43
122
|
end
|
44
123
|
|
124
|
+
# Returns an array of row hashes
|
125
|
+
# @note it will override columns with same header
|
126
|
+
def to_a_h
|
127
|
+
rows.map(&:to_h)
|
128
|
+
end
|
129
|
+
|
130
|
+
# @see #to_a_h
|
131
|
+
def to_array_of_hashes
|
132
|
+
to_a_h
|
133
|
+
end
|
134
|
+
|
45
135
|
private
|
46
136
|
|
47
137
|
def columns_to_table(columns_array)
|
@@ -51,24 +141,34 @@ module Eco
|
|
51
141
|
|
52
142
|
def to_rows_array(data)
|
53
143
|
case data
|
54
|
-
when Array
|
55
|
-
return data unless data.length > 0
|
56
|
-
if data.first.is_a?(::CSV::Row)
|
57
|
-
data
|
58
|
-
elsif data.first.is_a?(Array)
|
59
|
-
headers = data.shift
|
60
|
-
data.map do |arr_row|
|
61
|
-
CSV::Row.new(headers, arr_row)
|
62
|
-
end.compact
|
63
|
-
else
|
64
|
-
raise "Expected data that can be transformed into Array<Array>"
|
65
|
-
end
|
66
144
|
when ::CSV::Table
|
67
145
|
to_rows_array(data.to_a)
|
68
146
|
when Hash
|
69
147
|
# hash with column headers as keys and column arrays as values
|
70
148
|
rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
|
71
149
|
to_rows_array(data.keys)
|
150
|
+
when Enumerable
|
151
|
+
data = data.dup.compact
|
152
|
+
return data unless data.count > 0
|
153
|
+
sample = data.first
|
154
|
+
|
155
|
+
case sample
|
156
|
+
when ::CSV::Row
|
157
|
+
data
|
158
|
+
when Array
|
159
|
+
headers = data.shift
|
160
|
+
data.map do |arr_row|
|
161
|
+
::CSV::Row.new(headers, arr_row)
|
162
|
+
end.compact
|
163
|
+
when Hash
|
164
|
+
headers = sample.keys
|
165
|
+
headers_str = headers.map(&:to_s)
|
166
|
+
data.map do |hash|
|
167
|
+
::CSV::Row.new(headers_str, hash.values_at(*headers))
|
168
|
+
end.compact
|
169
|
+
else
|
170
|
+
raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
|
171
|
+
end
|
72
172
|
else
|
73
173
|
raise "Input type not supported. Given: #{data.class}"
|
74
174
|
end
|
data/lib/eco/data/fuzzy_match.rb
CHANGED
@@ -27,17 +27,29 @@ module Eco
|
|
27
27
|
include CharsPositionScore
|
28
28
|
include NGramsScore
|
29
29
|
|
30
|
-
def jaro_winkler(str1, str2)
|
30
|
+
def jaro_winkler(str1, str2, **options)
|
31
|
+
return 0 if !str1 || !str2
|
31
32
|
options = {
|
32
33
|
ignore_case: true,
|
33
34
|
weight: 0.25
|
34
|
-
}
|
35
|
+
}.merge(options)
|
35
36
|
JaroWinkler.distance(str1, str2, **options)
|
36
37
|
end
|
37
38
|
|
38
39
|
end
|
39
40
|
|
40
41
|
module InstanceMethods
|
42
|
+
FUZZY_MATCH_OPTIONS = [
|
43
|
+
:identities, :groupings, :stop_words, :read,
|
44
|
+
:must_match_grouping, :must_match_at_least_one_word,
|
45
|
+
:gather_last_result, :threshold
|
46
|
+
]
|
47
|
+
|
48
|
+
JARO_OPTIONS = [:ignore_case, :weight]
|
49
|
+
NGRAMS_OPTIONS = [:range]
|
50
|
+
POSITION_OPTIONS = [:max_distance]
|
51
|
+
RESULTS_OPTIONS = [:order, :threshold]
|
52
|
+
|
41
53
|
include StopWords
|
42
54
|
|
43
55
|
attr_accessor :fuzzy_options
|
@@ -46,62 +58,132 @@ module Eco
|
|
46
58
|
@fuzzy_options ||= {}
|
47
59
|
end
|
48
60
|
|
49
|
-
def fuzzy_match(
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
61
|
+
def fuzzy_match(haystack_data = nil, **options)
|
62
|
+
if instance_variable_defined?(:@fuzzy_match) && !haystack_data
|
63
|
+
return @fuzzy_match if fuzzy_match_options == fuzzy_match_options(options)
|
64
|
+
end
|
65
|
+
@fuzzy_options = options
|
54
66
|
# make it run with a native C extension (for better performance: ~130 % increase of performance)
|
55
67
|
::FuzzyMatch.engine = :amatch
|
56
|
-
|
57
|
-
if !fuzzy_read_method && found = items.find {|item| !item.is_a?(String)}
|
58
|
-
raise "To use non String objects as 'haystack' you should provide `read:` or `options[:read]`. Given element: #{found.class}"
|
59
|
-
end
|
60
|
-
end
|
61
|
-
@fuzzy_match = ::FuzzyMatch.new(haystack, fuzzy_options)
|
68
|
+
@fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
|
62
69
|
end
|
63
70
|
|
71
|
+
# TODO: integration for options[:unique_words] => to ensure repeated words do not bring down the score are cut by threshold
|
64
72
|
# @note
|
65
73
|
# - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
|
66
|
-
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
|
74
|
+
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key.
|
75
|
+
# @param needle_str [String, nil] the string to match against the haystack instead of `needle`; when `nil`, it is read from `needle`.
|
76
|
+
# @param haystack [Enumerable] the items to find `needle` among.
|
67
77
|
# @return [Eco::Data::FuzzyMatch::Results]
|
68
|
-
def find_all_with_score(needle, **options)
|
69
|
-
|
78
|
+
def find_all_with_score(needle, needle_str: nil, haystack: nil, **options)
|
79
|
+
base_match = fuzzy_match(haystack, **options)
|
80
|
+
match_results = base_match.find_all_with_score(needle_str || needle)
|
81
|
+
needle_str ||= item_string(needle)
|
82
|
+
results = match_results.each_with_object([]) do |fuzzy_results, results|
|
70
83
|
item, dice, lev = fuzzy_results
|
71
84
|
unless item == needle
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
85
|
+
item_str = item_string(item)
|
86
|
+
|
87
|
+
if item_str.to_s.strip.empty? || needle_str.to_s.strip.empty?
|
88
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
|
89
|
+
end
|
90
|
+
|
91
|
+
jaro_res ||= jaro(needle_str, item_str)
|
92
|
+
ngram_res ||= ngram(needle_str, item_str)
|
93
|
+
wngram_res ||= words_ngram(needle_str, item_str)
|
94
|
+
pos_res ||= position(needle_str, item_str)
|
95
|
+
|
96
|
+
results << Result.new(item, item_str, needle_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
Results.new(needle, needle_str, results).tap do |res|
|
100
|
+
res.order = fuzzy_options[:order] if fuzzy_options[:order]
|
101
|
+
res.threshold = fuzzy_options[:threshold] if fuzzy_options[:threshold]
|
102
|
+
end.relevant_results
|
103
|
+
end
|
104
|
+
|
105
|
+
def recalculate_results(results, needle_str: nil, **options)
|
106
|
+
raise "You should provide a block |needle_str, item_str, needle, item|" unless block_given?
|
107
|
+
new_results = results.each_with_object([]) do |result, new_results|
|
108
|
+
nstr, istr = yield(needle_str || results.value, result.value, results.needle, result.match)
|
109
|
+
|
110
|
+
if istr.to_s.strip.empty?
|
111
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 1
|
112
|
+
elsif nstr.to_s.strip.empty?
|
113
|
+
unless istr = needle_str
|
114
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
|
115
|
+
end
|
79
116
|
end
|
117
|
+
|
118
|
+
res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
|
119
|
+
dice ||= res&.dices_coefficient_similar || 0
|
120
|
+
lev ||= res&.levenshtein_similar || 0
|
121
|
+
jaro_res ||= jaro(nstr, istr)
|
122
|
+
ngram_res ||= ngram(nstr, istr)
|
123
|
+
wngram_res ||= words_ngram(nstr, istr)
|
124
|
+
pos_res ||= position(nstr, istr)
|
125
|
+
|
126
|
+
new_results << Result.new(*result.values_at(:match, :value, :needle_str), dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
80
127
|
end
|
81
|
-
Results.new(needle,
|
128
|
+
Results.new(results.needle, results.value, new_results).tap do |res|
|
129
|
+
res.order = options[:order] if options[:order]
|
130
|
+
res.threshold = options[:threshold] if options[:threshold]
|
131
|
+
end.relevant_results
|
82
132
|
end
|
83
133
|
|
84
134
|
private
|
85
135
|
|
136
|
+
def jaro(str1, str2)
|
137
|
+
options = fuzzy_options.slice(*JARO_OPTIONS)
|
138
|
+
self.class.jaro_winkler(str1, str2, **options)
|
139
|
+
end
|
140
|
+
|
141
|
+
def ngram(str1, str2)
|
142
|
+
options = { range: 3..5 }.merge(fuzzy_options.slice(*NGRAMS_OPTIONS))
|
143
|
+
self.class.ngrams_score(str1, str2, **options).ratio
|
144
|
+
end
|
145
|
+
|
146
|
+
def words_ngram(str1, str2)
|
147
|
+
options = { range: 3..7 }.merge(fuzzy_options.slice(*NGRAMS_OPTIONS))
|
148
|
+
self.class.words_ngrams_score(str1, str2, **options).ratio
|
149
|
+
end
|
150
|
+
|
151
|
+
def position(str1, str2)
|
152
|
+
options = fuzzy_options.slice(*POSITION_OPTIONS)
|
153
|
+
self.class.chars_position_score(str1, str2, **options).ratio
|
154
|
+
end
|
155
|
+
|
86
156
|
# @note
|
87
157
|
# - When used in an `Enumerable` it will use `to_a`, or `values` if it's a `Hash`
|
88
158
|
# @param data [Enumerable, nil]
|
89
159
|
# @return [Array<Object>] the non-repeated values of `data`
|
90
|
-
def
|
160
|
+
def haystack(data = nil)
|
91
161
|
data = self if self.is_a?(Enumerable) && !data
|
92
162
|
raise "'data' should be an Enumerable. Given: #{data.class}" unless data.is_a?(Enumerable)
|
93
163
|
data = self.is_a?(Hash) ? self.values.flatten : to_a.flatten
|
94
|
-
data.uniq.compact
|
164
|
+
data.uniq.compact.tap do |items|
|
165
|
+
if !fuzzy_read_method && found = items.find {|item| !item.is_a?(String)}
|
166
|
+
raise "To use non String objects as 'haystack' you should provide `read:` or `options[:read]`. Given element: #{found.class}"
|
167
|
+
end
|
168
|
+
end
|
95
169
|
end
|
96
170
|
|
97
171
|
def item_string(item, attr = fuzzy_read_method)
|
98
172
|
return item if !item || item.is_a?(String) || !attr
|
173
|
+
return attr.call(item) if attr.is_a?(Proc)
|
99
174
|
attr = attr.to_sym
|
100
175
|
return item.send(attr) if item.respond_to?(attr)
|
101
176
|
end
|
102
177
|
|
178
|
+
def fuzzy_match_options(options = nil)
|
179
|
+
options = fuzzy_options unless options
|
180
|
+
options.slice(*FUZZY_MATCH_OPTIONS).merge({
|
181
|
+
stop_words: PREPOSITIONS + PRONOUNS + ARTICLES
|
182
|
+
})
|
183
|
+
end
|
184
|
+
|
103
185
|
def fuzzy_read_method
|
104
|
-
|
186
|
+
fuzzy_match_options[:read]
|
105
187
|
end
|
106
188
|
|
107
189
|
end
|