eco-helpers 2.0.19 → 2.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/eco-helpers.gemspec +5 -1
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +49 -15
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +2 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +4 -2
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/cli/config/default/options.rb +29 -1
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +7 -5
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +83 -2
@@ -53,8 +53,7 @@ ASSETS.cli.config do |config|
|
|
53
53
|
cases_with_people = config.usecases.active(io: io).select do |usecase, data|
|
54
54
|
io.class.people_required?(usecase.type)
|
55
55
|
end
|
56
|
-
|
57
|
-
next io unless !cases_with_people.empty? || get_people
|
56
|
+
next io if cases_with_people.empty? && !io.options.dig(:people, :get)
|
58
57
|
io = io.new(people: config.people(io: io))
|
59
58
|
end
|
60
59
|
|
@@ -67,7 +66,8 @@ ASSETS.cli.config do |config|
|
|
67
66
|
|
68
67
|
wf.before(:usecases) do |wf_cases, io|
|
69
68
|
# save partial entries -> should be native to session.workflow
|
70
|
-
|
69
|
+
get_people = io.options.dig(:people, :get)
|
70
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
71
71
|
if !io.options[:dry_run] && partial_update
|
72
72
|
partial_file = io.session.config.people.partial_cache
|
73
73
|
io.session.file_manager.save_json(io.people, partial_file, :timestamp)
|
@@ -98,7 +98,8 @@ ASSETS.cli.config do |config|
|
|
98
98
|
if io.session.post_launch.empty?
|
99
99
|
wf_post.skip!
|
100
100
|
else
|
101
|
-
|
101
|
+
get_people = io.options.dig(:people, :get)
|
102
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
102
103
|
if !io.options[:dry_run] && partial_update
|
103
104
|
# get target people afresh
|
104
105
|
people = io.session.micro.people_refresh(people: io.people, include_created: true)
|
@@ -139,7 +140,8 @@ ASSETS.cli.config do |config|
|
|
139
140
|
end
|
140
141
|
|
141
142
|
wf.on(:end) do |wf_end, io|
|
142
|
-
|
143
|
+
get_people = io.options.dig(:people, :get)
|
144
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
143
145
|
unless !io.options[:end_get] || io.options[:dry_run] || partial_update
|
144
146
|
people = io.session.micro.people_cache
|
145
147
|
io = io.new(people: people)
|
data/lib/eco/csv/table.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
module Eco
|
3
2
|
class CSV
|
4
3
|
class Table < ::CSV::Table
|
@@ -9,6 +8,70 @@ module Eco
|
|
9
8
|
super(to_rows_array(input))
|
10
9
|
end
|
11
10
|
|
11
|
+
# @return [Hash] where the keys are the groups and each value is an `Eco::CSV::Table` with the rows of that group
|
12
|
+
def group_by(&block)
|
13
|
+
rows.group_by(&block).transform_values do |rows|
|
14
|
+
self.class.new(rows)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# @return [Eco::CSV::Table]
|
19
|
+
def transform_values
|
20
|
+
transformed_rows = rows.map do |row|
|
21
|
+
res = yield(row)
|
22
|
+
case res
|
23
|
+
when Array
|
24
|
+
::CSV::Row.new(row.headers, res)
|
25
|
+
when ::CSV::Row
|
26
|
+
res
|
27
|
+
end
|
28
|
+
end
|
29
|
+
self.class.new(transformed_rows)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Slices the selected rows
|
33
|
+
# @return [Eco::CSV::Table]
|
34
|
+
def slice(*index)
|
35
|
+
case index.first
|
36
|
+
when Range, Numeric
|
37
|
+
self.class.new(rows.slice(index.first))
|
38
|
+
else
|
39
|
+
self
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# @return [Eco::CSV::Table]
|
44
|
+
def slice_columns(*index)
|
45
|
+
case index.first
|
46
|
+
when Range, Numeric
|
47
|
+
columns_to_table(columns.slice(index.first))
|
48
|
+
when String
|
49
|
+
csv_cols = columns
|
50
|
+
csv_cols = index.each_with_object([]) do |name, cols|
|
51
|
+
col = csv_cols.find {|col| col.first == name}
|
52
|
+
cols << col if col
|
53
|
+
end
|
54
|
+
columns_to_table(csv_cols)
|
55
|
+
else
|
56
|
+
self
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# @return [Eco::CSV::Table]
|
61
|
+
def delete_column(i)
|
62
|
+
csv_cols = columns
|
63
|
+
csv_cols.delete(i)
|
64
|
+
columns_to_table(csv_cols)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Adds a new column at the end
|
68
|
+
# @param header_name [String] header of the new column
|
69
|
+
# @return [Eco::CSV::Table] with a new empty column
|
70
|
+
def add_column(header_name)
|
71
|
+
new_col = Array.new(length).unshift(header_name)
|
72
|
+
columns_to_table(columns.push(new_col))
|
73
|
+
end
|
74
|
+
|
12
75
|
# @return [Array<::CSV::Row>]
|
13
76
|
def rows
|
14
77
|
[].tap do |out|
|
@@ -16,24 +79,40 @@ module Eco
|
|
16
79
|
end
|
17
80
|
end
|
18
81
|
|
82
|
+
# Removes duplicated rows (rows whose values all match those of a previous row), keeping the first occurrence
|
83
|
+
def delete_duplicates!
|
84
|
+
unique_rows = []
|
85
|
+
self.by_row!.delete_if do |row|
|
86
|
+
unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
|
87
|
+
unique_rows << row unless found
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# @param row1 [::CSV::Row] row to be compared
|
93
|
+
# @param row2 [::CSV::Row] row to be compared
|
94
|
+
# @return [Boolean] `true` if every value of `row1` equals the value of `row2` at the same position
|
95
|
+
def equal_rows?(row1, row2)
|
96
|
+
row1.fields.zip(row2.fields).all? do |(v1, v2)|
|
97
|
+
v1 == v2
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
19
101
|
# @return [Integer] total number of rows not including the header
|
20
102
|
def length
|
21
103
|
to_a.length - 1
|
22
104
|
end
|
23
105
|
|
106
|
+
def empty?
|
107
|
+
length < 1
|
108
|
+
end
|
109
|
+
|
24
110
|
# @return [Array<Array>] each array is the column header followed by its values
|
25
111
|
def columns
|
26
112
|
to_a.transpose
|
27
113
|
end
|
28
114
|
|
29
|
-
#
|
30
|
-
# @param header_name [String] header of the new column
|
31
|
-
# @return [Eco::CSV::Table] with a new empty column
|
32
|
-
def add_column(header_name)
|
33
|
-
new_col = Array.new(length).unshift(header_name)
|
34
|
-
columns_to_table(columns.push(new_col))
|
35
|
-
end
|
36
|
-
|
115
|
+
# Creates a single `Hash` where each key, value is a column (header + values)
|
37
116
|
# @note it will override columns with same header name
|
38
117
|
# @return [Hash] keys are headers, values are arrays
|
39
118
|
def columns_hash
|
@@ -42,6 +121,17 @@ module Eco
|
|
42
121
|
end.to_h
|
43
122
|
end
|
44
123
|
|
124
|
+
# Returns an array of row hashes
|
125
|
+
# @note it will override columns with same header
|
126
|
+
def to_a_h
|
127
|
+
rows.map(&:to_h)
|
128
|
+
end
|
129
|
+
|
130
|
+
# @see #to_a_h
|
131
|
+
def to_array_of_hashes
|
132
|
+
to_a_h
|
133
|
+
end
|
134
|
+
|
45
135
|
private
|
46
136
|
|
47
137
|
def columns_to_table(columns_array)
|
@@ -51,24 +141,34 @@ module Eco
|
|
51
141
|
|
52
142
|
def to_rows_array(data)
|
53
143
|
case data
|
54
|
-
when Array
|
55
|
-
return data unless data.length > 0
|
56
|
-
if data.first.is_a?(::CSV::Row)
|
57
|
-
data
|
58
|
-
elsif data.first.is_a?(Array)
|
59
|
-
headers = data.shift
|
60
|
-
data.map do |arr_row|
|
61
|
-
CSV::Row.new(headers, arr_row)
|
62
|
-
end.compact
|
63
|
-
else
|
64
|
-
raise "Expected data that can be transformed into Array<Array>"
|
65
|
-
end
|
66
144
|
when ::CSV::Table
|
67
145
|
to_rows_array(data.to_a)
|
68
146
|
when Hash
|
69
147
|
# hash of columns header as key and column array as value
|
70
148
|
rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
|
71
149
|
to_rows_array(data.keys)
|
150
|
+
when Enumerable
|
151
|
+
data = data.dup.compact
|
152
|
+
return data unless data.count > 0
|
153
|
+
sample = data.first
|
154
|
+
|
155
|
+
case sample
|
156
|
+
when ::CSV::Row
|
157
|
+
data
|
158
|
+
when Array
|
159
|
+
headers = data.shift
|
160
|
+
data.map do |arr_row|
|
161
|
+
::CSV::Row.new(headers, arr_row)
|
162
|
+
end.compact
|
163
|
+
when Hash
|
164
|
+
headers = sample.keys
|
165
|
+
headers_str = headers.map(&:to_s)
|
166
|
+
data.map do |hash|
|
167
|
+
::CSV::Row.new(headers_str, hash.values_at(*headers))
|
168
|
+
end.compact
|
169
|
+
else
|
170
|
+
raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
|
171
|
+
end
|
72
172
|
else
|
73
173
|
raise "Input type not supported. Given: #{data.class}"
|
74
174
|
end
|
data/lib/eco/data/fuzzy_match.rb
CHANGED
@@ -28,6 +28,7 @@ module Eco
|
|
28
28
|
include NGramsScore
|
29
29
|
|
30
30
|
def jaro_winkler(str1, str2, **options)
|
31
|
+
return 0 if !str1 || !str2
|
31
32
|
options = {
|
32
33
|
ignore_case: true,
|
33
34
|
weight: 0.25
|
@@ -67,28 +68,67 @@ module Eco
|
|
67
68
|
@fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
|
68
69
|
end
|
69
70
|
|
71
|
+
# TODO: add support for options[:unique_words] => so that repeated words do not bring down the score and get results cut by the threshold
|
70
72
|
# @note
|
71
73
|
# - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
|
72
|
-
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
|
74
|
+
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key.
|
75
|
+
# @param needle_str [String, nil] the string to match with in place of `needle`; when `nil`, it is obtained from `needle`.
|
76
|
+
# @param haystack [Enumerable] the items to find `needle` among.
|
73
77
|
# @return [Eco::Data::FuzzyMatch::Results]
|
74
|
-
def find_all_with_score(needle, **options)
|
75
|
-
|
78
|
+
def find_all_with_score(needle, needle_str: nil, haystack: nil, **options)
|
79
|
+
base_match = fuzzy_match(haystack, **options)
|
80
|
+
match_results = base_match.find_all_with_score(needle_str || needle)
|
81
|
+
needle_str ||= item_string(needle)
|
82
|
+
results = match_results.each_with_object([]) do |fuzzy_results, results|
|
76
83
|
item, dice, lev = fuzzy_results
|
77
84
|
unless item == needle
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
85
|
+
item_str = item_string(item)
|
86
|
+
|
87
|
+
if item_str.to_s.strip.empty? || needle_str.to_s.strip.empty?
|
88
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
|
89
|
+
end
|
90
|
+
|
91
|
+
jaro_res ||= jaro(needle_str, item_str)
|
92
|
+
ngram_res ||= ngram(needle_str, item_str)
|
93
|
+
wngram_res ||= words_ngram(needle_str, item_str)
|
94
|
+
pos_res ||= position(needle_str, item_str)
|
95
|
+
|
96
|
+
results << Result.new(item, item_str, needle_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
86
97
|
end
|
87
98
|
end
|
88
|
-
Results.new(needle,
|
99
|
+
Results.new(needle, needle_str, results).tap do |res|
|
89
100
|
res.order = fuzzy_options[:order] if fuzzy_options[:order]
|
90
101
|
res.threshold = fuzzy_options[:threshold] if fuzzy_options[:threshold]
|
102
|
+
end.relevant_results
|
103
|
+
end
|
104
|
+
|
105
|
+
def recalculate_results(results, needle_str: nil, **options)
|
106
|
+
raise "You should provide a block |needle_str, item_str, needle, item|" unless block_given?
|
107
|
+
new_results = results.each_with_object([]) do |result, new_results|
|
108
|
+
nstr, istr = yield(needle_str || results.value, result.value, results.needle, result.match)
|
109
|
+
|
110
|
+
if istr.to_s.strip.empty?
|
111
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 1
|
112
|
+
elsif nstr.to_s.strip.empty?
|
113
|
+
unless istr = needle_str
|
114
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
|
119
|
+
dice ||= res&.dices_coefficient_similar || 0
|
120
|
+
lev ||= res&.levenshtein_similar || 0
|
121
|
+
jaro_res ||= jaro(nstr, istr)
|
122
|
+
ngram_res ||= ngram(nstr, istr)
|
123
|
+
wngram_res ||= words_ngram(nstr, istr)
|
124
|
+
pos_res ||= position(nstr, istr)
|
125
|
+
|
126
|
+
new_results << Result.new(*result.values_at(:match, :value, :needle_str), dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
91
127
|
end
|
128
|
+
Results.new(results.needle, results.value, new_results).tap do |res|
|
129
|
+
res.order = options[:order] if options[:order]
|
130
|
+
res.threshold = options[:threshold] if options[:threshold]
|
131
|
+
end.relevant_results
|
92
132
|
end
|
93
133
|
|
94
134
|
private
|
@@ -12,8 +12,9 @@ module Eco
|
|
12
12
|
def chars_position_score(str1, str2, max_distance: 3, normalized: false)
|
13
13
|
str1, str2 = normalize_string([str1, str2]) unless normalized
|
14
14
|
len1 = str1 && str1.length; len2 = str2 && str2.length
|
15
|
-
Score.new(0,
|
16
|
-
next if !str1 ||
|
15
|
+
Score.new(0, 0).tap do |score|
|
16
|
+
next if !str2 || !str1 || str2.empty? || str1.empty?
|
17
|
+
score.total = len1
|
17
18
|
next score.increase(score.total) if str1 == str2
|
18
19
|
next if len1 < 2
|
19
20
|
pos = 0
|
@@ -16,18 +16,19 @@ module Eco
|
|
16
16
|
|
17
17
|
Score.new(0, 0).tap do |score|
|
18
18
|
next if !str2 || !str1
|
19
|
+
next score.increase_total(len1) if str2.empty? || str1.empty?
|
19
20
|
if str1 == str2
|
20
|
-
score.
|
21
|
+
score.total = len1
|
21
22
|
score.increase(score.total)
|
22
23
|
end
|
23
24
|
if str1.length < 2 || str1.length < 2
|
24
25
|
score.increase_total(len1)
|
25
26
|
end
|
26
27
|
|
27
|
-
paired_words(str1, str2, normalized: true) do |needle, item|
|
28
|
+
pairs = paired_words(str1, str2, normalized: true) do |needle, item|
|
28
29
|
ngrams_score(needle, item, range: range, normalized: true)
|
29
|
-
end.each do |sub_str1,
|
30
|
-
|
30
|
+
end.each do |sub_str1, data|
|
31
|
+
item, iscore = data
|
31
32
|
score.merge!(iscore)
|
32
33
|
end
|
33
34
|
end
|
@@ -44,14 +45,17 @@ module Eco
|
|
44
45
|
|
45
46
|
Score.new(0, len1 || 0).tap do |score|
|
46
47
|
next if !str2 || !str1
|
48
|
+
next if str2.empty? || str1.empty?
|
49
|
+
score.total = len1
|
47
50
|
next score.increase(score.total) if str1 == str2
|
48
51
|
next if str1.length < 2 || str2.length < 2
|
49
52
|
|
50
|
-
grams
|
51
|
-
|
53
|
+
grams = word_ngrams(str2, range, normalized: true)
|
54
|
+
grams_count = grams.length
|
55
|
+
next unless grams_count > 0
|
52
56
|
|
53
57
|
if range.is_a?(Integer)
|
54
|
-
item_weight = score.total.to_f /
|
58
|
+
item_weight = score.total.to_f / grams_count
|
55
59
|
matches = grams.select {|res| str1.include?(gram)}.length
|
56
60
|
score.increase(matches * item_weight)
|
57
61
|
else
|
@@ -62,9 +66,9 @@ module Eco
|
|
62
66
|
|
63
67
|
groups.each do |len, grams|
|
64
68
|
len_max_score = score.total * group_weight
|
65
|
-
item_weight = len_max_score /
|
69
|
+
item_weight = len_max_score / grams_count
|
66
70
|
matches = grams.select {|gram| str1.include?(gram)}.length
|
67
|
-
#pp "#{len} match: #{matches} (
|
71
|
+
#pp "(#{len}) match: #{matches} (of #{grams.length} of total #{grams_count}) || max_score: #{len_max_score} (over #{score.total})"
|
68
72
|
score.increase(matches * item_weight)
|
69
73
|
end
|
70
74
|
end
|
@@ -15,19 +15,12 @@ module Eco
|
|
15
15
|
# @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `str1` and `str2`
|
16
16
|
# @param str1 [String] the string of reference.
|
17
17
|
# @param str2 [String] one of the haystack items.
|
18
|
-
# @param format [Symbol] determines the `values` of the returned `Hash`::
|
19
|
-
# 1. `:pair` for just pair
|
20
|
-
# 2. `:score` for just score
|
21
|
-
# 2. `[:pair, :score]` for `Array`
|
22
18
|
# @param normalized [Boolean] whether `str1` and `str2` are already normalized (avoids normalizing them twice).
|
23
|
-
# @return [Hash] where `keys` are the **words** of `str1` and their `values
|
24
|
-
|
25
|
-
# 2. if `format` is `:score` => the `Score` words with highest match.
|
26
|
-
# 3. if `format` is `[:pair, :score]` => both in an `Array`.
|
27
|
-
def paired_words(str1, str2, format: [:pair, :score], normalized: false)
|
19
|
+
# @return [Hash] where the `keys` are the **words** of `str1` and each value is a two-element `Array` of the `pair` and its `Score`
|
20
|
+
def paired_words(str1, str2, normalized: false)
|
28
21
|
str1, str2 = normalize_string([str1, str2]) unless normalized
|
29
|
-
return {} if !str2 || !str1
|
30
|
-
return {str1 => nil} if str1.length < 2 || str1.length < 2
|
22
|
+
return {nil => [nil, Score.new(0, 0)]} if !str2 || !str1
|
23
|
+
return {str1 => [nil, Score.new(0, 0)]} if str1.length < 2 || str1.length < 2
|
31
24
|
|
32
25
|
needles = get_words(str1, normalized: true)
|
33
26
|
haystack = get_words(str2, normalized: true)
|
@@ -58,6 +51,9 @@ module Eco
|
|
58
51
|
result[:score].ratio
|
59
52
|
end.reverse
|
60
53
|
if result = sorted.shift
|
54
|
+
unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
|
55
|
+
raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{result[:needle]}' and item #{item}"
|
56
|
+
end
|
61
57
|
paired[result[:needle]] = {
|
62
58
|
pair: item,
|
63
59
|
score: result[:score]
|
@@ -73,6 +69,9 @@ module Eco
|
|
73
69
|
pending_items.include?(result[:pair]) && result[:score].ratio > 0.05
|
74
70
|
end
|
75
71
|
if result = results.shift
|
72
|
+
unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
|
73
|
+
raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{needle}' and item #{result[:pair]}"
|
74
|
+
end
|
76
75
|
paired[needle] = result
|
77
76
|
pending_items.delete(result[:pair])
|
78
77
|
end
|
@@ -85,13 +84,8 @@ module Eco
|
|
85
84
|
score: Score.new(0, needle.length)
|
86
85
|
}
|
87
86
|
end
|
88
|
-
paired.
|
89
|
-
|
90
|
-
when Array
|
91
|
-
result.values_at(*format)
|
92
|
-
else
|
93
|
-
restult[format]
|
94
|
-
end
|
87
|
+
paired.each_with_object({}) do |(needle, data), out|
|
88
|
+
out[needle] = data.values_at(:pair, :score)
|
95
89
|
end
|
96
90
|
end
|
97
91
|
|
@@ -1,9 +1,11 @@
|
|
1
1
|
module Eco
|
2
2
|
module Data
|
3
3
|
module FuzzyMatch
|
4
|
-
class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
|
4
|
+
class Result < Struct.new(:match, :value, :needle_value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
|
5
5
|
ALL_METHODS = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position]
|
6
6
|
|
7
|
+
attr_accessor :pivot
|
8
|
+
|
7
9
|
def dice; super&.round(3); end
|
8
10
|
def levenshtein; super&.round(3); end
|
9
11
|
def jaro_winkler; super&.round(3); end
|
@@ -11,6 +13,12 @@ module Eco
|
|
11
13
|
def words_ngrams; super&.round(3); end
|
12
14
|
def chars_position; super&.round(3); end
|
13
15
|
|
16
|
+
#Shortcuts
|
17
|
+
def lev; levenshtein; end
|
18
|
+
def jaro; jaro_winkler; end
|
19
|
+
def wngrams; words_ngrams; end
|
20
|
+
def pos; chars_position; end
|
21
|
+
|
14
22
|
def average
|
15
23
|
values = [dice, levenshtein, jaro_winkler, ngrams, words_ngrams, chars_position]
|
16
24
|
(values.inject(0.0, :+) / values.length).round(3)
|
@@ -55,6 +63,12 @@ module Eco
|
|
55
63
|
compare(result)
|
56
64
|
end
|
57
65
|
|
66
|
+
def values_at(*keys)
|
67
|
+
keys.map do |key|
|
68
|
+
self.send(key) if self.respond_to?(key)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
58
72
|
private
|
59
73
|
|
60
74
|
def compare(other, order: self.order)
|