eco-helpers 2.0.19 → 2.0.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/eco-helpers.gemspec +5 -1
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +49 -15
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +2 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +4 -2
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/cli/config/default/options.rb +29 -1
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +7 -5
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/version.rb +1 -1
- metadata +83 -2
@@ -53,8 +53,7 @@ ASSETS.cli.config do |config|
|
|
53
53
|
cases_with_people = config.usecases.active(io: io).select do |usecase, data|
|
54
54
|
io.class.people_required?(usecase.type)
|
55
55
|
end
|
56
|
-
|
57
|
-
next io unless !cases_with_people.empty? || get_people
|
56
|
+
next io if cases_with_people.empty? && !io.options.dig(:people, :get)
|
58
57
|
io = io.new(people: config.people(io: io))
|
59
58
|
end
|
60
59
|
|
@@ -67,7 +66,8 @@ ASSETS.cli.config do |config|
|
|
67
66
|
|
68
67
|
wf.before(:usecases) do |wf_cases, io|
|
69
68
|
# save partial entries -> should be native to session.workflow
|
70
|
-
|
69
|
+
get_people = io.options.dig(:people, :get)
|
70
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
71
71
|
if !io.options[:dry_run] && partial_update
|
72
72
|
partial_file = io.session.config.people.partial_cache
|
73
73
|
io.session.file_manager.save_json(io.people, partial_file, :timestamp)
|
@@ -98,7 +98,8 @@ ASSETS.cli.config do |config|
|
|
98
98
|
if io.session.post_launch.empty?
|
99
99
|
wf_post.skip!
|
100
100
|
else
|
101
|
-
|
101
|
+
get_people = io.options.dig(:people, :get)
|
102
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
102
103
|
if !io.options[:dry_run] && partial_update
|
103
104
|
# get target people afresh
|
104
105
|
people = io.session.micro.people_refresh(people: io.people, include_created: true)
|
@@ -139,7 +140,8 @@ ASSETS.cli.config do |config|
|
|
139
140
|
end
|
140
141
|
|
141
142
|
wf.on(:end) do |wf_end, io|
|
142
|
-
|
143
|
+
get_people = io.options.dig(:people, :get)
|
144
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
143
145
|
unless !io.options[:end_get] || io.options[:dry_run] || partial_update
|
144
146
|
people = io.session.micro.people_cache
|
145
147
|
io = io.new(people: people)
|
data/lib/eco/csv/table.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
module Eco
|
3
2
|
class CSV
|
4
3
|
class Table < ::CSV::Table
|
@@ -9,6 +8,70 @@ module Eco
|
|
9
8
|
super(to_rows_array(input))
|
10
9
|
end
|
11
10
|
|
11
|
+
# @return [Hash] where the keys are the groups and each value is an `Eco::CSV::Table` with the rows of that group
|
12
|
+
def group_by(&block)
|
13
|
+
rows.group_by(&block).transform_values do |rows|
|
14
|
+
self.class.new(rows)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# @return [Eco::CSV::Table]
|
19
|
+
def transform_values
|
20
|
+
transformed_rows = rows.map do |row|
|
21
|
+
res = yield(row)
|
22
|
+
case res
|
23
|
+
when Array
|
24
|
+
::CSV::Row.new(row.headers, res)
|
25
|
+
when ::CSV::Row
|
26
|
+
res
|
27
|
+
end
|
28
|
+
end
|
29
|
+
self.class.new(transformed_rows)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Slices the selected rows
|
33
|
+
# @return [Eco::CSV::Table]
|
34
|
+
def slice(*index)
|
35
|
+
case index.first
|
36
|
+
when Range, Numeric
|
37
|
+
self.class.new(rows.slice(index.first))
|
38
|
+
else
|
39
|
+
self
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# @return [Eco::CSV::Table]
|
44
|
+
def slice_columns(*index)
|
45
|
+
case index.first
|
46
|
+
when Range, Numeric
|
47
|
+
columns_to_table(columns.slice(index.first))
|
48
|
+
when String
|
49
|
+
csv_cols = columns
|
50
|
+
csv_cols = index.each_with_object([]) do |name, cols|
|
51
|
+
col = csv_cols.find {|col| col.first == name}
|
52
|
+
cols << col if col
|
53
|
+
end
|
54
|
+
columns_to_table(csv_cols)
|
55
|
+
else
|
56
|
+
self
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# @return [Eco::CSV::Table]
|
61
|
+
def delete_column(i)
|
62
|
+
csv_cols = columns
|
63
|
+
csv_cols.delete(i)
|
64
|
+
columns_to_table(csv_cols)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Adds a new column at the end
|
68
|
+
# @param header_name [String] header of the new column
|
69
|
+
# @return [Eco::CSV::Table] with a new empty column
|
70
|
+
def add_column(header_name)
|
71
|
+
new_col = Array.new(length).unshift(header_name)
|
72
|
+
columns_to_table(columns.push(new_col))
|
73
|
+
end
|
74
|
+
|
12
75
|
# @return [Array<::CSV::Row>]
|
13
76
|
def rows
|
14
77
|
[].tap do |out|
|
@@ -16,24 +79,40 @@ module Eco
|
|
16
79
|
end
|
17
80
|
end
|
18
81
|
|
82
|
+
# Removes duplicate rows, i.e. rows whose values all match those of an earlier row (the first occurrence is kept)
|
83
|
+
def delete_duplicates!
|
84
|
+
unique_rows = []
|
85
|
+
self.by_row!.delete_if do |row|
|
86
|
+
unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
|
87
|
+
unique_rows << row unless found
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# @param row1 [::CSV::Row] row to be compared
|
93
|
+
# @param row2 [::CSV::Row] row to be compared
|
94
|
+
# @return [Boolean] `true` if all values of `row1` are equal to those of `row2`
|
95
|
+
def equal_rows?(row1, row2)
|
96
|
+
row1.fields.zip(row2.fields).all? do |(v1, v2)|
|
97
|
+
v1 == v2
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
19
101
|
# @return [Integer] total number of rows not including the header
|
20
102
|
def length
|
21
103
|
to_a.length - 1
|
22
104
|
end
|
23
105
|
|
106
|
+
def empty?
|
107
|
+
length < 1
|
108
|
+
end
|
109
|
+
|
24
110
|
# @return [Array<Array>] each array is the column header followed by its values
|
25
111
|
def columns
|
26
112
|
to_a.transpose
|
27
113
|
end
|
28
114
|
|
29
|
-
#
|
30
|
-
# @param header_name [String] header of the new column
|
31
|
-
# @return [Eco::CSV::Table] with a new empty column
|
32
|
-
def add_column(header_name)
|
33
|
-
new_col = Array.new(length).unshift(header_name)
|
34
|
-
columns_to_table(columns.push(new_col))
|
35
|
-
end
|
36
|
-
|
115
|
+
# Creates a single `Hash` where each key, value is a column (header + values)
|
37
116
|
# @note it will override columns with same header name
|
38
117
|
# @return [Hash] keys are headers, values are arrays
|
39
118
|
def columns_hash
|
@@ -42,6 +121,17 @@ module Eco
|
|
42
121
|
end.to_h
|
43
122
|
end
|
44
123
|
|
124
|
+
# Returns an array of row hashes
|
125
|
+
# @note it will override columns with same header
|
126
|
+
def to_a_h
|
127
|
+
rows.map(&:to_h)
|
128
|
+
end
|
129
|
+
|
130
|
+
# @see #to_a_h
|
131
|
+
def to_array_of_hashes
|
132
|
+
to_a_h
|
133
|
+
end
|
134
|
+
|
45
135
|
private
|
46
136
|
|
47
137
|
def columns_to_table(columns_array)
|
@@ -51,24 +141,34 @@ module Eco
|
|
51
141
|
|
52
142
|
def to_rows_array(data)
|
53
143
|
case data
|
54
|
-
when Array
|
55
|
-
return data unless data.length > 0
|
56
|
-
if data.first.is_a?(::CSV::Row)
|
57
|
-
data
|
58
|
-
elsif data.first.is_a?(Array)
|
59
|
-
headers = data.shift
|
60
|
-
data.map do |arr_row|
|
61
|
-
CSV::Row.new(headers, arr_row)
|
62
|
-
end.compact
|
63
|
-
else
|
64
|
-
raise "Expected data that can be transformed into Array<Array>"
|
65
|
-
end
|
66
144
|
when ::CSV::Table
|
67
145
|
to_rows_array(data.to_a)
|
68
146
|
when Hash
|
69
147
|
# hash of columns header as key and column array as value
|
70
148
|
rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
|
71
149
|
to_rows_array(data.keys)
|
150
|
+
when Enumerable
|
151
|
+
data = data.dup.compact
|
152
|
+
return data unless data.count > 0
|
153
|
+
sample = data.first
|
154
|
+
|
155
|
+
case sample
|
156
|
+
when ::CSV::Row
|
157
|
+
data
|
158
|
+
when Array
|
159
|
+
headers = data.shift
|
160
|
+
data.map do |arr_row|
|
161
|
+
::CSV::Row.new(headers, arr_row)
|
162
|
+
end.compact
|
163
|
+
when Hash
|
164
|
+
headers = sample.keys
|
165
|
+
headers_str = headers.map(&:to_s)
|
166
|
+
data.map do |hash|
|
167
|
+
::CSV::Row.new(headers_str, hash.values_at(*headers))
|
168
|
+
end.compact
|
169
|
+
else
|
170
|
+
raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
|
171
|
+
end
|
72
172
|
else
|
73
173
|
raise "Input type not supported. Given: #{data.class}"
|
74
174
|
end
|
data/lib/eco/data/fuzzy_match.rb
CHANGED
@@ -28,6 +28,7 @@ module Eco
|
|
28
28
|
include NGramsScore
|
29
29
|
|
30
30
|
def jaro_winkler(str1, str2, **options)
|
31
|
+
return 0 if !str1 || !str2
|
31
32
|
options = {
|
32
33
|
ignore_case: true,
|
33
34
|
weight: 0.25
|
@@ -67,28 +68,67 @@ module Eco
|
|
67
68
|
@fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
|
68
69
|
end
|
69
70
|
|
71
|
+
# TODO: integration for options[:unique_words] => to ensure repeated words do not bring down the score and get the results cut by the threshold
|
70
72
|
# @note
|
71
73
|
# - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
|
72
|
-
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
|
74
|
+
# @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key.
|
75
|
+
# @param needle_str [String, nil] the string to match with instead of `needle`; when `nil`, it is derived from `needle`.
|
76
|
+
# @param haystack [Enumerable] the items to find `needle` among.
|
73
77
|
# @return [Eco::Data::FuzzyMatch::Results]
|
74
|
-
def find_all_with_score(needle, **options)
|
75
|
-
|
78
|
+
def find_all_with_score(needle, needle_str: nil, haystack: nil, **options)
|
79
|
+
base_match = fuzzy_match(haystack, **options)
|
80
|
+
match_results = base_match.find_all_with_score(needle_str || needle)
|
81
|
+
needle_str ||= item_string(needle)
|
82
|
+
results = match_results.each_with_object([]) do |fuzzy_results, results|
|
76
83
|
item, dice, lev = fuzzy_results
|
77
84
|
unless item == needle
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
85
|
+
item_str = item_string(item)
|
86
|
+
|
87
|
+
if item_str.to_s.strip.empty? || needle_str.to_s.strip.empty?
|
88
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
|
89
|
+
end
|
90
|
+
|
91
|
+
jaro_res ||= jaro(needle_str, item_str)
|
92
|
+
ngram_res ||= ngram(needle_str, item_str)
|
93
|
+
wngram_res ||= words_ngram(needle_str, item_str)
|
94
|
+
pos_res ||= position(needle_str, item_str)
|
95
|
+
|
96
|
+
results << Result.new(item, item_str, needle_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
86
97
|
end
|
87
98
|
end
|
88
|
-
Results.new(needle,
|
99
|
+
Results.new(needle, needle_str, results).tap do |res|
|
89
100
|
res.order = fuzzy_options[:order] if fuzzy_options[:order]
|
90
101
|
res.threshold = fuzzy_options[:threshold] if fuzzy_options[:threshold]
|
102
|
+
end.relevant_results
|
103
|
+
end
|
104
|
+
|
105
|
+
def recalculate_results(results, needle_str: nil, **options)
|
106
|
+
raise "You should provide a block |needle_str, item_str, needle, item|" unless block_given?
|
107
|
+
new_results = results.each_with_object([]) do |result, new_results|
|
108
|
+
nstr, istr = yield(needle_str || results.value, result.value, results.needle, result.match)
|
109
|
+
|
110
|
+
if istr.to_s.strip.empty?
|
111
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 1
|
112
|
+
elsif nstr.to_s.strip.empty?
|
113
|
+
unless istr = needle_str
|
114
|
+
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
|
119
|
+
dice ||= res&.dices_coefficient_similar || 0
|
120
|
+
lev ||= res&.levenshtein_similar || 0
|
121
|
+
jaro_res ||= jaro(nstr, istr)
|
122
|
+
ngram_res ||= ngram(nstr, istr)
|
123
|
+
wngram_res ||= words_ngram(nstr, istr)
|
124
|
+
pos_res ||= position(nstr, istr)
|
125
|
+
|
126
|
+
new_results << Result.new(*result.values_at(:match, :value, :needle_str), dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
|
91
127
|
end
|
128
|
+
Results.new(results.needle, results.value, new_results).tap do |res|
|
129
|
+
res.order = options[:order] if options[:order]
|
130
|
+
res.threshold = options[:threshold] if options[:threshold]
|
131
|
+
end.relevant_results
|
92
132
|
end
|
93
133
|
|
94
134
|
private
|
@@ -12,8 +12,9 @@ module Eco
|
|
12
12
|
def chars_position_score(str1, str2, max_distance: 3, normalized: false)
|
13
13
|
str1, str2 = normalize_string([str1, str2]) unless normalized
|
14
14
|
len1 = str1 && str1.length; len2 = str2 && str2.length
|
15
|
-
Score.new(0,
|
16
|
-
next if !str1 ||
|
15
|
+
Score.new(0, 0).tap do |score|
|
16
|
+
next if !str2 || !str1 || str2.empty? || str1.empty?
|
17
|
+
score.total = len1
|
17
18
|
next score.increase(score.total) if str1 == str2
|
18
19
|
next if len1 < 2
|
19
20
|
pos = 0
|
@@ -16,18 +16,19 @@ module Eco
|
|
16
16
|
|
17
17
|
Score.new(0, 0).tap do |score|
|
18
18
|
next if !str2 || !str1
|
19
|
+
next score.increase_total(len1) if str2.empty? || str1.empty?
|
19
20
|
if str1 == str2
|
20
|
-
score.
|
21
|
+
score.total = len1
|
21
22
|
score.increase(score.total)
|
22
23
|
end
|
23
24
|
if str1.length < 2 || str1.length < 2
|
24
25
|
score.increase_total(len1)
|
25
26
|
end
|
26
27
|
|
27
|
-
paired_words(str1, str2, normalized: true) do |needle, item|
|
28
|
+
pairs = paired_words(str1, str2, normalized: true) do |needle, item|
|
28
29
|
ngrams_score(needle, item, range: range, normalized: true)
|
29
|
-
end.each do |sub_str1,
|
30
|
-
|
30
|
+
end.each do |sub_str1, data|
|
31
|
+
item, iscore = data
|
31
32
|
score.merge!(iscore)
|
32
33
|
end
|
33
34
|
end
|
@@ -44,14 +45,17 @@ module Eco
|
|
44
45
|
|
45
46
|
Score.new(0, len1 || 0).tap do |score|
|
46
47
|
next if !str2 || !str1
|
48
|
+
next if str2.empty? || str1.empty?
|
49
|
+
score.total = len1
|
47
50
|
next score.increase(score.total) if str1 == str2
|
48
51
|
next if str1.length < 2 || str2.length < 2
|
49
52
|
|
50
|
-
grams
|
51
|
-
|
53
|
+
grams = word_ngrams(str2, range, normalized: true)
|
54
|
+
grams_count = grams.length
|
55
|
+
next unless grams_count > 0
|
52
56
|
|
53
57
|
if range.is_a?(Integer)
|
54
|
-
item_weight = score.total.to_f /
|
58
|
+
item_weight = score.total.to_f / grams_count
|
55
59
|
matches = grams.select {|res| str1.include?(gram)}.length
|
56
60
|
score.increase(matches * item_weight)
|
57
61
|
else
|
@@ -62,9 +66,9 @@ module Eco
|
|
62
66
|
|
63
67
|
groups.each do |len, grams|
|
64
68
|
len_max_score = score.total * group_weight
|
65
|
-
item_weight = len_max_score /
|
69
|
+
item_weight = len_max_score / grams_count
|
66
70
|
matches = grams.select {|gram| str1.include?(gram)}.length
|
67
|
-
#pp "#{len} match: #{matches} (
|
71
|
+
#pp "(#{len}) match: #{matches} (of #{grams.length} of total #{grams_count}) || max_score: #{len_max_score} (over #{score.total})"
|
68
72
|
score.increase(matches * item_weight)
|
69
73
|
end
|
70
74
|
end
|
@@ -15,19 +15,12 @@ module Eco
|
|
15
15
|
# @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `str1` and `str2`
|
16
16
|
# @param str1 [String] the string of reference.
|
17
17
|
# @param str2 [String] one of the haystack items.
|
18
|
-
# @param format [Symbol] determines the `values` of the returned `Hash`::
|
19
|
-
# 1. `:pair` for just pair
|
20
|
-
# 2. `:score` for just score
|
21
|
-
# 2. `[:pair, :score]` for `Array`
|
22
18
|
# @param normalized [Boolean] whether `str1` and `str2` are already normalized (avoids normalizing them twice).
|
23
|
-
# @return [Hash] where `keys` are the **words** of `str1` and their `values
|
24
|
-
|
25
|
-
# 2. if `format` is `:score` => the `Score` words with highest match.
|
26
|
-
# 3. if `format` is `[:pair, :score]` => both in an `Array`.
|
27
|
-
def paired_words(str1, str2, format: [:pair, :score], normalized: false)
|
19
|
+
# @return [Hash] where `keys` are the **words** of `str1` and their `values` a pair array of `pair` and `Score`
|
20
|
+
def paired_words(str1, str2, normalized: false)
|
28
21
|
str1, str2 = normalize_string([str1, str2]) unless normalized
|
29
|
-
return {} if !str2 || !str1
|
30
|
-
return {str1 => nil} if str1.length < 2 || str1.length < 2
|
22
|
+
return {nil => [nil, Score.new(0, 0)]} if !str2 || !str1
|
23
|
+
return {str1 => [nil, Score.new(0, 0)]} if str1.length < 2 || str1.length < 2
|
31
24
|
|
32
25
|
needles = get_words(str1, normalized: true)
|
33
26
|
haystack = get_words(str2, normalized: true)
|
@@ -58,6 +51,9 @@ module Eco
|
|
58
51
|
result[:score].ratio
|
59
52
|
end.reverse
|
60
53
|
if result = sorted.shift
|
54
|
+
unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
|
55
|
+
raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{result[:needle]}' and item #{item}"
|
56
|
+
end
|
61
57
|
paired[result[:needle]] = {
|
62
58
|
pair: item,
|
63
59
|
score: result[:score]
|
@@ -73,6 +69,9 @@ module Eco
|
|
73
69
|
pending_items.include?(result[:pair]) && result[:score].ratio > 0.05
|
74
70
|
end
|
75
71
|
if result = results.shift
|
72
|
+
unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
|
73
|
+
raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{needle}' and item #{result[:pair]}"
|
74
|
+
end
|
76
75
|
paired[needle] = result
|
77
76
|
pending_items.delete(result[:pair])
|
78
77
|
end
|
@@ -85,13 +84,8 @@ module Eco
|
|
85
84
|
score: Score.new(0, needle.length)
|
86
85
|
}
|
87
86
|
end
|
88
|
-
paired.
|
89
|
-
|
90
|
-
when Array
|
91
|
-
result.values_at(*format)
|
92
|
-
else
|
93
|
-
restult[format]
|
94
|
-
end
|
87
|
+
paired.each_with_object({}) do |(needle, data), out|
|
88
|
+
out[needle] = data.values_at(:pair, :score)
|
95
89
|
end
|
96
90
|
end
|
97
91
|
|
@@ -1,9 +1,11 @@
|
|
1
1
|
module Eco
|
2
2
|
module Data
|
3
3
|
module FuzzyMatch
|
4
|
-
class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
|
4
|
+
class Result < Struct.new(:match, :value, :needle_value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
|
5
5
|
ALL_METHODS = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position]
|
6
6
|
|
7
|
+
attr_accessor :pivot
|
8
|
+
|
7
9
|
def dice; super&.round(3); end
|
8
10
|
def levenshtein; super&.round(3); end
|
9
11
|
def jaro_winkler; super&.round(3); end
|
@@ -11,6 +13,12 @@ module Eco
|
|
11
13
|
def words_ngrams; super&.round(3); end
|
12
14
|
def chars_position; super&.round(3); end
|
13
15
|
|
16
|
+
# Shortcuts
|
17
|
+
def lev; levenshtein; end
|
18
|
+
def jaro; jaro_winkler; end
|
19
|
+
def wngrams; words_ngrams; end
|
20
|
+
def pos; chars_position; end
|
21
|
+
|
14
22
|
def average
|
15
23
|
values = [dice, levenshtein, jaro_winkler, ngrams, words_ngrams, chars_position]
|
16
24
|
(values.inject(0.0, :+) / values.length).round(3)
|
@@ -55,6 +63,12 @@ module Eco
|
|
55
63
|
compare(result)
|
56
64
|
end
|
57
65
|
|
66
|
+
def values_at(*keys)
|
67
|
+
keys.map do |key|
|
68
|
+
self.send(key) if self.respond_to?(key)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
58
72
|
private
|
59
73
|
|
60
74
|
def compare(other, order: self.order)
|