rubocop 0.56.0 → 0.57.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -4
- data/assets/output.html.erb +1 -1
- data/bin/console +9 -0
- data/config/default.yml +23 -3
- data/config/disabled.yml +2 -2
- data/config/enabled.yml +29 -13
- data/{bin → exe}/rubocop +0 -0
- data/lib/rubocop.rb +6 -2
- data/lib/rubocop/ast/node.rb +3 -1
- data/lib/rubocop/ast/node/mixin/method_dispatch_node.rb +26 -5
- data/lib/rubocop/config_loader.rb +0 -1
- data/lib/rubocop/config_loader_resolver.rb +4 -2
- data/lib/rubocop/cop/correctors/alignment_corrector.rb +1 -1
- data/lib/rubocop/cop/generator.rb +1 -1
- data/lib/rubocop/cop/layout/class_structure.rb +1 -1
- data/lib/rubocop/cop/layout/closing_heredoc_indentation.rb +130 -0
- data/lib/rubocop/cop/layout/dot_position.rb +2 -6
- data/lib/rubocop/cop/layout/empty_lines_around_access_modifier.rb +1 -1
- data/lib/rubocop/cop/layout/empty_lines_around_begin_body.rb +0 -1
- data/lib/rubocop/cop/layout/extra_spacing.rb +2 -2
- data/lib/rubocop/cop/layout/indent_heredoc.rb +29 -5
- data/lib/rubocop/cop/layout/indentation_consistency.rb +1 -1
- data/lib/rubocop/cop/layout/indentation_width.rb +2 -2
- data/lib/rubocop/cop/layout/leading_blank_lines.rb +53 -0
- data/lib/rubocop/cop/layout/space_inside_reference_brackets.rb +11 -2
- data/lib/rubocop/cop/lint/ineffective_access_modifier.rb +1 -1
- data/lib/rubocop/cop/lint/string_conversion_in_interpolation.rb +4 -3
- data/lib/rubocop/cop/lint/useless_access_modifier.rb +2 -2
- data/lib/rubocop/cop/mixin/array_syntax.rb +1 -1
- data/lib/rubocop/cop/mixin/range_help.rb +3 -7
- data/lib/rubocop/cop/rails/assert_not.rb +1 -1
- data/lib/rubocop/cop/rails/bulk_change_table.rb +272 -0
- data/lib/rubocop/cop/rails/dynamic_find_by.rb +1 -1
- data/lib/rubocop/cop/rails/file_path.rb +40 -10
- data/lib/rubocop/cop/rails/http_positional_arguments.rb +1 -1
- data/lib/rubocop/cop/rails/time_zone.rb +3 -3
- data/lib/rubocop/cop/style/access_modifier_declarations.rb +111 -0
- data/lib/rubocop/cop/style/bare_percent_literals.rb +1 -1
- data/lib/rubocop/cop/style/command_literal.rb +1 -5
- data/lib/rubocop/cop/style/frozen_string_literal_comment.rb +30 -7
- data/lib/rubocop/cop/style/mixin_grouping.rb +8 -3
- data/lib/rubocop/cop/style/next.rb +1 -1
- data/lib/rubocop/cop/style/numeric_literal_prefix.rb +26 -3
- data/lib/rubocop/cop/style/symbol_proc.rb +1 -1
- data/lib/rubocop/cop/style/unneeded_condition.rb +73 -0
- data/lib/rubocop/cop/style/unneeded_percent_q.rb +13 -0
- data/lib/rubocop/cop/variable_force.rb +16 -17
- data/lib/rubocop/options.rb +15 -5
- data/lib/rubocop/result_cache.rb +3 -3
- data/lib/rubocop/string_util.rb +2 -147
- data/lib/rubocop/token.rb +2 -1
- data/lib/rubocop/version.rb +1 -1
- metadata +28 -9
- data/lib/rubocop/cop/lint/splat_keyword_arguments.rb +0 -36
@@ -82,7 +82,7 @@ module RuboCop
|
|
82
82
|
MSG_USE_PERCENT_X = 'Use `%x` around command string.'.freeze
|
83
83
|
|
84
84
|
def on_xstr(node)
|
85
|
-
return if
|
85
|
+
return if node.heredoc?
|
86
86
|
|
87
87
|
if backtick_literal?(node)
|
88
88
|
check_backtick_literal(node)
|
@@ -161,10 +161,6 @@ module RuboCop
|
|
161
161
|
loc.expression.source[loc.begin.length...-loc.end.length]
|
162
162
|
end
|
163
163
|
|
164
|
-
def heredoc_literal?(node)
|
165
|
-
node.loc.respond_to?(:heredoc_body)
|
166
|
-
end
|
167
|
-
|
168
164
|
def backtick_literal?(node)
|
169
165
|
node.loc.begin.source == '`'
|
170
166
|
end
|
@@ -139,14 +139,37 @@ module RuboCop
|
|
139
139
|
def insert_comment(corrector)
|
140
140
|
last_special_comment = last_special_comment(processed_source)
|
141
141
|
if last_special_comment.nil?
|
142
|
-
corrector.insert_before(
|
143
|
-
"#{FROZEN_STRING_LITERAL_ENABLED}\n\n")
|
144
|
-
elsif processed_source.following_line(last_special_comment).empty?
|
145
|
-
corrector.insert_after(last_special_comment.pos,
|
146
|
-
"\n#{FROZEN_STRING_LITERAL_ENABLED}")
|
142
|
+
corrector.insert_before(correction_range, preceding_comment)
|
147
143
|
else
|
148
|
-
corrector.insert_after(
|
149
|
-
|
144
|
+
corrector.insert_after(correction_range, proceeding_comment)
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
def preceding_comment
|
149
|
+
if processed_source.tokens[0].space_before?
|
150
|
+
"#{FROZEN_STRING_LITERAL_ENABLED}\n"
|
151
|
+
else
|
152
|
+
"#{FROZEN_STRING_LITERAL_ENABLED}\n\n"
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def proceeding_comment
|
157
|
+
last_special_comment = last_special_comment(processed_source)
|
158
|
+
if processed_source.following_line(last_special_comment).empty?
|
159
|
+
"\n#{FROZEN_STRING_LITERAL_ENABLED}"
|
160
|
+
else
|
161
|
+
"\n#{FROZEN_STRING_LITERAL_ENABLED}\n"
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
def correction_range
|
166
|
+
last_special_comment = last_special_comment(processed_source)
|
167
|
+
|
168
|
+
if last_special_comment.nil?
|
169
|
+
range_with_surrounding_space(range: processed_source.tokens[0],
|
170
|
+
side: :left)
|
171
|
+
else
|
172
|
+
last_special_comment.pos
|
150
173
|
end
|
151
174
|
end
|
152
175
|
end
|
@@ -36,12 +36,17 @@ module RuboCop
|
|
36
36
|
MIXIN_METHODS = %i[extend include prepend].freeze
|
37
37
|
MSG = 'Put `%<mixin>s` mixins in %<suffix>s.'.freeze
|
38
38
|
|
39
|
-
def
|
40
|
-
|
39
|
+
def on_class(node)
|
40
|
+
begin_node = node.child_nodes.find(&:begin_type?) || node
|
41
|
+
begin_node.each_child_node(:send).select(&:macro?).each do |macro|
|
42
|
+
next unless MIXIN_METHODS.include?(macro.method_name)
|
41
43
|
|
42
|
-
|
44
|
+
check(macro)
|
45
|
+
end
|
43
46
|
end
|
44
47
|
|
48
|
+
alias on_module on_class
|
49
|
+
|
45
50
|
def autocorrect(node)
|
46
51
|
range = node.loc.expression
|
47
52
|
if separated_style?
|
@@ -6,10 +6,33 @@ module RuboCop
|
|
6
6
|
# This cop checks for octal, hex, binary and decimal literals using
|
7
7
|
# uppercase prefixes and corrects them to lowercase prefix
|
8
8
|
# or no prefix (in case of decimals).
|
9
|
-
# eg. for octal use `0o` instead of `0` or `0O`.
|
10
9
|
#
|
11
|
-
#
|
12
|
-
#
|
10
|
+
# @example EnforcedOctalStyle: zero_with_o (default)
|
11
|
+
# # bad - missing octal prefix
|
12
|
+
# num = 01234
|
13
|
+
#
|
14
|
+
# # bad - uppercase prefix
|
15
|
+
# num = 0O1234
|
16
|
+
# num = 0X12AB
|
17
|
+
# num = 0B10101
|
18
|
+
#
|
19
|
+
# # bad - redundant decimal prefix
|
20
|
+
# num = 0D1234
|
21
|
+
# num = 0d1234
|
22
|
+
#
|
23
|
+
# # good
|
24
|
+
# num = 0o1234
|
25
|
+
# num = 0x12AB
|
26
|
+
# num = 0b10101
|
27
|
+
# num = 1234
|
28
|
+
#
|
29
|
+
# @example EnforcedOctalStyle: zero_only
|
30
|
+
# # bad
|
31
|
+
# num = 0o1234
|
32
|
+
# num = 0O1234
|
33
|
+
#
|
34
|
+
# # good
|
35
|
+
# num = 01234
|
13
36
|
class NumericLiteralPrefix < Cop
|
14
37
|
include IntegerNode
|
15
38
|
|
@@ -35,7 +35,7 @@ module RuboCop
|
|
35
35
|
block_method_name = resolve_block_method_name(send_or_super)
|
36
36
|
|
37
37
|
# TODO: Rails-specific handling that we should probably make
|
38
|
-
# configurable - https://github.com/
|
38
|
+
# configurable - https://github.com/rubocop-hq/rubocop/issues/1485
|
39
39
|
# we should ignore lambdas & procs
|
40
40
|
return if proc_node?(send_or_super)
|
41
41
|
return if %i[lambda proc].include?(block_method_name)
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RuboCop
|
4
|
+
module Cop
|
5
|
+
module Style
|
6
|
+
# This cop checks for unnecessary conditional expressions.
|
7
|
+
#
|
8
|
+
# @example
|
9
|
+
# # bad
|
10
|
+
# a = b ? b : c
|
11
|
+
#
|
12
|
+
# # good
|
13
|
+
# a = b || c
|
14
|
+
#
|
15
|
+
# @example
|
16
|
+
# # bad
|
17
|
+
# if b
|
18
|
+
# b
|
19
|
+
# else
|
20
|
+
# c
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# # good
|
24
|
+
# b || c
|
25
|
+
class UnneededCondition < Cop
|
26
|
+
include RangeHelp
|
27
|
+
|
28
|
+
MSG = 'Use double pipes `||` instead.'.freeze
|
29
|
+
|
30
|
+
def on_if(node)
|
31
|
+
return unless offense?(node)
|
32
|
+
add_offense(node, location: range_of_offense(node))
|
33
|
+
end
|
34
|
+
|
35
|
+
def autocorrect(node)
|
36
|
+
lambda do |corrector|
|
37
|
+
if node.ternary?
|
38
|
+
corrector.replace(range_of_offense(node), '||')
|
39
|
+
else
|
40
|
+
corrected = [node.if_branch.source,
|
41
|
+
else_source(node.else_branch)].join(' || ')
|
42
|
+
|
43
|
+
corrector.replace(node.source_range, corrected)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def range_of_offense(node)
|
51
|
+
return :expression unless node.ternary?
|
52
|
+
range_between(node.loc.question.begin_pos, node.loc.colon.end_pos)
|
53
|
+
end
|
54
|
+
|
55
|
+
def offense?(node)
|
56
|
+
condition, if_branch, else_branch = *node
|
57
|
+
|
58
|
+
condition == if_branch && !node.elsif? && (
|
59
|
+
node.ternary? ||
|
60
|
+
!else_branch.instance_of?(AST::Node) ||
|
61
|
+
else_branch.single_line?
|
62
|
+
)
|
63
|
+
end
|
64
|
+
|
65
|
+
def else_source(else_branch)
|
66
|
+
wrap_else = MODIFIER_NODES.include?(else_branch.type) &&
|
67
|
+
else_branch.modifier_form?
|
68
|
+
wrap_else ? "(#{else_branch.source})" : else_branch.source
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
@@ -4,6 +4,19 @@ module RuboCop
|
|
4
4
|
module Cop
|
5
5
|
module Style
|
6
6
|
# This cop checks for usage of the %q/%Q syntax when '' or "" would do.
|
7
|
+
#
|
8
|
+
# @example
|
9
|
+
#
|
10
|
+
# # bad
|
11
|
+
# name = %q(Bruce Wayne)
|
12
|
+
# time = %q(8 o'clock)
|
13
|
+
# question = %q("What did you say?")
|
14
|
+
#
|
15
|
+
# # good
|
16
|
+
# name = 'Bruce Wayne'
|
17
|
+
# time = "8 o'clock"
|
18
|
+
# question = '"What did you say?"'
|
19
|
+
#
|
7
20
|
class UnneededPercentQ < Cop
|
8
21
|
MSG = 'Use `%<q_type>s` only for strings that contain both ' \
|
9
22
|
'single quotes and double quotes%<extra>s.'.freeze
|
@@ -82,10 +82,9 @@ module RuboCop
|
|
82
82
|
end
|
83
83
|
|
84
84
|
def process_node(node)
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
end
|
85
|
+
method_name = node_handler_method_name(node)
|
86
|
+
retval = send(method_name, node) if method_name
|
87
|
+
process_children(node) unless retval == :skip_children
|
89
88
|
end
|
90
89
|
|
91
90
|
private
|
@@ -105,34 +104,34 @@ module RuboCop
|
|
105
104
|
end
|
106
105
|
|
107
106
|
def skip_children!
|
108
|
-
|
107
|
+
:skip_children
|
109
108
|
end
|
110
109
|
|
111
110
|
# rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
|
112
|
-
def
|
111
|
+
def node_handler_method_name(node)
|
113
112
|
case node.type
|
114
113
|
when VARIABLE_ASSIGNMENT_TYPE
|
115
|
-
process_variable_assignment
|
114
|
+
:process_variable_assignment
|
116
115
|
when REGEXP_NAMED_CAPTURE_TYPE
|
117
|
-
process_regexp_named_captures
|
116
|
+
:process_regexp_named_captures
|
118
117
|
when MULTIPLE_ASSIGNMENT_TYPE
|
119
|
-
process_variable_multiple_assignment
|
118
|
+
:process_variable_multiple_assignment
|
120
119
|
when VARIABLE_REFERENCE_TYPE
|
121
|
-
process_variable_referencing
|
120
|
+
:process_variable_referencing
|
122
121
|
when RESCUE_TYPE
|
123
|
-
process_rescue
|
122
|
+
:process_rescue
|
124
123
|
when ZERO_ARITY_SUPER_TYPE
|
125
|
-
process_zero_arity_super
|
124
|
+
:process_zero_arity_super
|
126
125
|
when SEND_TYPE
|
127
|
-
process_send
|
126
|
+
:process_send
|
128
127
|
when *ARGUMENT_DECLARATION_TYPES
|
129
|
-
process_variable_declaration
|
128
|
+
:process_variable_declaration
|
130
129
|
when *OPERATOR_ASSIGNMENT_TYPES
|
131
|
-
process_variable_operator_assignment
|
130
|
+
:process_variable_operator_assignment
|
132
131
|
when *LOOP_TYPES
|
133
|
-
process_loop
|
132
|
+
:process_loop
|
134
133
|
when *SCOPE_TYPES
|
135
|
-
process_scope
|
134
|
+
:process_scope
|
136
135
|
end
|
137
136
|
end
|
138
137
|
# rubocop:enable Metrics/MethodLength, Metrics/CyclomaticComplexity
|
data/lib/rubocop/options.rb
CHANGED
@@ -65,6 +65,7 @@ module RuboCop
|
|
65
65
|
add_severity_option(opts)
|
66
66
|
add_flags_with_optional_args(opts)
|
67
67
|
add_boolean_flags(opts)
|
68
|
+
add_aliases(opts)
|
68
69
|
|
69
70
|
option(opts, '-s', '--stdin FILE')
|
70
71
|
end
|
@@ -142,7 +143,7 @@ module RuboCop
|
|
142
143
|
end
|
143
144
|
end
|
144
145
|
|
145
|
-
def add_boolean_flags(opts)
|
146
|
+
def add_boolean_flags(opts)
|
146
147
|
option(opts, '-F', '--fail-fast')
|
147
148
|
option(opts, '-C', '--cache FLAG')
|
148
149
|
option(opts, '-d', '--debug')
|
@@ -150,10 +151,6 @@ module RuboCop
|
|
150
151
|
option(opts, '-E', '--extra-details')
|
151
152
|
option(opts, '-S', '--display-style-guide')
|
152
153
|
option(opts, '-R', '--rails')
|
153
|
-
option(opts, '-l', '--lint') do
|
154
|
-
@options[:only] ||= []
|
155
|
-
@options[:only] << 'Lint'
|
156
|
-
end
|
157
154
|
option(opts, '-a', '--auto-correct')
|
158
155
|
|
159
156
|
option(opts, '--[no-]color')
|
@@ -163,6 +160,18 @@ module RuboCop
|
|
163
160
|
option(opts, '-P', '--parallel')
|
164
161
|
end
|
165
162
|
|
163
|
+
def add_aliases(opts)
|
164
|
+
option(opts, '-l', '--lint') do
|
165
|
+
@options[:only] ||= []
|
166
|
+
@options[:only] << 'Lint'
|
167
|
+
end
|
168
|
+
option(opts, '-x', '--fix-layout') do
|
169
|
+
@options[:only] ||= []
|
170
|
+
@options[:only] << 'Layout'
|
171
|
+
@options[:auto_correct] = true
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
166
175
|
def add_list_options(opts)
|
167
176
|
option(opts, '-L', '--list-target-files')
|
168
177
|
end
|
@@ -379,6 +388,7 @@ module RuboCop
|
|
379
388
|
lint: 'Run only lint cops.',
|
380
389
|
list_target_files: 'List all files RuboCop will inspect.',
|
381
390
|
auto_correct: 'Auto-correct offenses.',
|
391
|
+
fix_layout: 'Run only layout cops, with auto-correct on.',
|
382
392
|
color: 'Force color output on or off.',
|
383
393
|
version: 'Display version.',
|
384
394
|
verbose_version: 'Display verbose version.',
|
data/lib/rubocop/result_cache.rb
CHANGED
@@ -155,12 +155,12 @@ module RuboCop
|
|
155
155
|
ResultCache.source_checksum ||=
|
156
156
|
begin
|
157
157
|
lib_root = File.join(File.dirname(__FILE__), '..')
|
158
|
-
|
158
|
+
exe_root = File.join(lib_root, '..', 'exe')
|
159
159
|
|
160
160
|
# These are all the files we have `require`d plus everything in the
|
161
|
-
#
|
161
|
+
# exe directory. A change to any of them could affect the cop output
|
162
162
|
# so we include them in the cache hash.
|
163
|
-
source_files = $LOADED_FEATURES + Find.find(
|
163
|
+
source_files = $LOADED_FEATURES + Find.find(exe_root).to_a
|
164
164
|
sources = source_files
|
165
165
|
.select { |path| File.file?(path) }
|
166
166
|
.sort
|
data/lib/rubocop/string_util.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'jaro_winkler'
|
4
|
+
|
3
5
|
module RuboCop
|
4
6
|
# This module provides approximate string matching methods.
|
5
7
|
module StringUtil
|
@@ -8,152 +10,5 @@ module RuboCop
|
|
8
10
|
def similarity(string_a, string_b)
|
9
11
|
JaroWinkler.distance(string_a.to_s, string_b.to_s)
|
10
12
|
end
|
11
|
-
|
12
|
-
# This class computes Jaro distance, which is a measure of similarity
|
13
|
-
# between two strings.
|
14
|
-
class Jaro
|
15
|
-
attr_reader :shorter, :longer
|
16
|
-
|
17
|
-
def self.distance(*args)
|
18
|
-
new(*args).distance
|
19
|
-
end
|
20
|
-
|
21
|
-
def initialize(string_a, string_b)
|
22
|
-
if string_a.size < string_b.size
|
23
|
-
@shorter = string_a
|
24
|
-
@longer = string_b
|
25
|
-
else
|
26
|
-
@shorter = string_b
|
27
|
-
@longer = string_a
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def distance
|
32
|
-
@distance ||= compute_distance
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
def compute_distance
|
38
|
-
common_chars_a, common_chars_b = find_common_characters
|
39
|
-
matched_count = common_chars_a.size
|
40
|
-
|
41
|
-
return 0.0 if matched_count.zero?
|
42
|
-
|
43
|
-
transposition_count =
|
44
|
-
count_transpositions(common_chars_a, common_chars_b)
|
45
|
-
|
46
|
-
compute_non_zero_distance(matched_count.to_f, transposition_count)
|
47
|
-
end
|
48
|
-
|
49
|
-
# rubocop:disable Metrics/AbcSize
|
50
|
-
def find_common_characters
|
51
|
-
common_chars_of_shorter = Array.new(shorter.size)
|
52
|
-
common_chars_of_longer = Array.new(longer.size)
|
53
|
-
|
54
|
-
shorter.each_char.with_index do |shorter_char, shorter_index|
|
55
|
-
matching_index_range(shorter_index).each do |longer_index|
|
56
|
-
longer_char = longer.chars[longer_index]
|
57
|
-
|
58
|
-
next unless shorter_char == longer_char
|
59
|
-
|
60
|
-
common_chars_of_shorter[shorter_index] = shorter_char
|
61
|
-
common_chars_of_longer[longer_index] = longer_char
|
62
|
-
|
63
|
-
# Mark the matching character as already used
|
64
|
-
longer.chars[longer_index] = nil
|
65
|
-
|
66
|
-
break
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
[common_chars_of_shorter, common_chars_of_longer].map(&:compact)
|
71
|
-
end
|
72
|
-
# rubocop:enable Metrics/AbcSize
|
73
|
-
|
74
|
-
def count_transpositions(common_chars_a, common_chars_b)
|
75
|
-
common_chars_a.size.times.count do |index|
|
76
|
-
common_chars_a[index] != common_chars_b[index]
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
def compute_non_zero_distance(matched_count, transposition_count)
|
81
|
-
sum = (matched_count / shorter.size.to_f) +
|
82
|
-
(matched_count / longer.size.to_f) +
|
83
|
-
((matched_count - transposition_count / 2) / matched_count)
|
84
|
-
|
85
|
-
sum / 3.0
|
86
|
-
end
|
87
|
-
|
88
|
-
def matching_index_range(origin)
|
89
|
-
min = origin - matching_window
|
90
|
-
min = 0 if min < 0
|
91
|
-
|
92
|
-
max = origin + matching_window
|
93
|
-
|
94
|
-
min..max
|
95
|
-
end
|
96
|
-
|
97
|
-
def matching_window
|
98
|
-
@matching_window ||= (longer.size / 2) - 1
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
# This class computes Jaro-Winkler distance, which adds prefix-matching
|
103
|
-
# bonus to Jaro distance.
|
104
|
-
class JaroWinkler < Jaro
|
105
|
-
# Add the prefix bonus only when the Jaro distance is above this value.
|
106
|
-
# In other words, if the Jaro distance is less than this value,
|
107
|
-
# JaroWinkler.distance returns the raw Jaro distance.
|
108
|
-
DEFAULT_BOOST_THRESHOLD = 0.7
|
109
|
-
|
110
|
-
# How much the prefix bonus is weighted.
|
111
|
-
# This should not exceed 0.25.
|
112
|
-
DEFAULT_SCALING_FACTOR = 0.1
|
113
|
-
|
114
|
-
# Cutoff the common prefix length to this value if it's longer than this.
|
115
|
-
MAX_COMMON_PREFIX_LENGTH = 4
|
116
|
-
|
117
|
-
attr_reader :boost_threshold, :scaling_factor
|
118
|
-
|
119
|
-
def initialize(string_a, string_b,
|
120
|
-
boost_threshold = nil, scaling_factor = nil)
|
121
|
-
super(string_a, string_b)
|
122
|
-
@boost_threshold = boost_threshold || DEFAULT_BOOST_THRESHOLD
|
123
|
-
@scaling_factor = scaling_factor || DEFAULT_SCALING_FACTOR
|
124
|
-
end
|
125
|
-
|
126
|
-
private
|
127
|
-
|
128
|
-
def compute_distance
|
129
|
-
jaro_distance = super
|
130
|
-
|
131
|
-
if jaro_distance >= boost_threshold
|
132
|
-
bonus = limited_common_prefix_length.to_f * scaling_factor.to_f *
|
133
|
-
(1.0 - jaro_distance)
|
134
|
-
jaro_distance + bonus
|
135
|
-
else
|
136
|
-
jaro_distance
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
def limited_common_prefix_length
|
141
|
-
length = common_prefix_length
|
142
|
-
|
143
|
-
if length > MAX_COMMON_PREFIX_LENGTH
|
144
|
-
MAX_COMMON_PREFIX_LENGTH
|
145
|
-
else
|
146
|
-
length
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
def common_prefix_length
|
151
|
-
shorter.size.times do |index|
|
152
|
-
return index unless shorter[index] == longer[index]
|
153
|
-
end
|
154
|
-
|
155
|
-
shorter.size
|
156
|
-
end
|
157
|
-
end
|
158
13
|
end
|
159
14
|
end
|