hone 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.standard.yml +8 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +201 -0
- data/Rakefile +10 -0
- data/examples/.hone/harness.rb +41 -0
- data/examples/README.md +22 -0
- data/examples/allocation_patterns.rb +66 -0
- data/examples/cpu_patterns.rb +50 -0
- data/examples/jit_patterns.rb +69 -0
- data/exe/hone +7 -0
- data/lib/hone/adapters/base.rb +35 -0
- data/lib/hone/adapters/fasterer.rb +38 -0
- data/lib/hone/adapters/rubocop_performance.rb +85 -0
- data/lib/hone/analyzer.rb +258 -0
- data/lib/hone/cli.rb +247 -0
- data/lib/hone/config.rb +93 -0
- data/lib/hone/correlator.rb +250 -0
- data/lib/hone/exit_codes.rb +10 -0
- data/lib/hone/finding.rb +64 -0
- data/lib/hone/finding_filter.rb +57 -0
- data/lib/hone/formatters/base.rb +25 -0
- data/lib/hone/formatters/filterable.rb +31 -0
- data/lib/hone/formatters/github.rb +71 -0
- data/lib/hone/formatters/json.rb +75 -0
- data/lib/hone/formatters/junit.rb +154 -0
- data/lib/hone/formatters/sarif.rb +179 -0
- data/lib/hone/formatters/tsv.rb +49 -0
- data/lib/hone/harness.rb +57 -0
- data/lib/hone/harness_generator.rb +128 -0
- data/lib/hone/harness_runner.rb +172 -0
- data/lib/hone/method_map.rb +140 -0
- data/lib/hone/patterns/README.md +174 -0
- data/lib/hone/patterns/array_compact.rb +105 -0
- data/lib/hone/patterns/array_include_set.rb +34 -0
- data/lib/hone/patterns/base.rb +90 -0
- data/lib/hone/patterns/block_to_proc.rb +109 -0
- data/lib/hone/patterns/bsearch_vs_find.rb +80 -0
- data/lib/hone/patterns/chars_map_ord.rb +42 -0
- data/lib/hone/patterns/chars_to_variable.rb +136 -0
- data/lib/hone/patterns/chars_to_variable_tainted.rb +136 -0
- data/lib/hone/patterns/constant_regexp.rb +74 -0
- data/lib/hone/patterns/count_vs_size.rb +35 -0
- data/lib/hone/patterns/divmod.rb +92 -0
- data/lib/hone/patterns/dynamic_ivar.rb +44 -0
- data/lib/hone/patterns/dynamic_ivar_get.rb +33 -0
- data/lib/hone/patterns/each_with_index.rb +116 -0
- data/lib/hone/patterns/each_with_object.rb +63 -0
- data/lib/hone/patterns/flatten_once.rb +28 -0
- data/lib/hone/patterns/gsub_to_tr.rb +48 -0
- data/lib/hone/patterns/hash_each_key.rb +41 -0
- data/lib/hone/patterns/hash_each_value.rb +31 -0
- data/lib/hone/patterns/hash_keys_include.rb +30 -0
- data/lib/hone/patterns/hash_merge_bang.rb +33 -0
- data/lib/hone/patterns/hash_values_include.rb +31 -0
- data/lib/hone/patterns/inject_sum.rb +48 -0
- data/lib/hone/patterns/kernel_loop.rb +27 -0
- data/lib/hone/patterns/lazy_ivar.rb +39 -0
- data/lib/hone/patterns/map_compact.rb +32 -0
- data/lib/hone/patterns/map_flatten.rb +31 -0
- data/lib/hone/patterns/map_select_chain.rb +32 -0
- data/lib/hone/patterns/parallel_assignment.rb +127 -0
- data/lib/hone/patterns/positive_predicate.rb +27 -0
- data/lib/hone/patterns/range_include.rb +34 -0
- data/lib/hone/patterns/redundant_string_chars.rb +82 -0
- data/lib/hone/patterns/regexp_match.rb +126 -0
- data/lib/hone/patterns/reverse_each.rb +30 -0
- data/lib/hone/patterns/reverse_first.rb +40 -0
- data/lib/hone/patterns/select_count.rb +32 -0
- data/lib/hone/patterns/select_first.rb +31 -0
- data/lib/hone/patterns/select_map.rb +32 -0
- data/lib/hone/patterns/shuffle_first.rb +30 -0
- data/lib/hone/patterns/slice_with_length.rb +48 -0
- data/lib/hone/patterns/sort_by_first.rb +31 -0
- data/lib/hone/patterns/sort_by_last.rb +31 -0
- data/lib/hone/patterns/sort_first.rb +52 -0
- data/lib/hone/patterns/sort_last.rb +30 -0
- data/lib/hone/patterns/sort_reverse.rb +53 -0
- data/lib/hone/patterns/string_casecmp.rb +54 -0
- data/lib/hone/patterns/string_chars_each.rb +56 -0
- data/lib/hone/patterns/string_concat_in_loop.rb +116 -0
- data/lib/hone/patterns/string_delete_prefix.rb +53 -0
- data/lib/hone/patterns/string_delete_suffix.rb +53 -0
- data/lib/hone/patterns/string_empty.rb +64 -0
- data/lib/hone/patterns/string_end_with.rb +81 -0
- data/lib/hone/patterns/string_shovel.rb +75 -0
- data/lib/hone/patterns/string_start_with.rb +80 -0
- data/lib/hone/patterns/taint_tracking_base.rb +230 -0
- data/lib/hone/patterns/times_map.rb +38 -0
- data/lib/hone/patterns/uniq_by.rb +32 -0
- data/lib/hone/patterns/yield_vs_block.rb +72 -0
- data/lib/hone/profilers/base.rb +162 -0
- data/lib/hone/profilers/factory.rb +31 -0
- data/lib/hone/profilers/memory_profiler.rb +213 -0
- data/lib/hone/profilers/stackprof.rb +99 -0
- data/lib/hone/profilers/vernier.rb +147 -0
- data/lib/hone/reporter.rb +371 -0
- data/lib/hone/scanner.rb +75 -0
- data/lib/hone/suggestion_generator.rb +23 -0
- data/lib/hone/version.rb +5 -0
- data/lib/hone.rb +108 -0
- data/logo.png +0 -0
- data/sig/hone.rbs +4 -0
- metadata +176 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Pattern: array.map { |x| x.to_s } -> array.map(&:to_s)
    #
    # When a block simply calls a single method on its parameter with no
    # arguments, the Symbol#to_proc shorthand is more idiomatic and slightly
    # more efficient.
    #
    # Examples:
    #   # Bad: verbose block
    #   array.map { |x| x.to_s }
    #   array.select { |item| item.valid? }
    #   # Good: Symbol#to_proc shorthand
    #   array.map(&:to_s)
    #   array.select(&:valid?)
    #
    # Blocks that use safe navigation (`{ |x| x&.to_s }`) are deliberately not
    # reported: `&:to_s` would call the method on nil elements as well, so the
    # shorthand is not an equivalent rewrite.
    class BlockToProc < Base
      self.pattern_id = :block_to_proc
      self.optimization_type = :cpu

      # Methods that commonly take blocks and can use Symbol#to_proc
      APPLICABLE_METHODS = %i[
        map collect select find_all reject detect find
        any? all? none? one? sort_by group_by partition
        max_by min_by minmax_by count take_while drop_while
        filter filter_map
      ].freeze

      # Reports calls to one of APPLICABLE_METHODS whose literal block has a
      # single required parameter and a body that is exactly one argument-less
      # method call on that parameter.
      def visit_call_node(node)
        super

        return unless APPLICABLE_METHODS.include?(node.name)

        block = node.block
        return unless block.is_a?(Prism::BlockNode)
        return unless single_param_block?(block)
        return unless block_body_is_single_method_call?(block)

        param_name = extract_single_param_name(block)
        method_name = extract_called_method_name(block)

        return unless param_name && method_name

        add_finding(
          node,
          message: "Use `.#{node.name}(&:#{method_name})` instead of `.#{node.name} { |#{param_name}| #{param_name}.#{method_name} }` for Symbol#to_proc shorthand",
          speedup: "Minor, but more idiomatic Ruby"
        )
      end

      private

      # True when the block declares exactly one required parameter and nothing
      # else (no optionals, splat, keywords, keyword splat or block parameter).
      def single_param_block?(block)
        parameters = block.parameters
        return false unless parameters.is_a?(Prism::BlockParametersNode)

        params = parameters.parameters
        return false unless params.is_a?(Prism::ParametersNode)

        params.requireds.size == 1 &&
          params.optionals.empty? &&
          params.rest.nil? &&
          params.keywords.empty? &&
          params.keyword_rest.nil? &&
          params.block.nil?
      end

      # True when the block body is a single plain method call on a local
      # variable, with no arguments, no block of its own and no safe navigation.
      def block_body_is_single_method_call?(block)
        body = block.body
        return false unless body.is_a?(Prism::StatementsNode)
        return false unless body.body.size == 1

        statement = body.body.first
        return false unless statement.is_a?(Prism::CallNode)

        # The receiver should be a local variable read matching the block param
        # (the name itself is compared in extract_called_method_name)
        return false unless statement.receiver.is_a?(Prism::LocalVariableReadNode)

        # `x&.foo` skips nil receivers, `&:foo` does not - not interchangeable
        return false if statement.safe_navigation?

        # The call should have no arguments
        return false if statement.arguments && !statement.arguments.arguments.empty?

        # The call should not have its own block
        return false if statement.block

        true
      end

      # Name of the block's single parameter, or nil when that parameter is not
      # a plain required parameter (for example a destructuring `|(a, b)|`).
      def extract_single_param_name(block)
        param = block.parameters.parameters.requireds.first
        param.name if param.is_a?(Prism::RequiredParameterNode)
      end

      # Name of the method called in the block body, or nil when the call's
      # receiver is some other local variable than the block parameter.
      # Expects a block already accepted by block_body_is_single_method_call?.
      def extract_called_method_name(block)
        statement = block.body.body.first

        # Verify the receiver matches the block parameter
        return nil unless statement.receiver.name == extract_single_param_name(block)

        statement.name
      end
    end
  end
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Pattern: sorted_array.find { |x| x >= target } -> sorted_array.bsearch { |x| x >= target }
    #
    # When searching sorted data for the first element matching a comparison,
    # bsearch uses binary search (O(log n)) vs find's linear search (O(n)).
    #
    # This pattern is conservative and only reports when it detects:
    # - find with a block whose single statement is a comparison that goes from
    #   false to true over ascending data: `element >= x` / `element > x`, or the
    #   mirrored `x <= element` / `x < element`. `element` may be the block
    #   parameter itself or a call chain on it (`user.age >= 18`).
    # - The receiver name contains hints like "sorted" or common sorted collection names
    #
    # Comparisons such as `element <= x` are not reported: over ascending data
    # they are true first and false later, which is not the shape
    # Array#bsearch's find-minimum mode requires.
    class BsearchVsFind < Base
      self.pattern_id = :bsearch_vs_find
      self.optimization_type = :cpu

      # Names that suggest the array is sorted
      SORTED_HINTS = %w[sorted ordered ranked].freeze

      # Operators that are valid when the block element is the left operand
      ELEMENT_ON_LEFT_OPERATORS = %i[>= >].freeze

      # Operators that are valid when the block element is the right operand
      ELEMENT_ON_RIGHT_OPERATORS = %i[<= <].freeze

      # Reports `find` calls with a literal comparison block on a receiver whose
      # name suggests sorted data.
      def visit_call_node(node)
        super

        return unless node.name == :find
        return unless block_attached?(node)

        block = node.block
        return unless block.is_a?(Prism::BlockNode)
        return unless comparison_block?(block)
        return unless likely_sorted_receiver?(node.receiver)

        add_finding(
          node,
          message: "Consider `.bsearch { }` instead of `.find { }` for O(log n) search on sorted data",
          speedup: "O(log n) vs O(n) for sorted data"
        )
      end

      private

      # Check if the block body is a single comparison usable with bsearch
      def comparison_block?(block)
        body = block.body
        return false unless body.is_a?(Prism::StatementsNode)
        return false unless body.body.size == 1

        comparison_expression?(body.body.first, block_element_name(block))
      end

      # True when +node+ is a comparison call that is false-then-true over
      # ascending data.
      #
      # When +element_name+ is known, the operand on the "element" side must be
      # rooted in that block parameter. When it is nil (no explicit single block
      # parameter could be determined) only the documented `>=` / `>` operators
      # are accepted, without inspecting the operands.
      def comparison_expression?(node, element_name = nil)
        return false unless node.is_a?(Prism::CallNode)
        return ELEMENT_ON_LEFT_OPERATORS.include?(node.name) if element_name.nil?

        if ELEMENT_ON_LEFT_OPERATORS.include?(node.name)
          rooted_in_element?(node.receiver, element_name)
        elsif ELEMENT_ON_RIGHT_OPERATORS.include?(node.name)
          operands = node.arguments&.arguments || []
          operands.size == 1 && rooted_in_element?(operands.first, element_name)
        else
          false
        end
      end

      # Name of the block's single explicit required parameter, or nil when the
      # block does not declare exactly one plain required parameter.
      def block_element_name(block)
        parameters = block.parameters
        return nil unless parameters.is_a?(Prism::BlockParametersNode)

        params = parameters.parameters
        return nil unless params.is_a?(Prism::ParametersNode)
        return nil unless params.requireds.size == 1

        param = params.requireds.first
        param.name if param.is_a?(Prism::RequiredParameterNode)
      end

      # True when +node+ reads the block element directly or is a chain of
      # method calls whose innermost receiver is the block element.
      def rooted_in_element?(node, element_name)
        case node
        when Prism::LocalVariableReadNode
          node.name == element_name
        when Prism::CallNode
          rooted_in_element?(node.receiver, element_name)
        else
          false
        end
      end

      # Conservative check: only flag if receiver name hints at sorted data
      def likely_sorted_receiver?(receiver)
        return false unless receiver

        name = extract_receiver_name(receiver)
        return false unless name

        name_str = name.to_s.downcase
        SORTED_HINTS.any? { |hint| name_str.include?(hint) }
      end

      # Name used for the sorted-data heuristic: the variable name for local and
      # instance variables (without the leading "@"), the method name for calls,
      # nil for any other receiver.
      def extract_receiver_name(node)
        case node
        when Prism::LocalVariableReadNode
          node.name
        when Prism::InstanceVariableReadNode
          node.name.to_s.delete_prefix("@")
        when Prism::CallNode
          # For method calls like foo.bar, use the method name
          node.name
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Pattern: str.chars.map(&:ord) -> str.codepoints
    #
    # The .chars.map(&:ord) chain first builds an array of one-character
    # strings and then converts every entry to its ordinal. Calling
    # .codepoints on the string produces the ordinals directly, which is
    # faster and allocates less memory.
    #
    # From sqids-ruby commit 9413b68
    class CharsMapOrd < Base
      self.pattern_id = :chars_map_ord
      self.optimization_type = :allocation

      # Reports `.map(&:ord)` calls whose receiver is a `.chars` call.
      def visit_call_node(node)
        super

        return unless node.name == :map
        return unless block_arg_is_symbol?(node, :ord)

        chars_call = node.receiver
        return unless chars_call.is_a?(Prism::CallNode)
        return unless chars_call.name == :chars

        add_finding(
          node,
          message: "Use `.codepoints` instead of `.chars.map(&:ord)` to avoid intermediate array allocation",
          speedup: "Fewer allocations"
        )
      end

      private

      # True when the call passes a block argument of the form `&:sym_name`,
      # i.e. a block-pass whose expression is a symbol literal with that name.
      def block_arg_is_symbol?(call_node, sym_name)
        block_pass = call_node.block
        return false unless block_pass.is_a?(Prism::BlockArgumentNode)

        symbol = block_pass.expression
        return false unless symbol.is_a?(Prism::SymbolNode)

        symbol.value == sym_name.to_s
      end
    end
  end
end
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Approach 1: Simple scope-limited variable tracking
    #
    # Detects when .chars is assigned to a variable and then used in ways
    # that could be done directly on the string.
    #
    # A variable stops being tracked as soon as it is reassigned to anything
    # other than a `.chars` call, so later uses of the new value are not
    # reported.
    #
    # @example Bad - allocates array just for indexing
    #   chars = str.chars
    #   chars[0]
    #   chars.length
    #   chars.each { |c| ... }
    #
    # @example Good - direct string operations
    #   str[0]
    #   str.length
    #   str.each_char { |c| ... }
    #
    class CharsToVariable < Base
      self.pattern_id = :chars_to_variable
      self.optimization_type = :allocation

      def initialize(file_path)
        super
        # One hash per open def/block/lambda: variable name => tracking info
        @scope_stack = []
      end

      # Track method scope
      def visit_def_node(node)
        with_scope { super }
      end

      # Track block scope
      def visit_block_node(node)
        with_scope { super }
      end

      # Track lambda scope
      def visit_lambda_node(node)
        with_scope { super }
      end

      # Track .chars assignments
      def visit_local_variable_write_node(node)
        super
        track_chars_assignment(node)
      end

      # Check for inefficient usage of tracked variables
      def visit_call_node(node)
        super
        check_inefficient_usage(node)
      end

      private

      # Runs the given block with a fresh, empty scope pushed on the stack and
      # always pops it again, even when the traversal raises.
      def with_scope
        @scope_stack.push({})
        yield
      ensure
        @scope_stack.pop
      end

      # Innermost open scope. Outside any def/block/lambda the stack is empty and
      # a throwaway hash is returned, so nothing written there is ever matched.
      def current_scope
        @scope_stack.last || {}
      end

      # Records `var = something.chars` in the current scope, and forgets the
      # variable when it is assigned anything else.
      def track_chars_assignment(node)
        info = chars_assignment_info(node)

        if info
          current_scope[node.name] = info
        else
          # The variable no longer holds the result of .chars
          current_scope.delete(node.name)
        end
      end

      # Tracking info for an assignment whose value is an argument-less `.chars`
      # call with an explicit receiver; nil for every other assignment.
      def chars_assignment_info(node)
        value = node.value
        return nil unless value.is_a?(Prism::CallNode)
        return nil unless value.name == :chars
        return nil if arguments_given?(value) # chars with args is different

        source_receiver = value.receiver
        return nil unless source_receiver # Need to know what .chars was called on

        {
          source_code: source_receiver.slice,
          assignment_line: node.location.start_line
        }
      end

      # True when the call node carries at least one argument.
      def arguments_given?(call_node)
        call_node.arguments&.arguments&.any?
      end

      # Reports calls on a tracked variable that have a direct String
      # counterpart. Only variables tracked in the innermost scope are matched.
      def check_inefficient_usage(node)
        receiver = node.receiver
        return unless receiver.is_a?(Prism::LocalVariableReadNode)

        info = current_scope[receiver.name]
        return unless info

        source = info[:source_code]

        case node.name
        when :[]
          add_finding(
            node,
            message: "Use `#{source}[...]` directly instead of `.chars` variable indexing",
            speedup: "Avoids allocating array of all characters"
          )
        when :length, :size
          add_finding(
            node,
            message: "Use `#{source}.length` directly instead of `.chars.length`",
            speedup: "Avoids allocating array of all characters"
          )
        when :each
          return unless block_attached?(node)

          add_finding(
            node,
            message: "Use `#{source}.each_char { }` instead of `.chars.each { }`",
            speedup: "No intermediate array allocation"
          )
        when :first
          # first(n) returns an Array of n characters; `str[0]` is not equivalent
          return if arguments_given?(node)

          add_finding(
            node,
            message: "Use `#{source}[0]` instead of `.chars.first`",
            speedup: "Avoids allocating array of all characters"
          )
        when :last
          # last(n) returns an Array of n characters; `str[-1]` is not equivalent
          return if arguments_given?(node)

          add_finding(
            node,
            message: "Use `#{source}[-1]` instead of `.chars.last`",
            speedup: "Avoids allocating array of all characters"
          )
        when :include?
          add_finding(
            node,
            message: "Use `#{source}.include?(...)` directly on string",
            speedup: "String#include? works without array allocation"
          )
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Approach 2: Taint tracking version of CharsToVariable
    #
    # Same detection as CharsToVariable but using the TaintTrackingBase
    # infrastructure. This allows comparison of both approaches.
    #
    # Advantages over simple approach:
    # - Handles variable aliasing (x = chars; y = x; y[0])
    # - Handles instance variables (@chars = str.chars)
    # - Cleaner separation of concerns
    # - Easier to extend for other patterns
    #
    # @example Detects aliased usage
    #   chars = str.chars
    #   x = chars        # Taint propagates to x
    #   x[0]             # Still flagged!
    #
    class CharsToVariableTainted < TaintTrackingBase
      self.pattern_id = :chars_to_variable_tainted
      self.optimization_type = :allocation

      protected

      # Define what creates a taint: .chars calls
      #
      # Returns a hash describing the taint for an argument-less `.chars` call
      # with an explicit receiver, or nil for any other call.
      # NOTE(review): presumably a hook invoked by TaintTrackingBase, which is
      # not visible here - confirm how the returned hash is consumed.
      def taint_from_call(call_node)
        return nil unless call_node.name == :chars
        return nil if arguments_given?(call_node)

        source_receiver = call_node.receiver
        return nil unless source_receiver

        {
          type: :chars_array,
          source: source_receiver,
          source_code: source_receiver.slice,
          origin_line: call_node.location.start_line,
          metadata: {}
        }
      end

      # Check for problematic uses of tainted variables
      #
      # +taint_info+ is read through #type and #source_code accessors.
      def check_tainted_usage(call_node, var_name, taint_info)
        return unless taint_info.type == :chars_array

        source = taint_info.source_code

        case call_node.name
        when :[]
          report_indexing(call_node, source)
        when :length, :size
          report_length(call_node, source)
        when :each
          report_each(call_node, source) if block_attached?(call_node)
        when :first
          # first(n) returns an Array of n characters; `str[0]` is not equivalent
          report_first(call_node, source) unless arguments_given?(call_node)
        when :last
          # last(n) returns an Array of n characters; `str[-1]` is not equivalent
          report_last(call_node, source) unless arguments_given?(call_node)
        when :include?
          report_include(call_node, source)
        when :map
          report_map(call_node, source, var_name) if block_attached?(call_node)
        when :join
          # join actually needs the array - mark this usage as "necessary"
          # In a more sophisticated version, we'd track this and not report
          # the assignment if there's at least one necessary usage
          nil
        when :reverse, :sort, :shuffle, :sample
          # These operations need the array
          nil
        end
      end

      private

      # True when the call node carries at least one argument.
      def arguments_given?(call_node)
        call_node.arguments&.arguments&.any?
      end

      # Indexing into the chars array (`chars[i]`)
      def report_indexing(node, source)
        add_finding(
          node,
          message: "Use `#{source}[...]` directly instead of `.chars` variable indexing",
          speedup: "Avoids allocating array of all characters"
        )
      end

      # `chars.length` / `chars.size`
      def report_length(node, source)
        add_finding(
          node,
          message: "Use `#{source}.length` directly instead of `.chars.length`",
          speedup: "Avoids allocating array of all characters"
        )
      end

      # `chars.each { }`
      def report_each(node, source)
        add_finding(
          node,
          message: "Use `#{source}.each_char { }` instead of `.chars.each { }`",
          speedup: "No intermediate array allocation"
        )
      end

      # `chars.first` without a count
      def report_first(node, source)
        add_finding(
          node,
          message: "Use `#{source}[0]` instead of `.chars.first`",
          speedup: "Avoids allocating array of all characters"
        )
      end

      # `chars.last` without a count
      def report_last(node, source)
        add_finding(
          node,
          message: "Use `#{source}[-1]` instead of `.chars.last`",
          speedup: "Avoids allocating array of all characters"
        )
      end

      # `chars.include?(x)`
      def report_include(node, source)
        add_finding(
          node,
          message: "Use `#{source}.include?(...)` directly on string",
          speedup: "String#include? works without array allocation"
        )
      end

      # `chars.map { }`; each_char without a block returns a plain (not lazy)
      # Enumerator, so the gain is skipping the intermediate chars array.
      def report_map(node, source, var_name)
        add_finding(
          node,
          message: "Consider `#{source}.each_char.map { }` instead of `#{var_name}.map { }`",
          speedup: "Avoids the intermediate array of characters"
        )
      end
    end
  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Pattern: /pattern/ inside method -> extract to constant
    #
    # Premise of this pattern: a regexp literal inside a method is rebuilt
    # every time the method runs, whereas a constant is built once when the
    # file is loaded.
    #
    # NOTE(review): CRuby appears to create non-interpolated regexp literals
    # once when the code is compiled and to reuse that object afterwards -
    # confirm the claimed benefit with a benchmark.
    #
    # Example:
    #   # Bad - recompiles regexp on each call
    #   def process(str)
    #     str.gsub(/\s+/, ' ')
    #   end
    #
    #   # Good - compiled once at load time
    #   WHITESPACE = /\s+/
    #   def process(str)
    #     str.gsub(WHITESPACE, ' ')
    #   end
    #
    # Note: Only regexps without interpolation are flagged; interpolated
    # regexps may have to stay dynamic.
    class ConstantRegexp < Base
      self.pattern_id = :constant_regexp
      self.optimization_type = :allocation

      def initialize(file_path)
        super
        @in_method = false
      end

      # Marks the traversal as being inside a method definition while the
      # method's subtree is visited.
      def visit_def_node(node)
        with_context(:@in_method, true) { super }
      end

      # Reports static regexp literals found inside a method body.
      def visit_regular_expression_node(node)
        super

        return unless @in_method

        # Very short regexps are left alone: the overhead is minimal and
        # pulling them out into constants would only hurt readability.
        pattern = node.content
        return if pattern.length < 3

        add_finding(
          node,
          message: "Consider extracting regexp `/#{escape_for_message(pattern)}/` to a constant",
          speedup: "Avoids recompiling regexp on each call"
        )
      end

      # Interpolated regexps are intentionally ignored because they often need
      # to be dynamic. There is deliberately no call to super here, so the
      # node's children are not visited by this pattern either.
      def visit_interpolated_regular_expression_node(node)
      end

      private

      # Shortens regexp source longer than 20 characters to its first 18
      # characters followed by "..."; shorter content is returned unchanged.
      def escape_for_message(content)
        return content unless content.length > 20

        "#{content[0..17]}..."
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Pattern: array.count (no block) -> array.size or array.length
    #
    # When `count` is called without a block on an Array, it's slower than
    # `size`/`length` because count is designed for enumerables and does more work.
    # `.size` and `.length` are O(1) operations for Arrays as they simply return
    # the cached length, while `.count` may iterate.
    #
    # Only calls with an explicit receiver are reported. A bare `count` is a
    # call on self, whose class is unknown here and may not respond to `size`.
    class CountVsSize < Base
      self.pattern_id = :count_vs_size
      self.optimization_type = :cpu

      # Reports `receiver.count` calls that pass neither arguments nor a block.
      def visit_call_node(node)
        super
        # Look for: receiver.count with no arguments and no block
        return unless node.name == :count
        return unless node.receiver
        return unless no_arguments?(node) && node.block.nil?

        add_finding(
          node,
          message: "Use `.size` or `.length` instead of `.count` when counting all elements",
          speedup: "Minor, but `.size` is O(1) for Arrays vs `.count` which may iterate"
        )
      end

      private

      # Check if the node has no arguments
      def no_arguments?(node)
        node.arguments.nil? || node.arguments.arguments.empty?
      end
    end
  end
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Pattern: [n / d, n % d] -> n.divmod(d)
    #
    # When you need both quotient and remainder, divmod computes them
    # in a single operation instead of performing division twice.
    #
    # NOTE(review): the rewrite is only equivalent for Integer operands. With
    # Float operands `/` does not floor the quotient while `divmod` does; the
    # operand types cannot be determined from the syntax tree here.
    class Divmod < Base
      self.pattern_id = :divmod
      self.optimization_type = :cpu

      # Reports two-element array literals of the form `[n / d, n % d]` where
      # both elements use the same dividend and the same divisor.
      def visit_array_node(node)
        super

        elements = node.elements
        return unless elements.size == 2

        quotient, remainder = elements

        return unless division_operation?(quotient) && modulo_operation?(remainder)
        return unless same_operands?(quotient, remainder)

        add_finding(
          node,
          message: "Use `.divmod` instead of `[n / d, n % d]` for single operation",
          speedup: "Single operation instead of two"
        )
      end

      private

      # True when +node+ is a call to the `/` operator.
      def division_operation?(node)
        node.is_a?(Prism::CallNode) && node.name == :/
      end

      # True when +node+ is a call to the `%` operator.
      def modulo_operation?(node)
        node.is_a?(Prism::CallNode) && node.name == :%
      end

      # True when the division and the modulo share dividend and divisor.
      def same_operands?(div_node, mod_node)
        # Both should have receiver (dividend) and one argument (divisor)
        return false unless div_node.receiver && mod_node.receiver
        return false unless div_node.arguments&.arguments&.size == 1
        return false unless mod_node.arguments&.arguments&.size == 1

        # Compare receivers (dividend)
        return false unless nodes_equivalent?(div_node.receiver, mod_node.receiver)

        # Compare arguments (divisor)
        div_arg = div_node.arguments.arguments.first
        mod_arg = mod_node.arguments.arguments.first
        nodes_equivalent?(div_arg, mod_arg)
      end

      # Simple equivalence check for common node types
      def nodes_equivalent?(node1, node2)
        return false unless node1.instance_of?(node2.class)

        case node1
        when Prism::LocalVariableReadNode,
             Prism::InstanceVariableReadNode,
             Prism::ClassVariableReadNode,
             Prism::GlobalVariableReadNode
          node1.name == node2.name
        when Prism::IntegerNode
          node1.value == node2.value
        when Prism::CallNode
          call_nodes_equivalent?(node1, node2)
        else
          # For other node types, compare the source directly
          node1.location.slice == node2.location.slice
        end
      end

      # Two calls are equivalent when method name, receiver, arguments and block
      # all match. Arguments must be compared too: `a[0]` and `a[1]` share name
      # and receiver but are different values.
      def call_nodes_equivalent?(call1, call2)
        return false unless call1.name == call2.name
        return false unless optional_nodes_equivalent?(call1.receiver, call2.receiver)

        # Blocks are compared by source text; both absent also counts as equal
        return false unless call1.block&.location&.slice == call2.block&.location&.slice

        args1 = call1.arguments&.arguments || []
        args2 = call2.arguments&.arguments || []
        return false unless args1.size == args2.size

        args1.zip(args2).all? { |arg1, arg2| nodes_equivalent?(arg1, arg2) }
      end

      # Equivalence for nodes that may be absent: both nil, or both present and
      # equivalent.
      def optional_nodes_equivalent?(node1, node2)
        return node1.nil? && node2.nil? unless node1 && node2

        nodes_equivalent?(node1, node2)
      end
    end
  end
end
|