hone 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.standard.yml +8 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +201 -0
- data/Rakefile +10 -0
- data/examples/.hone/harness.rb +41 -0
- data/examples/README.md +22 -0
- data/examples/allocation_patterns.rb +66 -0
- data/examples/cpu_patterns.rb +50 -0
- data/examples/jit_patterns.rb +69 -0
- data/exe/hone +7 -0
- data/lib/hone/adapters/base.rb +35 -0
- data/lib/hone/adapters/fasterer.rb +38 -0
- data/lib/hone/adapters/rubocop_performance.rb +85 -0
- data/lib/hone/analyzer.rb +258 -0
- data/lib/hone/cli.rb +247 -0
- data/lib/hone/config.rb +93 -0
- data/lib/hone/correlator.rb +250 -0
- data/lib/hone/exit_codes.rb +10 -0
- data/lib/hone/finding.rb +64 -0
- data/lib/hone/finding_filter.rb +57 -0
- data/lib/hone/formatters/base.rb +25 -0
- data/lib/hone/formatters/filterable.rb +31 -0
- data/lib/hone/formatters/github.rb +71 -0
- data/lib/hone/formatters/json.rb +75 -0
- data/lib/hone/formatters/junit.rb +154 -0
- data/lib/hone/formatters/sarif.rb +179 -0
- data/lib/hone/formatters/tsv.rb +49 -0
- data/lib/hone/harness.rb +57 -0
- data/lib/hone/harness_generator.rb +128 -0
- data/lib/hone/harness_runner.rb +172 -0
- data/lib/hone/method_map.rb +140 -0
- data/lib/hone/patterns/README.md +174 -0
- data/lib/hone/patterns/array_compact.rb +105 -0
- data/lib/hone/patterns/array_include_set.rb +34 -0
- data/lib/hone/patterns/base.rb +90 -0
- data/lib/hone/patterns/block_to_proc.rb +109 -0
- data/lib/hone/patterns/bsearch_vs_find.rb +80 -0
- data/lib/hone/patterns/chars_map_ord.rb +42 -0
- data/lib/hone/patterns/chars_to_variable.rb +136 -0
- data/lib/hone/patterns/chars_to_variable_tainted.rb +136 -0
- data/lib/hone/patterns/constant_regexp.rb +74 -0
- data/lib/hone/patterns/count_vs_size.rb +35 -0
- data/lib/hone/patterns/divmod.rb +92 -0
- data/lib/hone/patterns/dynamic_ivar.rb +44 -0
- data/lib/hone/patterns/dynamic_ivar_get.rb +33 -0
- data/lib/hone/patterns/each_with_index.rb +116 -0
- data/lib/hone/patterns/each_with_object.rb +63 -0
- data/lib/hone/patterns/flatten_once.rb +28 -0
- data/lib/hone/patterns/gsub_to_tr.rb +48 -0
- data/lib/hone/patterns/hash_each_key.rb +41 -0
- data/lib/hone/patterns/hash_each_value.rb +31 -0
- data/lib/hone/patterns/hash_keys_include.rb +30 -0
- data/lib/hone/patterns/hash_merge_bang.rb +33 -0
- data/lib/hone/patterns/hash_values_include.rb +31 -0
- data/lib/hone/patterns/inject_sum.rb +48 -0
- data/lib/hone/patterns/kernel_loop.rb +27 -0
- data/lib/hone/patterns/lazy_ivar.rb +39 -0
- data/lib/hone/patterns/map_compact.rb +32 -0
- data/lib/hone/patterns/map_flatten.rb +31 -0
- data/lib/hone/patterns/map_select_chain.rb +32 -0
- data/lib/hone/patterns/parallel_assignment.rb +127 -0
- data/lib/hone/patterns/positive_predicate.rb +27 -0
- data/lib/hone/patterns/range_include.rb +34 -0
- data/lib/hone/patterns/redundant_string_chars.rb +82 -0
- data/lib/hone/patterns/regexp_match.rb +126 -0
- data/lib/hone/patterns/reverse_each.rb +30 -0
- data/lib/hone/patterns/reverse_first.rb +40 -0
- data/lib/hone/patterns/select_count.rb +32 -0
- data/lib/hone/patterns/select_first.rb +31 -0
- data/lib/hone/patterns/select_map.rb +32 -0
- data/lib/hone/patterns/shuffle_first.rb +30 -0
- data/lib/hone/patterns/slice_with_length.rb +48 -0
- data/lib/hone/patterns/sort_by_first.rb +31 -0
- data/lib/hone/patterns/sort_by_last.rb +31 -0
- data/lib/hone/patterns/sort_first.rb +52 -0
- data/lib/hone/patterns/sort_last.rb +30 -0
- data/lib/hone/patterns/sort_reverse.rb +53 -0
- data/lib/hone/patterns/string_casecmp.rb +54 -0
- data/lib/hone/patterns/string_chars_each.rb +56 -0
- data/lib/hone/patterns/string_concat_in_loop.rb +116 -0
- data/lib/hone/patterns/string_delete_prefix.rb +53 -0
- data/lib/hone/patterns/string_delete_suffix.rb +53 -0
- data/lib/hone/patterns/string_empty.rb +64 -0
- data/lib/hone/patterns/string_end_with.rb +81 -0
- data/lib/hone/patterns/string_shovel.rb +75 -0
- data/lib/hone/patterns/string_start_with.rb +80 -0
- data/lib/hone/patterns/taint_tracking_base.rb +230 -0
- data/lib/hone/patterns/times_map.rb +38 -0
- data/lib/hone/patterns/uniq_by.rb +32 -0
- data/lib/hone/patterns/yield_vs_block.rb +72 -0
- data/lib/hone/profilers/base.rb +162 -0
- data/lib/hone/profilers/factory.rb +31 -0
- data/lib/hone/profilers/memory_profiler.rb +213 -0
- data/lib/hone/profilers/stackprof.rb +99 -0
- data/lib/hone/profilers/vernier.rb +147 -0
- data/lib/hone/reporter.rb +371 -0
- data/lib/hone/scanner.rb +75 -0
- data/lib/hone/suggestion_generator.rb +23 -0
- data/lib/hone/version.rb +5 -0
- data/lib/hone.rb +108 -0
- data/logo.png +0 -0
- data/sig/hone.rbs +4 -0
- metadata +176 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module Hone
  # Loads a harness, runs its exercise block under a CPU profiler (and,
  # optionally, a memory profiler) and writes the profiles plus a
  # metadata.json file into the output directory.
  class HarnessRunner
    PROFILE_DIR = "tmp/hone"

    attr_reader :harness_path, :profiler, :include_memory, :warmup, :output_dir

    # @param harness_path [String] path passed to Harness.load
    # @param options [Hash] recognised keys:
    #   :profiler   - :stackprof or :vernier (String accepted); auto-detected when absent
    #   :memory     - also run the memory profiler (default false)
    #   :warmup     - number of unprofiled exercise runs (default 10)
    #   :output_dir - where profiles are written (default PROFILE_DIR)
    def initialize(harness_path, options = {})
      @harness_path = harness_path
      chosen = options[:profiler] || detect_profiler
      # Accept "stackprof" as well as :stackprof so the case/when in
      # profile_cpu matches either spelling.
      @profiler = chosen.respond_to?(:to_sym) ? chosen.to_sym : chosen
      @include_memory = options.fetch(:memory, false)
      @warmup = options.fetch(:warmup, 10)
      @output_dir = options.fetch(:output_dir, PROFILE_DIR)
    end

    # Runs setup, warmup, CPU profiling, optional memory profiling and
    # teardown, then writes metadata.
    #
    # @return [Hash] {cpu: path, memory: path_or_nil}
    # @raise [Hone::Error] when the harness is invalid or the profiler is unknown
    def run
      harness = Harness.load(@harness_path)
      raise Hone::Error, "Harness must define an exercise block" unless harness.valid?

      FileUtils.mkdir_p(@output_dir)

      # Setup phase (not profiled)
      harness.run_setup

      begin
        # Warmup phase (not profiled, lets JIT optimize)
        @warmup.times { harness.run_exercise }

        cpu_path = profile_cpu(harness)
        memory_path = @include_memory ? profile_memory(harness) : nil
      ensure
        # Teardown must run even when warmup/profiling raises, otherwise
        # whatever setup created is left behind.
        harness.run_teardown
      end

      write_metadata(cpu_path, memory_path, harness.iterations)

      {cpu: cpu_path, memory: memory_path}
    end

    private

    # Dispatches to the configured CPU profiler and returns the profile path.
    def profile_cpu(harness)
      path = File.join(@output_dir, "cpu_profile.json")

      case @profiler
      when :stackprof
        profile_with_stackprof(harness, path)
      when :vernier
        profile_with_vernier(harness, path)
      else
        raise Hone::Error, "Unknown profiler: #{@profiler}"
      end

      path
    end

    # Profiles harness.iterations exercise runs with StackProf and writes the
    # converted JSON to +path+.
    def profile_with_stackprof(harness, path)
      require "stackprof"
      # Capture the result in memory (don't let StackProf write Marshal format).
      # ignore_gc: true - skip GC frames (not actionable)
      result = StackProf.run(mode: :cpu, raw: true, interval: 1000, ignore_gc: true) do
        harness.iterations.times { harness.run_exercise }
      end
      File.write(path, stackprof_to_json(result))
    end

    # Converts a StackProf result hash to pretty-printed JSON with string keys,
    # dropping frames whose file is nil, empty, contains "<cfunc>" or starts
    # with "(" and summing the self-samples of the frames that remain.
    def stackprof_to_json(data)
      ruby_frames = {}
      ruby_samples = 0

      data[:frames].each do |address, frame|
        file = frame[:file]
        # Skip C functions and internal frames
        next if file.nil? || file.empty? || file.include?("<cfunc>") || file.start_with?("(")

        samples = frame[:samples] || 0
        ruby_samples += samples

        ruby_frames[address.to_s] = {
          "name" => frame[:name],
          "file" => file,
          "line" => frame[:line],
          "samples" => samples,
          "total_samples" => frame[:total_samples] || 0
        }
      end

      JSON.pretty_generate(
        "mode" => data[:mode].to_s,
        "samples" => data[:samples],
        "ruby_samples" => ruby_samples,
        "frames" => ruby_frames
      )
    end

    # Profiles harness.iterations exercise runs with Vernier, which writes
    # its own output to +path+.
    def profile_with_vernier(harness, path)
      require "vernier"
      Vernier.profile(out: path) do
        harness.iterations.times { harness.run_exercise }
      end
    end

    # Runs the exercise once under MemoryProfiler and writes the report as
    # JSON. Returns the path written.
    def profile_memory(harness)
      path = File.join(@output_dir, "memory_profile.json")
      require "memory_profiler"

      report = MemoryProfiler.report do
        harness.run_exercise
      end

      File.write(path, memory_report_to_json(report))
      path
    end

    # Serialises the totals and the per-location allocated/retained memory
    # stats of a MemoryProfiler report; each stat's :data becomes "location"
    # and its :count becomes "bytes".
    def memory_report_to_json(report)
      by_location = ->(stats) { stats.map { |stat| {location: stat[:data], bytes: stat[:count]} } }

      JSON.pretty_generate(
        total_allocated: report.total_allocated,
        total_retained: report.total_retained,
        allocated_memory_by_location: by_location.call(report.allocated_memory_by_location),
        retained_memory_by_location: by_location.call(report.retained_memory_by_location)
      )
    end

    # Writes metadata.json describing this profiling run.
    def write_metadata(cpu_path, memory_path, iterations)
      # Time#iso8601 lives in the "time" library on Ruby < 3.4; load it here,
      # following this file's habit of requiring optional pieces lazily.
      require "time"

      metadata = {
        generated_at: Time.now.iso8601,
        ruby_version: RUBY_VERSION,
        yjit_enabled: yjit_enabled?,
        profiler: @profiler.to_s,
        warmup_iterations: @warmup,
        profile_iterations: iterations,
        cpu_profile: cpu_path,
        memory_profile: memory_path
      }

      File.write(File.join(@output_dir, "metadata.json"), JSON.pretty_generate(metadata))
    end

    # Always a real boolean, so the metadata never contains null for this key.
    def yjit_enabled?
      return false unless defined?(RubyVM::YJIT)

      RubyVM::YJIT.enabled? ? true : false
    end

    # Prefers Vernier, falls back to StackProf, and raises when neither gem
    # can be loaded.
    def detect_profiler
      require "vernier"
      :vernier
    rescue LoadError
      begin
        require "stackprof"
        :stackprof
      rescue LoadError
        raise Hone::Error, "No profiler available. Install stackprof or vernier gem."
      end
    end
  end
end
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "prism"
|
|
4
|
+
|
|
5
|
+
module Hone
  # Maps method definitions to their line ranges for correlating hotspots
  # with specific methods in Ruby source files.
  class MethodMap
    # One method definition: its name, the enclosing class/module path (or
    # nil at top level), the absolute file path and its first/last line.
    MethodInfo = Data.define(:name, :class_name, :file, :start_line, :end_line) do
      # @return [Boolean] whether +line+ lies within the definition (inclusive)
      def contains_line?(line)
        (start_line..end_line).cover?(line)
      end

      # @return [String] "Class#name", or just the name for top-level methods
      def qualified_name
        class_name ? "#{class_name}##{name}" : name
      end
    end

    def initialize
      @methods = []
    end

    # Parses a Ruby file and records every method definition found in it.
    # Files with parse errors are reported via +warn+ and skipped.
    #
    # @param path [String]
    # @return [self]
    def add_file(path)
      normalized_path = File.expand_path(path)
      result = Prism.parse_file(normalized_path)

      if result.errors.any?
        warn "Parse errors in #{normalized_path}:"
        result.errors.each { |e| warn "  #{e.message}" }
        return self
      end

      extractor = MethodExtractor.new(normalized_path)
      result.value.accept(extractor)
      @methods.concat(extractor.methods)

      self
    end

    # Finds the first recorded method in +file+ whose range contains +line+.
    #
    # @return [MethodInfo, nil]
    def method_at(file, line)
      normalized_file = File.expand_path(file)

      @methods.find do |method|
        method.file == normalized_file && method.contains_line?(line)
      end
    end

    # @return [Array<MethodInfo>] all recorded methods for +file+
    def methods_in_file(file)
      normalized_file = File.expand_path(file)

      @methods.select { |method| method.file == normalized_file }
    end

    # @return [Array<MethodInfo>] a copy of every recorded method
    def all_methods
      @methods.dup
    end

    # Internal visitor for extracting methods from Prism AST
    class MethodExtractor < Prism::Visitor
      attr_reader :methods

      def initialize(file_path)
        @file_path = file_path
        @methods = []
        @context_stack = []
        super()
      end

      def visit_class_node(node)
        @context_stack.push(extract_constant_name(node.constant_path))
        super
        @context_stack.pop
      end

      def visit_module_node(node)
        @context_stack.push(extract_constant_name(node.constant_path))
        super
        @context_stack.pop
      end

      # Prism represents both `def foo` and `def self.foo` as a DefNode; the
      # singleton form is distinguished only by a non-nil receiver. There is
      # no separate singleton-method node, so the receiver prefix is added
      # here.
      def visit_def_node(node)
        receiver = node.receiver
        method_name = receiver ? "#{receiver.slice}.#{node.name}" : node.name.to_s

        @methods << MethodInfo.new(
          name: method_name,
          class_name: @context_stack.any? ? @context_stack.join("::") : nil,
          file: @file_path,
          start_line: node.location.start_line,
          end_line: node.location.end_line
        )

        super
      end

      private

      # Builds "A::B::C" from a constant read or constant path node; any
      # other node falls back to its +to_s+.
      def extract_constant_name(node)
        case node
        when Prism::ConstantReadNode
          node.name.to_s
        when Prism::ConstantPathNode
          parts = []
          current = node
          while current.is_a?(Prism::ConstantPathNode)
            parts.unshift(current.name.to_s)
            current = current.parent
          end
          parts.unshift(current.name.to_s) if current.is_a?(Prism::ConstantReadNode)
          parts.join("::")
        else
          node.to_s
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# Hone Pattern Detection Approaches
|
|
2
|
+
|
|
3
|
+
This document describes the different approaches used for detecting optimization patterns in Ruby code.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Hone uses Prism AST visitors to detect code patterns that could be optimized. Patterns range from simple single-node checks to complex data flow analysis.
|
|
8
|
+
|
|
9
|
+
## Pattern Tiers
|
|
10
|
+
|
|
11
|
+
| Tier | Complexity | Example |
|
|
12
|
+
|------|------------|---------|
|
|
13
|
+
| 1: Simple AST | Single node inspection | `.positive?` → `> 0` |
|
|
14
|
+
| 2: Context-Aware | Track loop/method scope | String concat in loop |
|
|
15
|
+
| 3: Scope-Limited | Track variables within scope | `chars = str.chars; chars[0]` |
|
|
16
|
+
| 4: Taint Tracking | Track data flow across assignments | Aliased variable detection |
|
|
17
|
+
|
|
18
|
+
## Approach Comparison
|
|
19
|
+
|
|
20
|
+
### Simple Scope Tracking
|
|
21
|
+
|
|
22
|
+
**File:** `chars_to_variable.rb`
|
|
23
|
+
|
|
24
|
+
Tracks variable assignments within the current lexical scope. When a variable is assigned from a specific call (e.g., `.chars`), subsequent uses of that variable are checked.
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
def example
|
|
28
|
+
chars = str.chars # Tracked: chars -> .chars source
|
|
29
|
+
chars[0] # Detected: inefficient indexing
|
|
30
|
+
end
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
**Advantages:**
|
|
34
|
+
- Simple implementation
|
|
35
|
+
- Low overhead
|
|
36
|
+
- Easy to understand and debug
|
|
37
|
+
|
|
38
|
+
**Limitations:**
|
|
39
|
+
- Cannot track variable aliasing (`x = chars; x[0]`)
|
|
40
|
+
- Cannot track instance variables across methods
|
|
41
|
+
- Scope resets on reassignment
|
|
42
|
+
|
|
43
|
+
### Taint Tracking
|
|
44
|
+
|
|
45
|
+
**Files:** `taint_tracking_base.rb`, `chars_to_variable_tainted.rb`
|
|
46
|
+
|
|
47
|
+
Propagates metadata ("taint") through variable assignments, tracking the origin of data as it flows through the program.
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
def example
|
|
51
|
+
chars = str.chars # Taint: chars is "chars_array" from str
|
|
52
|
+
x = chars # Propagate: x inherits taint from chars
|
|
53
|
+
y = x # Propagate: y inherits taint from x
|
|
54
|
+
y[0] # Detected: y is tainted with chars_array origin
|
|
55
|
+
end
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**Advantages:**
|
|
59
|
+
- Tracks variable aliasing
|
|
60
|
+
- Handles instance variables
|
|
61
|
+
- Supports chained assignments
|
|
62
|
+
- Cleaner separation of concerns
|
|
63
|
+
|
|
64
|
+
**Limitations:**
|
|
65
|
+
- More complex implementation
|
|
66
|
+
- Higher memory usage (stores taint info per variable)
|
|
67
|
+
- Cannot track across method boundaries (yet)
|
|
68
|
+
|
|
69
|
+
## Detection Comparison
|
|
70
|
+
|
|
71
|
+
| Scenario | Simple | Taint |
|
|
72
|
+
|----------|--------|-------|
|
|
73
|
+
| Direct usage: `chars[0]` | Yes | Yes |
|
|
74
|
+
| Aliased: `x = chars; x[0]` | No | Yes |
|
|
75
|
+
| Chained: `a = chars; b = a; b[0]` | No | Yes |
|
|
76
|
+
| Instance var: `@chars[0]` | No | Yes |
|
|
77
|
+
| Cross-method: `def foo; @chars[0]; end` | No | No* |
|
|
78
|
+
|
|
79
|
+
*Cross-method tracking would require whole-program analysis.
|
|
80
|
+
|
|
81
|
+
## TaintTrackingBase API
|
|
82
|
+
|
|
83
|
+
The `TaintTrackingBase` class provides infrastructure for building taint-tracking patterns.
|
|
84
|
+
|
|
85
|
+
### Subclass Requirements
|
|
86
|
+
|
|
87
|
+
```ruby
|
|
88
|
+
class MyPattern < TaintTrackingBase
|
|
89
|
+
self.pattern_id = :my_pattern
|
|
90
|
+
self.optimization_type = :allocation
|
|
91
|
+
|
|
92
|
+
protected
|
|
93
|
+
|
|
94
|
+
# Define what creates a taint
|
|
95
|
+
def taint_from_call(call_node)
|
|
96
|
+
return nil unless call_node.name == :dangerous_method
|
|
97
|
+
|
|
98
|
+
{
|
|
99
|
+
type: :my_taint_type,
|
|
100
|
+
source: call_node.receiver,
|
|
101
|
+
source_code: call_node.receiver&.slice,
|
|
102
|
+
origin_line: call_node.location.start_line,
|
|
103
|
+
metadata: {}
|
|
104
|
+
}
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
# Check for problematic uses of tainted variables
|
|
108
|
+
def check_tainted_usage(call_node, var_name, taint_info)
|
|
109
|
+
return unless taint_info.type == :my_taint_type
|
|
110
|
+
|
|
111
|
+
if call_node.name == :problematic_method
|
|
112
|
+
add_finding(call_node, message: "...")
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Available Methods
|
|
119
|
+
|
|
120
|
+
```ruby
|
|
121
|
+
# Query taint status
|
|
122
|
+
get_taint(var_name, scope: :local) # Returns TaintInfo or nil
|
|
123
|
+
tainted?(var_name, scope: :local) # Returns boolean
|
|
124
|
+
all_taints(scope: :local) # Returns hash of all taints
|
|
125
|
+
|
|
126
|
+
# Modify taint status (rarely needed in subclasses)
|
|
127
|
+
set_taint(var_name, taint_info, scope: :local)
|
|
128
|
+
clear_taint(var_name, scope: :local)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### TaintInfo Structure
|
|
132
|
+
|
|
133
|
+
```ruby
|
|
134
|
+
TaintInfo = Data.define(
|
|
135
|
+
:type, # Symbol identifying the taint type
|
|
136
|
+
:source, # Original AST node (e.g., receiver of .chars)
|
|
137
|
+
:source_code, # String representation of source
|
|
138
|
+
:origin_line, # Line number where taint originated
|
|
139
|
+
:metadata # Hash for pattern-specific data
|
|
140
|
+
)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Scope Handling
|
|
144
|
+
|
|
145
|
+
Both approaches handle Ruby's lexical scoping:
|
|
146
|
+
|
|
147
|
+
| Construct | Scope Behavior |
|
|
148
|
+
|-----------|----------------|
|
|
149
|
+
| `def` | New isolated scope |
|
|
150
|
+
| `class` / `module` | New isolated scope |
|
|
151
|
+
| `lambda` / `->` | New isolated scope |
|
|
152
|
+
| `block` | Inherits parent scope (for taint tracking) |
|
|
153
|
+
|
|
154
|
+
Instance variables use a separate scope (`:instance`) that persists across blocks but resets at class/module boundaries.
|
|
155
|
+
|
|
156
|
+
## When to Use Each Approach
|
|
157
|
+
|
|
158
|
+
**Use Simple Scope Tracking when:**
|
|
159
|
+
- Pattern only needs direct variable usage
|
|
160
|
+
- Performance is critical
|
|
161
|
+
- Implementation simplicity is preferred
|
|
162
|
+
|
|
163
|
+
**Use Taint Tracking when:**
|
|
164
|
+
- Pattern involves variable aliasing
|
|
165
|
+
- Instance variables need tracking
|
|
166
|
+
- Data flow through assignments matters
|
|
167
|
+
- Building on existing taint infrastructure
|
|
168
|
+
|
|
169
|
+
## Future Directions
|
|
170
|
+
|
|
171
|
+
1. **Cross-method tracking**: Track instance variable taints across method definitions
|
|
172
|
+
2. **Escape analysis**: Detect when tainted values escape the current scope
|
|
173
|
+
3. **Conditional tracking**: Handle `if` branches that may or may not taint
|
|
174
|
+
4. **LLM review layer**: Use language models to filter false positives from complex patterns
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Pattern: array.reject { |x| x.nil? }  -> array.compact
    #          array.reject(&:nil?)         -> array.compact
    #          array.select { |x| !x.nil? } -> array.compact
    #
    # compact is implemented in C and optimized for removing nil values.
    # Using reject/select with a block for nil checking is slower and less clear.
    class ArrayCompact < Base
      self.pattern_id = :array_compact
      self.optimization_type = :cpu

      def visit_call_node(node)
        super

        return unless %i[reject select].include?(node.name)
        return unless block_attached?(node)

        form = replaceable_form(node)
        return unless form

        add_finding(
          node,
          message: "Use `.compact` instead of `#{form}` for optimized nil removal",
          speedup: "Uses optimized C implementation"
        )
      end

      private

      # Returns the canonical spelling of the matched idiom (used in the
      # finding message), or nil when the call is not a nil-filter.
      def replaceable_form(node)
        block = node.block

        case node.name
        when :reject
          return ".reject(&:nil?)" if nil_predicate_block_pass?(block)

          ".reject { |x| x.nil? }" if block.is_a?(Prism::BlockNode) && reject_nil_pattern?(block)
        when :select
          ".select { |x| !x.nil? }" if block.is_a?(Prism::BlockNode) && select_not_nil_pattern?(block)
        end
      end

      # Detects: (&:nil?)
      def nil_predicate_block_pass?(block)
        return false unless block.is_a?(Prism::BlockArgumentNode)

        expression = block.expression
        expression.is_a?(Prism::SymbolNode) && expression.unescaped == "nil?"
      end

      # Detects: { |x| x.nil? }
      def reject_nil_pattern?(block)
        statement = sole_statement(block)
        return false unless statement

        nil_check_on_param?(statement, block_param_name(block))
      end

      # Detects: { |x| !x.nil? }
      def select_not_nil_pattern?(block)
        statement = sole_statement(block)
        return false unless statement

        negated_nil_check_on_param?(statement, block_param_name(block))
      end

      # Returns the block's only statement when the block takes exactly one
      # required parameter and its body is a single statement; nil otherwise.
      def sole_statement(block)
        return nil unless single_block_param?(block)

        body = block.body
        return nil unless body.is_a?(Prism::StatementsNode)
        return nil unless body.body.size == 1

        body.body.first
      end

      def single_block_param?(block)
        params = block.parameters
        return false unless params.is_a?(Prism::BlockParametersNode)

        parameters = params.parameters
        return false unless parameters.is_a?(Prism::ParametersNode)

        parameters.requireds.size == 1 &&
          parameters.optionals.empty? &&
          parameters.rest.nil? &&
          parameters.keywords.empty?
      end

      def block_param_name(block)
        block.parameters.parameters.requireds.first.name
      end

      # Checks if statement is: param.nil?
      def nil_check_on_param?(statement, param_name)
        return false unless statement.is_a?(Prism::CallNode)
        return false unless statement.name == :nil?
        return false unless statement.arguments.nil?
        # `x&.nil?` evaluates to nil (falsy) when x is nil, so a block built
        # on it does not filter nils and cannot be replaced by compact.
        return false if statement.safe_navigation?

        receiver = statement.receiver
        return false unless receiver.is_a?(Prism::LocalVariableReadNode)

        receiver.name == param_name
      end

      # Checks if statement is: !param.nil?
      def negated_nil_check_on_param?(statement, param_name)
        return false unless statement.is_a?(Prism::CallNode)
        return false unless statement.name == :!
        return false unless statement.receiver

        nil_check_on_param?(statement.receiver, param_name)
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hone
  module Patterns
    # Pattern: array.include?(x) -> consider Set for repeated lookups
    #
    # Array#include? is O(n) for each lookup.
    # If performing repeated lookups, converting to Set gives O(1) lookups.
    #
    # The receiver's runtime type is unknown, so this is a heuristic: every
    # one-argument include? call is reported except those whose receiver is
    # syntactically known not to be an Array.
    class ArrayIncludeSet < Base
      self.pattern_id = :array_include_set
      self.optimization_type = :cpu

      def visit_call_node(node)
        super

        # Look for: .include?(x) with one argument
        return unless node.name == :include?
        return unless node.arguments&.arguments&.size == 1

        receiver = node.receiver
        # Implicit-self call (e.g. Module#include? inside a class body):
        # there is no collection expression to convert to a Set.
        return if receiver.nil?
        return if hash_projection?(receiver)
        return if non_array_literal?(receiver)

        add_finding(
          node,
          message: "Consider using Set instead of Array#include? for repeated lookups",
          speedup: "O(1) vs O(n) for repeated lookups"
        )
      end

      private

      # hash.keys.include? / hash.values.include? are skipped here
      # (handled by hash_keys_include).
      def hash_projection?(receiver)
        receiver.is_a?(Prism::CallNode) && %i[keys values].include?(receiver.name)
      end

      # String, range and hash literals respond to include? but are never
      # Arrays, so the Array -> Set advice does not apply to them.
      def non_array_literal?(receiver)
        case unwrap_parentheses(receiver)
        when Prism::StringNode, Prism::InterpolatedStringNode, Prism::RangeNode, Prism::HashNode
          true
        else
          false
        end
      end

      # Peels `( expr )` wrappers such as `(1..10)` to reach the inner node.
      def unwrap_parentheses(node)
        while node.is_a?(Prism::ParenthesesNode)
          statements = node.body
          break unless statements.is_a?(Prism::StatementsNode) && statements.body.size == 1

          node = statements.body.first
        end
        node
      end
    end
  end
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "prism"
|
|
4
|
+
|
|
5
|
+
module Hone
  # Pattern matchers for detecting optimization opportunities in Ruby AST.
  #
  # Each pattern class inherits from Base and implements visit_* methods
  # to detect specific anti-patterns using Prism's visitor interface.
  # Subclasses of Base are added to Patterns.registered automatically.
  #
  # @example Creating a custom pattern
  #   class MyPattern < Base
  #     self.pattern_id = :my_pattern
  #     self.optimization_type = :cpu
  #
  #     def visit_call_node(node)
  #       super
  #       # detection logic here
  #     end
  #   end
  module Patterns
    @registered = []

    class << self
      # @return [Array<Class>] pattern classes in registration order
      attr_reader :registered

      # Adds +pattern_class+ to the registry. Registering the same class
      # twice is a no-op so the registry never holds duplicates.
      #
      # @return [Array<Class>] the registry
      def register(pattern_class)
        @registered << pattern_class unless @registered.include?(pattern_class)
        @registered
      end
    end

    class Base < Prism::Visitor
      # Registers every subclass as soon as it is defined.
      def self.inherited(subclass)
        super # keep the Class#inherited hook chain intact
        Patterns.register(subclass)
      end

      class << self
        attr_accessor :pattern_id, :optimization_type
        attr_writer :scope

        # @return [Symbol] the configured scope, :ruby when none was set
        def scope
          @scope || :ruby
        end
      end

      # Parses +path+, walks the AST with a fresh instance of this pattern
      # and returns the findings it collected.
      def self.scan_file(path)
        result = Prism.parse_file(path)
        pattern = new(path)
        result.value.accept(pattern)
        pattern.findings
      end

      attr_reader :findings

      def initialize(file_path)
        @file_path = file_path
        @findings = []
      end

      # Records a Finding located at +node+, tagged with this class's
      # pattern_id and optimization_type and the node's source text.
      #
      # @param node [Prism::Node]
      # @param message [String]
      # @param speedup [String, nil]
      def add_finding(node, message:, speedup: nil)
        location = node.location

        @findings << Finding.new(
          file: @file_path,
          line: location.start_line,
          column: location.start_column,
          pattern_id: self.class.pattern_id,
          optimization_type: self.class.optimization_type,
          source: :hone,
          message: message,
          speedup: speedup,
          code: location.slice
        )
      end

      protected

      # True when the call has a literal block or a block-pass argument (&blk).
      def block_attached?(call_node)
        block = call_node.block
        block.is_a?(Prism::BlockNode) || block.is_a?(Prism::BlockArgumentNode)
      end

      # Sets the instance variable named +variable+ to +value+ for the
      # duration of the block and restores the previous value afterwards,
      # even if the block raises.
      def with_context(variable, value)
        previous = instance_variable_get(variable)
        instance_variable_set(variable, value)
        yield
      ensure
        instance_variable_set(variable, previous)
      end
    end
  end
end
|