hone 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. checksums.yaml +7 -0
  2. data/.standard.yml +8 -0
  3. data/CHANGELOG.md +5 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +201 -0
  6. data/Rakefile +10 -0
  7. data/examples/.hone/harness.rb +41 -0
  8. data/examples/README.md +22 -0
  9. data/examples/allocation_patterns.rb +66 -0
  10. data/examples/cpu_patterns.rb +50 -0
  11. data/examples/jit_patterns.rb +69 -0
  12. data/exe/hone +7 -0
  13. data/lib/hone/adapters/base.rb +35 -0
  14. data/lib/hone/adapters/fasterer.rb +38 -0
  15. data/lib/hone/adapters/rubocop_performance.rb +85 -0
  16. data/lib/hone/analyzer.rb +258 -0
  17. data/lib/hone/cli.rb +247 -0
  18. data/lib/hone/config.rb +93 -0
  19. data/lib/hone/correlator.rb +250 -0
  20. data/lib/hone/exit_codes.rb +10 -0
  21. data/lib/hone/finding.rb +64 -0
  22. data/lib/hone/finding_filter.rb +57 -0
  23. data/lib/hone/formatters/base.rb +25 -0
  24. data/lib/hone/formatters/filterable.rb +31 -0
  25. data/lib/hone/formatters/github.rb +71 -0
  26. data/lib/hone/formatters/json.rb +75 -0
  27. data/lib/hone/formatters/junit.rb +154 -0
  28. data/lib/hone/formatters/sarif.rb +179 -0
  29. data/lib/hone/formatters/tsv.rb +49 -0
  30. data/lib/hone/harness.rb +57 -0
  31. data/lib/hone/harness_generator.rb +128 -0
  32. data/lib/hone/harness_runner.rb +172 -0
  33. data/lib/hone/method_map.rb +140 -0
  34. data/lib/hone/patterns/README.md +174 -0
  35. data/lib/hone/patterns/array_compact.rb +105 -0
  36. data/lib/hone/patterns/array_include_set.rb +34 -0
  37. data/lib/hone/patterns/base.rb +90 -0
  38. data/lib/hone/patterns/block_to_proc.rb +109 -0
  39. data/lib/hone/patterns/bsearch_vs_find.rb +80 -0
  40. data/lib/hone/patterns/chars_map_ord.rb +42 -0
  41. data/lib/hone/patterns/chars_to_variable.rb +136 -0
  42. data/lib/hone/patterns/chars_to_variable_tainted.rb +136 -0
  43. data/lib/hone/patterns/constant_regexp.rb +74 -0
  44. data/lib/hone/patterns/count_vs_size.rb +35 -0
  45. data/lib/hone/patterns/divmod.rb +92 -0
  46. data/lib/hone/patterns/dynamic_ivar.rb +44 -0
  47. data/lib/hone/patterns/dynamic_ivar_get.rb +33 -0
  48. data/lib/hone/patterns/each_with_index.rb +116 -0
  49. data/lib/hone/patterns/each_with_object.rb +63 -0
  50. data/lib/hone/patterns/flatten_once.rb +28 -0
  51. data/lib/hone/patterns/gsub_to_tr.rb +48 -0
  52. data/lib/hone/patterns/hash_each_key.rb +41 -0
  53. data/lib/hone/patterns/hash_each_value.rb +31 -0
  54. data/lib/hone/patterns/hash_keys_include.rb +30 -0
  55. data/lib/hone/patterns/hash_merge_bang.rb +33 -0
  56. data/lib/hone/patterns/hash_values_include.rb +31 -0
  57. data/lib/hone/patterns/inject_sum.rb +48 -0
  58. data/lib/hone/patterns/kernel_loop.rb +27 -0
  59. data/lib/hone/patterns/lazy_ivar.rb +39 -0
  60. data/lib/hone/patterns/map_compact.rb +32 -0
  61. data/lib/hone/patterns/map_flatten.rb +31 -0
  62. data/lib/hone/patterns/map_select_chain.rb +32 -0
  63. data/lib/hone/patterns/parallel_assignment.rb +127 -0
  64. data/lib/hone/patterns/positive_predicate.rb +27 -0
  65. data/lib/hone/patterns/range_include.rb +34 -0
  66. data/lib/hone/patterns/redundant_string_chars.rb +82 -0
  67. data/lib/hone/patterns/regexp_match.rb +126 -0
  68. data/lib/hone/patterns/reverse_each.rb +30 -0
  69. data/lib/hone/patterns/reverse_first.rb +40 -0
  70. data/lib/hone/patterns/select_count.rb +32 -0
  71. data/lib/hone/patterns/select_first.rb +31 -0
  72. data/lib/hone/patterns/select_map.rb +32 -0
  73. data/lib/hone/patterns/shuffle_first.rb +30 -0
  74. data/lib/hone/patterns/slice_with_length.rb +48 -0
  75. data/lib/hone/patterns/sort_by_first.rb +31 -0
  76. data/lib/hone/patterns/sort_by_last.rb +31 -0
  77. data/lib/hone/patterns/sort_first.rb +52 -0
  78. data/lib/hone/patterns/sort_last.rb +30 -0
  79. data/lib/hone/patterns/sort_reverse.rb +53 -0
  80. data/lib/hone/patterns/string_casecmp.rb +54 -0
  81. data/lib/hone/patterns/string_chars_each.rb +56 -0
  82. data/lib/hone/patterns/string_concat_in_loop.rb +116 -0
  83. data/lib/hone/patterns/string_delete_prefix.rb +53 -0
  84. data/lib/hone/patterns/string_delete_suffix.rb +53 -0
  85. data/lib/hone/patterns/string_empty.rb +64 -0
  86. data/lib/hone/patterns/string_end_with.rb +81 -0
  87. data/lib/hone/patterns/string_shovel.rb +75 -0
  88. data/lib/hone/patterns/string_start_with.rb +80 -0
  89. data/lib/hone/patterns/taint_tracking_base.rb +230 -0
  90. data/lib/hone/patterns/times_map.rb +38 -0
  91. data/lib/hone/patterns/uniq_by.rb +32 -0
  92. data/lib/hone/patterns/yield_vs_block.rb +72 -0
  93. data/lib/hone/profilers/base.rb +162 -0
  94. data/lib/hone/profilers/factory.rb +31 -0
  95. data/lib/hone/profilers/memory_profiler.rb +213 -0
  96. data/lib/hone/profilers/stackprof.rb +99 -0
  97. data/lib/hone/profilers/vernier.rb +147 -0
  98. data/lib/hone/reporter.rb +371 -0
  99. data/lib/hone/scanner.rb +75 -0
  100. data/lib/hone/suggestion_generator.rb +23 -0
  101. data/lib/hone/version.rb +5 -0
  102. data/lib/hone.rb +108 -0
  103. data/logo.png +0 -0
  104. data/sig/hone.rbs +4 -0
  105. metadata +176 -0
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module Hone
  module Profilers
    # Picks the profiler class that matches the structure of a JSON profile
    # file and instantiates it.
    class Factory
      # Ordered [profiler class, predicate] pairs. The first predicate that
      # accepts the parsed JSON document decides which class is used.
      DETECTORS = [
        [Vernier, ->(data) { data.key?("threads") }],
        [StackProf, ->(data) { data.key?("frames") || data.key?("methods") }]
      ].freeze

      # Reads and parses the file at +profile_path+, detects its format and
      # returns a new instance of the matching profiler class.
      #
      # @param profile_path [String, nil] path to a JSON profile file
      # @return [Object, nil] profiler instance built with +profile_path+, or
      #   nil when +profile_path+ is nil
      # @raise [Hone::Error] when the file is not valid JSON, or when no
      #   detector recognises the parsed document
      def self.create(profile_path)
        return nil if profile_path.nil?

        data = JSON.parse(File.read(profile_path))
        profiler_class = detect_profiler(data)

        raise Hone::Error, "Unknown profile format in #{profile_path}" unless profiler_class

        profiler_class.new(profile_path)
      rescue JSON::ParserError => e
        raise Hone::Error, "Invalid profile JSON: #{e.message}"
      end

      # Returns the first profiler class whose detector accepts +data+.
      #
      # @param data [Object] parsed JSON document
      # @return [Class, nil] matching profiler class, or nil when none matches
      def self.detect_profiler(data)
        # A valid JSON document may be an Array or a scalar. The detectors call
        # #key?, so anything that is not a Hash is reported as "no match"
        # instead of surfacing as a NoMethodError.
        return nil unless data.is_a?(Hash)

        DETECTORS.find { |_, detector| detector.call(data) }&.first
      end
    end
  end
end
@@ -0,0 +1,213 @@
1
+ # frozen_string_literal: true
2
+
3
module Hone
  module Profilers
    # Adapter for MemoryProfiler-style allocation reports. It normalises the
    # report held in @data into per-location frames, each carrying its share
    # of all recorded allocations.
    #
    # NOTE(review): @data is never assigned in this class; it is presumably the
    # parsed profile populated by Base#initialize - confirm against Base.
    class MemoryProfiler < Base
      include MethodMatching

      # Matches "file:line" and "file:line:in `method'". The opening quote of
      # the method label may also be a straight quote, which is how Ruby 3.4+
      # formats backtrace labels ("file:line:in 'method'").
      LOCATION_PATTERN = /^(.+):(\d+)(?::in [`'](.+)')?$/
      private_constant :LOCATION_PATTERN

      def initialize(profile_path)
        super

        parse_allocations
      end

      # Returns CPU percentage for a method (0.0-100.0)
      # Always returns nil for MemoryProfiler since it tracks allocations, not CPU
      def cpu_percent_for(_method_info)
        nil
      end

      # Returns allocation percentage for a method (0.0-100.0), or nil when no
      # frame matches.
      # method_info can be a Hash with :name and/or :file keys, or a String method name
      def alloc_percent_for(method_info)
        frame = find_matching_frame(method_info)
        return nil unless frame

        frame[:alloc_percent]
      end

      # Returns array of HotspotInfo for frames above threshold, highest first.
      # For MemoryProfiler, threshold applies to allocation percentage
      def hotspots(threshold: 1.0)
        @frames
          .select { |frame| frame[:alloc_percent] >= threshold }
          .sort_by { |frame| -frame[:alloc_percent] }
          .map { |frame| build_hotspot_info(frame) }
      end

      private

      # Builds @frames (sorted by descending allocation percentage) and
      # @total_allocations from the report.
      def parse_allocations
        allocation_data = extract_allocation_data

        # Denominator for the percentages; forced to 1 for an empty report so
        # the division below is always defined.
        @total_allocations = allocation_data.values.sum { |info| info[:count] }
        @total_allocations = 1 if @total_allocations.zero?

        @frames = allocation_data.each_value.map do |info|
          {
            name: info[:name],
            file: info[:file],
            line: info[:line],
            samples: info[:count],
            alloc_percent: (info[:count].to_f / @total_allocations * 100).round(2),
            cpu_percent: nil,
            memory_allocated: info[:memory_allocated]
          }
        end

        @frames.sort_by! { |f| -f[:alloc_percent] }
      end

      # Tries the known report layouts in priority order and returns a Hash of
      # key => {name:, file:, line:, count:, memory_allocated:}.
      def extract_allocation_data
        allocation_data = {}

        if @data["allocated_objects_by_location"]
          parse_by_location(@data["allocated_objects_by_location"], allocation_data)
        elsif @data["allocated_memory_by_location"]
          parse_by_location(@data["allocated_memory_by_location"], allocation_data, memory: true)
        elsif @data["allocations"]
          parse_allocations_array(@data["allocations"], allocation_data)
        elsif @data["allocated_objects_by_gem"]
          # Fallback to gem-level data if location data not available
          parse_by_gem(@data["allocated_objects_by_gem"], allocation_data)
        else
          # Try to parse as raw location-based data
          parse_raw_data(allocation_data)
        end

        allocation_data
      end

      # Adds one observation to allocation_data, summing count and memory into
      # an existing entry for the same key or creating the entry otherwise.
      def accumulate_allocation(allocation_data, key, name:, file:, line:, count:, memory_allocated:)
        existing = allocation_data[key]

        if existing
          existing[:count] += count
          existing[:memory_allocated] += memory_allocated
        else
          allocation_data[key] = {
            name: name,
            file: file,
            line: line,
            count: count,
            memory_allocated: memory_allocated
          }
        end
      end

      # Parses an array of per-location entries, keyed by "file:line".
      #
      # memory: true marks a by-memory report, where the per-entry figure read
      # into count is a byte total. In that case it is also used as
      # memory_allocated when the entry has no explicit memory field
      # (previously the flag was ignored and memory_allocated stayed 0).
      def parse_by_location(location_data, allocation_data, memory: false)
        location_data.each do |entry|
          location = entry["location"] || entry["data"]
          # Support both object count and byte count formats
          count = entry["count"] || entry["value"] || entry["bytes"] || 1
          explicit_memory = entry["memory"] || entry["memsize"] || entry["bytes"]
          memory_allocated = explicit_memory || (memory ? count : 0)

          file, line, name = parse_location(location)

          accumulate_allocation(
            allocation_data, "#{file}:#{line}",
            name: name, file: file, line: line,
            count: count, memory_allocated: memory_allocated
          )
        end
      end

      # Parses an array of per-allocation entries that carry file and line as
      # separate fields, keyed by "file:line".
      def parse_allocations_array(allocations, allocation_data)
        allocations.each do |entry|
          file = entry["file"] || entry["sourcefile"]
          line = entry["line"] || entry["sourceline"]
          name = entry["name"] || entry["class_name"] || entry["method"] || "#{file}:#{line}"
          count = entry["count"] || entry["allocations"] || 1
          memory_allocated = entry["memory"] || entry["memsize"] || entry["size"] || 0

          accumulate_allocation(
            allocation_data, "#{file}:#{line}",
            name: name, file: file, line: line.to_i,
            count: count, memory_allocated: memory_allocated
          )
        end
      end

      # Parses gem-level entries, keyed by gem name. Entries sharing a name
      # (for example several entries that fall back to "unknown") are summed,
      # matching the other parsers, instead of the last one winning.
      def parse_by_gem(gem_data, allocation_data)
        gem_data.each do |entry|
          gem_name = entry["gem"] || entry["data"] || "unknown"
          count = entry["count"] || entry["value"] || 1

          accumulate_allocation(
            allocation_data, gem_name,
            name: gem_name, file: nil, line: nil,
            count: count, memory_allocated: 0
          )
        end
      end

      # Handles the case where @data itself is a hash of location => count, or
      # location => {"count"/"allocations" => n, ...}. Other values are skipped.
      def parse_raw_data(allocation_data)
        @data.each do |key, value|
          case value
          when Integer
            count = value
            memory_allocated = 0
          when Hash
            count = value["count"] || value["allocations"]
            next unless count

            memory_allocated = value["memory"] || value["memsize"] || 0
          else
            next
          end

          file, line, name = parse_location(key)
          allocation_data[key] = {
            name: name,
            file: file,
            line: line,
            count: count,
            memory_allocated: memory_allocated
          }
        end
      end

      # Splits a location string into [file, line, name].
      #
      # Non-String input yields [nil, nil, location]; a String that does not
      # look like "file:line[...]" yields [location, nil, location]. When the
      # location has no method label, name falls back to "basename:line".
      def parse_location(location)
        return [nil, nil, location] unless location.is_a?(String)

        match = LOCATION_PATTERN.match(location)
        return [location, nil, location] unless match

        file = match[1]
        line = match[2].to_i
        name = match[3] || "#{File.basename(file)}:#{line}"

        [file, line, name]
      end

      def build_hotspot_info(frame)
        # Note: HotspotInfo.cpu_percent is used for allocation percentage here.
        # The HotspotInfo structure is shared across profiler types and represents
        # "impact percentage" - CPU for StackProf/Vernier, allocations for MemoryProfiler.
        HotspotInfo.new(
          name: frame[:name],
          file: frame[:file],
          line: frame[:line],
          cpu_percent: frame[:alloc_percent],
          samples: frame[:samples]
        )
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
module Hone
  module Profilers
    # Adapter for StackProf-style JSON profiles. It converts the "frames" hash
    # (raw StackProf output) or the "methods" array (pre-processed Hone format)
    # found in @data into frames ranked by CPU percentage.
    #
    # NOTE(review): @data is never assigned in this class; it is presumably the
    # parsed profile populated by Base#initialize - confirm against Base.
    class StackProf < Base
      include MethodMatching

      def initialize(profile_path)
        super

        # ruby_samples is preferred (original author's note: it excludes C
        # frames, giving more accurate Ruby percentages); then the profile's
        # own "samples" total; finally a total summed from the entries.
        @total_samples = @data["ruby_samples"] || @data["samples"] || calculate_total_samples
        parse_frames
      end

      # CPU percentage (0.0-100.0) of the frame matching method_info, or nil
      # when no frame matches.
      # method_info can be a Hash with :name and/or :file keys, or a String method name
      def cpu_percent_for(method_info)
        matched = find_matching_frame(method_info)

        matched ? matched[:cpu_percent] : nil
      end

      # HotspotInfo objects for every frame at or above threshold, highest
      # CPU percentage first.
      def hotspots(threshold: 1.0)
        hot = @frames.select { |frame| frame[:cpu_percent] >= threshold }
        ranked = hot.sort_by { |frame| -frame[:cpu_percent] }

        ranked.map { |frame| build_hotspot_info(frame) }
      end

      private

      # Populates @frames from whichever layout is present and sorts it by
      # descending CPU percentage. Leaves @frames empty when neither key exists.
      def parse_frames
        @frames =
          if @data["frames"]
            # Raw StackProf JSON format: frames is a hash with address keys
            @data["frames"].each_value.map { |raw| parse_frame_data(raw) }
          elsif @data["methods"]
            # Pre-processed Hone format: methods is an array
            @data["methods"].map { |raw| parse_method_data(raw) }
          else
            []
          end

        @frames.sort_by! { |f| -f[:cpu_percent] }
      end

      # Normalises one entry of the raw "frames" hash; the percentage is always
      # derived from the entry's own sample count.
      def parse_frame_data(frame_data)
        self_samples = frame_data["samples"] || 0

        stackprof_frame(frame_data, self_samples, calculate_percent(self_samples))
      end

      # Normalises one entry of the "methods" array; a pre-calculated "percent"
      # wins over a percentage derived from the sample count.
      def parse_method_data(method_data)
        self_samples = method_data["samples"] || 0
        percent = method_data["percent"] || calculate_percent(self_samples)

        stackprof_frame(method_data, self_samples, percent.to_f)
      end

      # Shared frame layout used by both entry parsers.
      def stackprof_frame(raw, self_samples, cpu_percent)
        {
          name: raw["name"],
          file: raw["file"],
          line: raw["line"],
          samples: self_samples,
          total_samples: raw["total_samples"] || 0,
          cpu_percent: cpu_percent
        }
      end

      # Sums the per-entry "samples" values when the profile carries no
      # overall total.
      def calculate_total_samples
        frame_entries = @data["frames"]
        return frame_entries.values.sum { |entry| entry["samples"] || 0 } if frame_entries

        method_entries = @data["methods"]
        return method_entries.sum { |entry| entry["samples"] || 0 } if method_entries

        1 # Avoid division by zero
      end

      # Share of @total_samples as a percentage rounded to two decimals;
      # 0.0 when the total is zero.
      def calculate_percent(samples)
        @total_samples.zero? ? 0.0 : (samples.to_f / @total_samples * 100).round(2)
      end
    end
  end
end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
module Hone
  module Profilers
    # Adapter for Vernier profiles (Firefox Profiler format). Every sample is
    # attributed to each distinct frame on its stack, so a frame's
    # cpu_percent is the share of samples in which that frame was on the
    # stack (inclusive time), always within 0.0-100.0.
    #
    # NOTE(review): @data is never assigned in this class; it is presumably the
    # parsed profile populated by Base#initialize - confirm against Base.
    class Vernier < Base
      include MethodMatching

      def initialize(profile_path)
        super

        parse_profile
      end

      # Returns CPU percentage for a method (0.0-100.0), or nil when no frame
      # matches.
      # method_info can be a Hash with :name and/or :file keys, or a String method name
      def cpu_percent_for(method_info)
        frame = find_matching_frame(method_info)
        return nil unless frame

        frame[:cpu_percent]
      end

      # Returns array of HotspotInfo for frames above threshold, highest first
      def hotspots(threshold: 1.0)
        @frames
          .select { |frame| frame[:cpu_percent] >= threshold }
          .sort_by { |frame| -frame[:cpu_percent] }
          .map { |frame| build_hotspot_info(frame) }
      end

      private

      # Builds @frames (sorted by descending CPU percentage) and @total_samples
      # by aggregating every thread in the profile.
      def parse_profile
        @frames = []
        @total_samples = 0

        # Vernier uses Firefox Profiler format
        # The structure has: threads[], each with frameTable, funcTable, stackTable, samples
        threads = @data["threads"] || []

        # Aggregate samples across all threads
        frame_samples = Hash.new(0)
        frame_info = {}

        # The denominator is the number of (weighted) samples taken, not the
        # sum of the per-frame counts: each sample increments every frame on
        # its stack, so summing frame counts would shrink all percentages in
        # proportion to stack depth.
        threads.each do |thread|
          @total_samples += parse_thread(thread, frame_samples, frame_info)
        end
        @total_samples = 1 if @total_samples.zero?

        frame_samples.each do |frame_key, samples|
          info = frame_info[frame_key]

          @frames << {
            name: info[:name],
            file: info[:file],
            line: info[:line],
            samples: samples,
            cpu_percent: (samples.to_f / @total_samples * 100).round(2)
          }
        end

        @frames.sort_by! { |f| -f[:cpu_percent] }
      end

      # Adds one thread's samples to frame_samples / frame_info and returns the
      # total weight of the samples that had a stack.
      def parse_thread(thread, frame_samples, frame_info)
        tables = thread_tables(thread)
        samples = thread["samples"] || {}
        sample_stacks = samples["stack"] || []
        weights = samples["weight"]
        thread_total = 0

        sample_stacks.each_with_index do |stack_idx, sample_idx|
          next if stack_idx.nil?

          weight = sample_weight(weights, sample_idx)
          thread_total += weight

          frames_in_stack(stack_idx, tables).each do |frame_key, info|
            frame_samples[frame_key] += weight
            frame_info[frame_key] ||= info
          end
        end

        thread_total
      end

      # Extracts the column arrays this class reads from a thread, defaulting
      # every missing table or column to an empty array.
      def thread_tables(thread)
        frame_table = thread["frameTable"] || {}
        func_table = thread["funcTable"] || {}
        stack_table = thread["stackTable"] || {}

        {
          frame_funcs: frame_table["func"] || [],
          frame_lines: frame_table["line"] || [],
          func_names: func_table["name"] || [],
          func_files: func_table["fileName"] || [],
          func_lines: func_table["lineNumber"] || [],
          stack_prefixes: stack_table["prefix"] || [],
          stack_frames: stack_table["frame"] || [],
          strings: string_table_for(thread)
        }
      end

      # String table for resolving indices to actual strings. The original
      # lookups ("stringTable" at the top level, then on the thread) keep
      # priority. "stringArray" on the thread or under the top-level "shared"
      # object are additional fallbacks, believed to be the serialized names
      # in the Firefox Profiler format - TODO confirm against that format.
      def string_table_for(thread)
        shared = @data["shared"]
        shared_strings = shared["stringArray"] if shared.is_a?(Hash)

        @data["stringTable"] || thread["stringTable"] || thread["stringArray"] || shared_strings || []
      end

      # Weight of the sample at sample_idx: the matching entry of the optional
      # "weight" column when it is a non-negative number, otherwise 1.
      def sample_weight(weights, sample_idx)
        return 1 unless weights.is_a?(Array)

        weight = weights[sample_idx]
        (weight.is_a?(Numeric) && weight >= 0) ? weight : 1
      end

      # Walks from stack_idx up through the prefix chain and returns a Hash of
      # "name:file:line" => {name:, file:, line:} with one entry per distinct
      # frame, so recursion never counts a frame twice for the same sample.
      def frames_in_stack(stack_idx, tables)
        stack_frames = tables[:stack_frames]
        stack_prefixes = tables[:stack_prefixes]
        found = {}
        visited = Set.new
        current = stack_idx

        # visited guards against a cyclic prefix chain in a malformed profile,
        # which would otherwise never terminate.
        while valid_index?(current, stack_frames) && visited.add?(current)
          info = resolve_frame(stack_frames[current], tables)
          found["#{info[:name]}:#{info[:file]}:#{info[:line]}"] ||= info if info

          # Move to parent stack
          current = stack_prefixes[current]
        end

        found
      end

      # Resolves a frame index to {name:, file:, line:}, or nil when the frame
      # or its function index is out of range. Every value is looked up afresh
      # per frame, so nothing carries over from a previously resolved frame.
      def resolve_frame(frame_idx, tables)
        return nil unless valid_index?(frame_idx, tables[:frame_funcs])

        func_idx = tables[:frame_funcs][frame_idx]
        return nil unless valid_index?(func_idx, tables[:func_names])

        name = resolve_string(tables[:strings], tables[:func_names][func_idx])
        file = resolve_string(tables[:strings], tables[:func_files][func_idx])

        # Use frame line if available, otherwise function line
        line = tables[:func_lines][func_idx]
        frame_line = tables[:frame_lines][frame_idx]
        line = frame_line if frame_line.is_a?(Numeric) && frame_line > 0

        {name: name, file: file, line: line}
      end

      # True when index is an Integer addressing an existing element of array.
      def valid_index?(index, array)
        index.is_a?(Integer) && index >= 0 && index < array.size
      end

      # Returns the string for index: nil stays nil, a String is returned
      # as-is, an in-range index is looked up in string_table, and anything
      # else yields nil.
      def resolve_string(string_table, index)
        return nil if index.nil?
        return index if index.is_a?(String)

        string_table[index] if index >= 0 && index < string_table.size
      end
    end
  end
end