stud-finder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,771 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'json'
5
+ require 'open3'
6
+ require 'optparse'
7
+ require 'pathname'
8
+ require 'set'
9
+ require 'time'
10
+ require_relative 'churn'
11
+ require_relative 'temporal_coupling'
12
+ require_relative 'complexity'
13
+ require_relative 'diff'
14
+ require_relative 'coverage/detector'
15
+ require_relative 'edges'
16
+ require_relative 'fan_in'
17
+ require_relative 'js_fan_in'
18
+ require_relative 'js_complexity'
19
+ require_relative 'file_collector'
20
+ require_relative 'scorer'
21
+ require_relative 'version'
22
+
23
+ module StudFinder
24
+ # rubocop:disable Metrics/ClassLength
25
+ class CLI
26
# Values accepted by the --output option.
OUTPUT_FORMATS = %w[table json markdown csv].freeze
# Header row written by #emit_csv; #csv_file must produce values in this order.
RESULT_COLUMNS = %w[
  rank language file score class fan_in fan_in_pct fan_out fan_out_pct instability instability_pct complexity
  complexity_pct churn_commits churn_lines churn_pct max_coupling max_coupling_partner coupling_partners
  coupling_pct coverage
].freeze
# Header row written by #emit_markdown_section; #markdown_row must produce values in this order.
MARKDOWN_COLUMNS = %w[
  rank language file score class fan_in fan_out fan_out_pct instability complexity churn_commits churn_lines
  churn_pct max_coupling max_coupling_partner coupling_partners coupling_pct coverage
].freeze
# Exact set of keys #parse_weights requires in a --weights argument.
WEIGHT_KEYS = %i[fan_in fan_out complexity churn coverage].freeze
# Starting option values. Frozen; #initialize takes a deep copy so the nested hash and
# arrays can be mutated per invocation.
DEFAULT_OPTIONS = {
  output: 'table',
  churn_days: 180,
  weights: { fan_in: 0.25, fan_out: 0.10, complexity: 0.25, churn: 0.25, coverage: 0.15 },
  # Set to true by --weights; #validate_options! only validates weights when it is true.
  custom_weights: false,
  trunk_threshold: 85,
  branch_threshold: 50,
  excludes: [],
  min_files: 20,
  top: nil,
  verbose: false,
  ruby_coverage_path: nil,
  js_coverage_path: nil,
  js_timeout: 60,
  diff_base: nil,
  only_paths: nil,
  # Filled in by #run from #resolve_filter_set; nil means "no output filter".
  filter_set: nil,
  coupling_threshold: 0.30,
  coupling_min_commits: 5,
  # Warning codes raised while handling the command line; merged into the report warnings by #analyze.
  cli_warnings: []
}.freeze

# Per-language-group analysis result, built by #score_group (or #empty_analysis when the group has no files).
Analysis = Struct.new(:files, :fan_in, :fan_out, :edges, :complexity, :churn_commits, :churn_lines, :coverage,
                      :coverage_available, :skipped_files, :warnings, :rows, :weights, keyword_init: true)
# Both language groups plus the combined warning list, built by #analyze.
Report = Struct.new(:ruby, :javascript, :warnings, keyword_init: true)

# Raised for invalid option values or arguments; #run rescues it, prints the message, and returns 1.
class ValidationError < StandardError; end
64
+
65
# Prepares a CLI for a single invocation.
#
# argv   - command-line arguments; copied so the caller's array is never mutated.
# stdout - stream that receives report output.
# stderr - stream that receives progress lines, notes, and error messages.
def initialize(argv, stdout: $stdout, stderr: $stderr)
  @stdout = stdout
  @stderr = stderr
  @argv = argv.dup
  # A Marshal round-trip gives a deep, unfrozen copy of the frozen defaults, so the
  # nested weights hash and the excludes/cli_warnings arrays are safe to mutate.
  serialized_defaults = Marshal.dump(DEFAULT_OPTIONS)
  @options = Marshal.load(serialized_defaults)
end
71
+
72
# Convenience entry point: builds a CLI for +argv+ and returns whatever #run returns.
def self.start(argv = ARGV, stdout: $stdout, stderr: $stderr)
  cli = new(argv, stdout: stdout, stderr: stderr)
  cli.run
end
75
+
76
# Parses the command line, runs the selected command, and returns an exit status.
#
# Two invocation forms are handled:
#   * `edges TARGET [PATH]` - options are validated, then the work is delegated to #run_edges.
#   * `[PATH]`              - files under PATH (default '.') are collected, scored, and emitted
#                             in the format selected by --output.
#
# Returns 0 after a report has been emitted, the result of #run_edges for the edges form,
# or 1 after printing the message of a rescued error to stderr.
def run
  parser = option_parser

  if @argv[0] == 'edges'
    @argv.shift
    parser.parse!(@argv)
    target = @argv.shift
    path = @argv.shift || '.'
    raise ValidationError, "Error: unexpected arguments: #{@argv.join(' ')}" unless @argv.empty?

    @repo_path = File.expand_path(path)
    validate_options!
    return run_edges(target, path)
  end

  parser.parse!(@argv)
  path = @argv.shift || '.'
  raise ValidationError, "Error: unexpected arguments: #{@argv.join(' ')}" unless @argv.empty?

  @repo_path = File.expand_path(path)
  validate_options!

  result = FileCollector.new(
    path: path,
    excludes: @options[:excludes],
    min_files: @options[:min_files],
    stderr: @stderr
  ).collect
  progress("collecting files... #{result.files.length} found")

  # Resolved after collection but before scoring; the filter itself is applied at emit time.
  @options[:filter_set] = resolve_filter_set(@repo_path)

  coupling = compute_coupling(@repo_path, result.files)
  analysis = analyze(@repo_path, result.files, result.languages, coupling)
  analysis = warn_if_no_scored_files(analysis)
  emit_results(@repo_path, result, analysis)
  0
# OptionParser::ParseError is the common parent of every option-parsing failure, so
# ambiguous abbreviations (AmbiguousOption) and needless arguments (NeedlessArgument) are
# reported like invalid options instead of escaping as an uncaught exception.
rescue OptionParser::ParseError, ValidationError,
       FileCollector::Error, Churn::Error, Complexity::Error, Coverage::Cobertura::Error, Coverage::Detector::Error,
       Coverage::Lcov::Error, Coverage::Resultset::Error, Diff::Error, Scorer::ValidationError => e
  @stderr.puts e.message
  1
end
119
+
120
# Implements the `edges` command: scores the files under +path+, computes temporal
# coupling over the same file set, and hands both to Edges for +target+.
#
# Returns the result of Edges#call, or 1 after printing the message of a rescued error.
def run_edges(target, path)
  @repo_path = File.expand_path(path)
  collector = FileCollector.new(path: path, excludes: @options[:excludes],
                                min_files: @options[:min_files], stderr: @stderr)
  result = collector.collect
  progress("collecting files... #{result.files.length} found")

  # No coupling is passed to #analyze here; the pair data goes straight to Edges below.
  report = analyze(@repo_path, result.files, result.languages)
  combined_rows = report.ruby.rows + report.javascript.rows
  combined_edges = report.ruby.edges.merge(report.javascript.edges)

  lookback_days = @options[:churn_days]
  min_commits = @options[:coupling_min_commits]
  threshold = @options[:coupling_threshold]

  progress("computing temporal coupling (git log, #{lookback_days} days)...")
  temporal = TemporalCoupling.new(
    repo_path: @repo_path,
    files: result.files,
    days: lookback_days,
    min_co_changes: min_commits,
    coupling_threshold: threshold
  )
  coupling_result = temporal.call

  edges_command = Edges.new(
    target: target, rows: combined_rows, edges: combined_edges,
    coupling: coupling_result.pairs,
    churn_days: lookback_days,
    coupling_min_commits: min_commits,
    coupling_threshold: threshold,
    stdout: @stdout, stderr: @stderr
  )
  edges_command.call
rescue FileCollector::Error, Churn::Error, Complexity::Error, Scorer::ValidationError => e
  @stderr.puts e.message
  1
end
150
+
151
+ private
152
+
153
+ # rubocop:disable Metrics/AbcSize, Metrics/BlockLength, Metrics/MethodLength
154
# Builds the OptionParser for this invocation. Each handler writes the parsed value into
# @options; --version and --help print to stdout and exit the process with status 0.
def option_parser
  OptionParser.new do |opts|
    opts.banner = 'Usage: stud-finder [PATH] [OPTIONS]'
    opts.separator ''
    opts.separator 'Options:'

    opts.on('--output FORMAT', OUTPUT_FORMATS,
            'Output format: table, json, markdown, csv (default: table)') { |value| @options[:output] = value }
    opts.on('--churn-days N', Integer,
            'Commit lookback window in days (default: 180)') { |value| @options[:churn_days] = value }
    opts.on('--weights WEIGHTS', 'fan_in:F,fan_out:O,complexity:C,churn:H,coverage:V') do |value|
      @options[:weights] = parse_weights(value)
      # Remembered so validate_options! knows the weights came from the user.
      @options[:custom_weights] = true
    end
    opts.on('--ruby-coverage PATH',
            'Path to a Ruby coverage report (.xml, .info, .json)') { |value| @options[:ruby_coverage_path] = value }
    opts.on('--js-coverage PATH',
            'Path to a JavaScript coverage report (reserved for Phase 2 Chunk B)') do |value|
      @options[:js_coverage_path] = value
    end
    opts.on('--coverage PATH', 'Deprecated alias for --ruby-coverage') do |value|
      @options[:ruby_coverage_path] = value
      @options[:cli_warnings] << 'coverage_flag_deprecated'
      @stderr.puts 'Warning: coverage_flag_deprecated: --coverage is deprecated; use --ruby-coverage.'
    end
    opts.on('--js-timeout N', Integer,
            'dependency-cruiser timeout in seconds (default: 60)') { |value| @options[:js_timeout] = value }
    opts.on('--trunk-threshold N', Integer,
            'fan_in percentile cutoff for trunk classification (default: 85)') do |value|
      @options[:trunk_threshold] = value
    end
    opts.on('--branch-threshold N', Integer,
            'fan_in percentile cutoff for branch classification (default: 50)') do |value|
      @options[:branch_threshold] = value
    end
    # Repeatable: every occurrence appends another pattern.
    opts.on('--exclude PATTERN', 'Exclude glob pattern (repeatable)') { |value| @options[:excludes] << value }
    opts.on('--min-files N', Integer,
            'Advisory minimum file count (default: 20)') { |value| @options[:min_files] = value }
    opts.on('--top N', Integer, 'Emit only the top N results') { |value| @options[:top] = value }
    opts.on('--diff-base REF',
            'Score the full repo but emit only files changed vs REF (merge-base), e.g. origin/staging') do |value|
      @options[:diff_base] = value
    end
    opts.on('--only PATHS',
            'Emit only these comma-separated repo-relative paths (still scored against the full repo)') do |value|
      # Blank entries (e.g. from "a,,b" or a trailing comma) are dropped.
      @options[:only_paths] = value.split(',').map(&:strip).reject(&:empty?)
    end
    opts.on('--coupling-threshold FLOAT', Float,
            'Minimum coupling ratio for edges output (default: 0.30)') do |value|
      @options[:coupling_threshold] = value
    end
    opts.on('--coupling-min-commits N', Integer,
            'Minimum co-change count for edges output (default: 5)') do |value|
      @options[:coupling_min_commits] = value
    end
    opts.on('--verbose', 'Print suppressed per-file warnings to stderr') { @options[:verbose] = true }
    opts.on('--version', 'Print version and exit') do
      @stdout.puts StudFinder::VERSION
      exit 0
    end
    opts.on('--help', 'Print help and exit') do
      @stdout.puts opts
      exit 0
    end
  end
end
231
+ # rubocop:enable Metrics/AbcSize, Metrics/BlockLength, Metrics/MethodLength
232
+
233
# Parses a --weights argument of the form "fan_in:F,fan_out:O,complexity:C,churn:H,coverage:V"
# into a Hash of Symbol => Float.
#
# Whitespace around a key is ignored, matching the leniency Float() already gives values,
# so "fan_in:0.25, fan_out:0.10" is accepted.
#
# Raises ValidationError when an entry is not "key:value", a value is not a float, the key
# set is not exactly WEIGHT_KEYS, or any weight lies outside 0.0..1.0.
def parse_weights(value)
  pairs = value.split(',').map do |entry|
    key, raw = entry.split(':', 2)
    key = key&.strip
    raise ValidationError, 'Error: invalid weights format.' if key.nil? || raw.nil? || key.empty? || raw.empty?

    [key.to_sym, Float(raw)]
  rescue ArgumentError
    # Float() raises ArgumentError for non-numeric text.
    raise ValidationError, 'Error: weight values must be floats.'
  end

  weights = pairs.to_h
  missing = WEIGHT_KEYS - weights.keys
  extra = weights.keys - WEIGHT_KEYS
  unless missing.empty? && extra.empty?
    raise ValidationError,
          'Error: weights must include fan_in, fan_out, complexity, churn, and coverage.'
  end

  out_of_range = weights.any? { |_key, weight| weight.negative? || weight > 1.0 }
  raise ValidationError, 'Error: weight values must be between 0.0 and 1.0.' if out_of_range

  weights
end
256
+
257
# Checks the parsed options for consistency, raising ValidationError on the first problem.
# Check order: thresholds, positive integers, coupling threshold, coverage paths, filter
# options, and finally the weights (only when --weights was given).
def validate_options!
  %i[trunk_threshold branch_threshold].each { |name| validate_threshold!(name) }
  unless @options[:branch_threshold] < @options[:trunk_threshold]
    raise ValidationError, 'Error: branch-threshold must be strictly less than trunk-threshold.'
  end

  positive_flags = {
    min_files: '--min-files', top: '--top', churn_days: '--churn-days', js_timeout: '--js-timeout',
    coupling_min_commits: '--coupling-min-commits'
  }
  positive_flags.each do |key, flag|
    value = @options[key]
    # --top is the only optional entry: an unset value means "no limit".
    next if key == :top && !value

    raise ValidationError, "Error: #{flag} must be positive." if value <= 0
  end

  ratio = @options[:coupling_threshold]
  raise ValidationError, 'Error: --coupling-threshold must be between 0.0 and 1.0.' unless (0.0..1.0).cover?(ratio)

  validate_coverage_paths!
  validate_filter_options!
  validate_weights! if @options[:custom_weights]
end
278
+
279
# Raises ValidationError when a coverage path was given but does not name an existing
# regular file. The Ruby path is checked before the JavaScript path.
def validate_coverage_paths!
  ruby_report = @options[:ruby_coverage_path]
  raise ValidationError, "Error: coverage file not found: #{ruby_report}" if ruby_report && !File.file?(ruby_report)

  js_report = @options[:js_coverage_path]
  raise ValidationError, "Error: JS coverage file not found: #{js_report}" if js_report && !File.file?(js_report)
end
287
+
288
# Rejects --diff-base combined with --only, then (when a diff base and a repo path are
# both known) asks Diff to validate the ref. Diff::Error is re-raised as ValidationError
# with the same message.
def validate_filter_options!
  base_ref = @options[:diff_base]
  raise ValidationError, 'Error: --diff-base and --only are mutually exclusive.' if base_ref && @options[:only_paths]
  return if !base_ref || !@repo_path

  diff = Diff.new(repo_path: @repo_path, base_ref: base_ref)
  diff.validate_ref!
rescue Diff::Error => e
  raise ValidationError, e.message
end
298
+
299
# Raises ValidationError unless the option stored under +name+ lies in 1..99 inclusive.
# The message names the option with dashes in place of underscores.
def validate_threshold!(name)
  threshold = @options[name]
  flag = name.to_s.tr('_', '-')
  raise ValidationError, "Error: #{flag} must be between 1 and 99." unless threshold.between?(1, 99)
end
305
+
306
# True when at least one of the Ruby / JavaScript coverage paths has been set.
def coverage_available?
  report_paths = @options.values_at(:ruby_coverage_path, :js_coverage_path)
  !report_paths.all?(&:nil?)
end
309
+
310
# Validates user-supplied weights: the coverage weight must be zero when no coverage path
# was given, and all weights together must sum to 1.0 within a tolerance of 0.001.
def validate_weights!
  weights = @options[:weights]
  coverage_weighted = weights[:coverage].positive?
  if coverage_weighted && !coverage_available?
    raise ValidationError, 'Error: coverage weight must be 0.0 when no coverage data is provided.'
  end

  total = weights.values.sum
  drift = (total - 1.0).abs
  raise ValidationError, format('Error: weights must sum to 1.0; actual sum is %.4f.', total) unless drift <= 0.001
end
321
+
322
# Runs TemporalCoupling once over every collected file (all languages at once, so
# co-change across languages is captured) and collapses each file's partner list with
# #aggregate_partners, giving
# { file => { max_coupling: Float, max_coupling_partner: String, partners: Integer } }.
# A file with no qualifying pairs has no key in the result; the scorer is expected to
# treat a missing entry as 0.
def compute_coupling(path, files)
  lookback_days = @options[:churn_days]
  progress("computing temporal coupling (git log, #{lookback_days} days)...")

  temporal = TemporalCoupling.new(
    repo_path: path,
    files: files,
    days: lookback_days,
    min_co_changes: @options[:coupling_min_commits],
    coupling_threshold: @options[:coupling_threshold]
  )
  pairs_by_file = temporal.call.pairs
  pairs_by_file.transform_values { |partners| aggregate_partners(partners) }
end
338
+
339
# Summarises one file's partner entries as its strongest partner plus a partner count.
# Selection is deterministic: highest :coupling wins, ties go to the highest :co_changes,
# and remaining ties go to the alphabetically first :path.
def aggregate_partners(partners)
  return { max_coupling: 0.0, max_coupling_partner: nil, partners: partners.length } if partners.empty?

  # Negating the numeric keys lets a single ascending comparison express "largest first".
  strongest = partners.min_by { |entry| [-entry[:coupling], -entry[:co_changes], entry[:path]] }
  { max_coupling: strongest[:coupling], max_coupling_partner: strongest[:path], partners: partners.length }
end
350
+
351
# Splits +files+ by language, analyses each non-empty group, and wraps both results in a
# Report. Files tagged :ruby form the Ruby group; files tagged :javascript or :typescript
# form the JavaScript group; anything else is ignored. Report warnings are the de-duplicated
# union of both groups' warnings and the warnings raised on the command line.
def analyze(path, files, languages, coupling = nil)
  ruby_files, remaining = files.partition { |file| languages[file] == :ruby }
  js_files = remaining.select { |file| %i[javascript typescript].include?(languages[file]) }

  ruby_analysis =
    if ruby_files.empty?
      empty_analysis
    else
      analyze_ruby(path, ruby_files, coupling)
    end
  javascript_analysis =
    if js_files.empty?
      empty_analysis
    else
      analyze_javascript(path, js_files, languages, coupling)
    end

  progress('done')
  combined_warnings = ruby_analysis.warnings + javascript_analysis.warnings + @options[:cli_warnings]
  Report.new(ruby: ruby_analysis, javascript: javascript_analysis, warnings: combined_warnings.uniq)
end
362
+
363
# Collects fan-in/fan-out, complexity, and churn for the Ruby files and scores them.
# Files the complexity step reports as skipped are excluded from churn and scoring, but
# are still handed to #score_group as the skipped list.
def analyze_ruby(path, files, coupling = nil)
  progress('computing Ruby fan_in + fan_out (rubocop-ast)...')
  dependencies = FanIn.new(repo_path: path, files: files).call

  progress('computing Ruby complexity (rubocop)...')
  complexity = Complexity.new(repo_path: path, files: files, stderr: @stderr).call
  skipped = complexity.skipped_files
  scorable = files - skipped

  progress("computing Ruby churn (git log, #{@options[:churn_days]} days)...")
  churn = Churn.new(repo_path: path, files: scorable, days: @options[:churn_days], stderr: @stderr).call

  languages = scorable.to_h { |file| [file, :ruby] }
  score_group(scorable, dependencies.counts, dependencies.fan_out_counts, dependencies.edges,
              complexity.counts, churn, skipped, ruby_coverage(path, scorable),
              language_by_file: languages, coupling: coupling)
end
380
+
381
# Collects fan-in/fan-out, complexity, and churn for the JavaScript/TypeScript files and
# scores them. Warnings from the fan-in and complexity steps are forwarded to #score_group;
# no files are reported as skipped for this group.
def analyze_javascript(path, files, languages, coupling = nil)
  progress('computing JavaScript fan_in + fan_out (dependency-cruiser)...')
  fan_in_result = JsFanIn.new(repo_path: path, files: files, js_timeout: @options[:js_timeout],
                              stderr: @stderr).call
  progress('computing JavaScript complexity (eslint)...')
  complexity_result = JsComplexity.new(repo_path: path, files: files, js_timeout: @options[:js_timeout],
                                       stderr: @stderr).call
  # Announce the churn step as the Ruby pipeline does, so the git log pass is not silent.
  progress("computing JavaScript churn (git log, #{@options[:churn_days]} days)...")
  churn_result = Churn.new(repo_path: path, files: files, days: @options[:churn_days], stderr: @stderr).call
  score_group(files, fan_in_result.counts, fan_in_result.fan_out_counts, fan_in_result.edges,
              complexity_result.counts, churn_result, [], js_coverage(path, files),
              language_by_file: languages, extra_warnings: fan_in_result.warnings + complexity_result.warnings,
              coupling: coupling)
end
394
+
395
+ # rubocop:disable Metrics/ParameterLists
396
# Scores one language group and packages the result as an Analysis.
#
# coverage_payload is the [coverage_result, coverage_parser] pair produced by
# #ruby_coverage / #js_coverage; both elements are nil when no report was supplied.
# Warnings start from +extra_warnings+ and gain a code for each condition detected here
# (missing or partial coverage, zero-inflated churn, skipped files, small repo).
def score_group(files, fan_in, fan_out, edges, complexity, churn_result, skipped_files, coverage_payload,
                language_by_file: {}, extra_warnings: [], coupling: nil)
  progress("normalizing + scoring #{files.length} files...")
  coverage_result, coverage_parser = coverage_payload
  scorer = Scorer.new(
    files: files, fan_in: fan_in, fan_out: fan_out, complexity: complexity,
    churn: churn_result.counts, churn_lines: churn_result.line_counts,
    coverage: coverage_result, weights: @options[:weights],
    branch_threshold: @options[:branch_threshold], trunk_threshold: @options[:trunk_threshold],
    coupling: coupling
  )

  # Copy first so the caller's warning array is never mutated.
  warnings = extra_warnings.dup
  warnings << 'coverage_unavailable' unless coverage_result
  warnings << 'coverage_partial' if coverage_parser&.missing_files&.any?
  warnings << 'zero_churn_majority' if churn_result.zero_inflated
  warnings << 'files_skipped' if skipped_files.any?
  warnings << 'small_repo' if files.length < @options[:min_files]
  emit_scoring_note(scorer, coverage_result)

  # The scorer is run before its normalized weights are read, as in the keyword order below.
  Analysis.new(
    files: files, fan_in: fan_in, fan_out: fan_out, edges: edges, complexity: complexity,
    churn_commits: churn_result.churn_commits, churn_lines: churn_result.churn_lines,
    coverage: coverage_result, coverage_available: !coverage_result.nil?, skipped_files: skipped_files,
    warnings: warnings.uniq,
    rows: scorer.call.map { |row| with_language(row, language_by_file) },
    weights: scorer.normalized_weights
  )
end
420
+ # rubocop:enable Metrics/ParameterLists
421
+
422
# Returns a copy of +row+ with a :language string taken from +language_by_file+.
# Raises KeyError when the row's path has no entry in the lookup.
def with_language(row, language_by_file)
  language = language_by_file.fetch(row[:path])
  row.merge(language: language.to_s)
end
425
+
426
# Loads the Ruby coverage report, if one was given.
# Returns [parsed_result, parser], or [nil, nil] when no Ruby coverage path is set.
def ruby_coverage(path, files)
  report_path = @options[:ruby_coverage_path]
  if report_path
    parser = Coverage::Detector.for(path: report_path, files: files, project_root: path)
    [parser.call, parser]
  else
    [nil, nil]
  end
end
432
+
433
# Loads the JavaScript coverage report, if one was given.
# Returns [parsed_result, parser], or [nil, nil] when no JavaScript coverage path is set.
def js_coverage(path, files)
  report_path = @options[:js_coverage_path]
  if report_path
    parser = Coverage::Detector.for(path: report_path, files: files, project_root: path)
    [parser.call, parser]
  else
    [nil, nil]
  end
end
439
+
440
# Tells the user on stderr which scoring formula applies: the 5-factor note when coverage
# data was loaded, otherwise the 4-factor note built from the scorer's normalized weights.
def emit_scoring_note(scorer, coverage_result)
  note =
    if coverage_result
      'Note: coverage data available. Score uses 5-factor formula.'
    else
      scoring_note(weights: scorer.normalized_weights, stderr: true)
    end
  @stderr.puts note
end
447
+
448
# Applies the output filter and --top limit to each language group's rows, then writes
# them in the format selected by --output. Any unrecognised format falls back to the table.
def emit_results(path, result, analysis)
  ruby_rows = limited_rows(analysis.ruby.rows)
  javascript_rows = limited_rows(analysis.javascript.rows)
  chosen_format = @options[:output]

  if chosen_format == 'json'
    emit_json(path, analysis, ruby_rows, javascript_rows)
  elsif chosen_format == 'markdown'
    emit_markdown(analysis, ruby_rows, javascript_rows)
  elsif chosen_format == 'csv'
    # CSV has no per-language sections: Ruby rows first, then JavaScript rows.
    emit_csv(ruby_rows + javascript_rows)
  else
    emit_table(path, result, analysis, ruby_rows, javascript_rows)
  end
end
463
+
464
# Resolves the optional output filter to a Set of paths relative to the analysis root, or
# nil when neither --diff-base nor --only was given. The filter is applied at emit time
# only (see #limited_rows), so the whole repo is still scored and fan_in counts and
# percentiles stay correct.
#
# When the resolved filter is empty a note is printed to stderr and the
# 'diff_filter_empty' warning is recorded; the note names whichever option produced the
# filter, so an empty --only list is not reported as an empty diff.
def resolve_filter_set(path)
  from_diff = !@options[:diff_base].nil? && @options[:diff_base] != false
  paths =
    if from_diff
      Diff.new(repo_path: path, base_ref: @options[:diff_base]).changed_paths
    else
      @options[:only_paths]
    end
  return nil unless paths

  # Diff (and documented --only) paths are repo-root-relative, but row paths are
  # relative to the analysis root (FileCollector). Rebase so they compare equal
  # when PATH is a subdirectory; a no-op when PATH is the repo root.
  paths = rebase_to_analysis_root(paths, path)

  if paths.empty?
    note =
      if from_diff
        'Note: diff contains no changed files. Nothing to filter.'
      else
        'Note: --only matched no paths under the analysis root. Nothing to filter.'
      end
    @stderr.puts note
    # The warning code is kept for both sources so existing consumers keep working.
    @options[:cli_warnings] << 'diff_filter_empty'
  end
  Set.new(paths)
end
487
+
488
# Converts repo-root-relative filter paths into paths relative to the analysis root,
# dropping any path that lies outside it. The input is returned unchanged when the git
# toplevel cannot be resolved, when the analysis root is the toplevel (or not beneath it),
# or when a path cannot be resolved on disk (Errno::ENOENT).
def rebase_to_analysis_root(paths, analysis_path)
  repo_root = git_toplevel(analysis_path)
  return paths if repo_root.nil?

  # Both sides are real paths, so a symlinked root (e.g. macOS /var -> /private/var)
  # cannot defeat the comparison.
  analysis_root = File.realpath(analysis_path)
  return paths if analysis_root == repo_root

  relative = Pathname.new(analysis_root).relative_path_from(Pathname.new(repo_root)).to_s
  return paths if relative.empty? || relative == '.' || relative.start_with?('..')

  # The trailing slash keeps "app" from matching "application/...".
  prefix = "#{relative}/"
  paths.each_with_object([]) do |candidate, rebased|
    rebased << candidate.delete_prefix(prefix) if candidate.start_with?(prefix)
  end
rescue Errno::ENOENT
  paths
end
508
+
509
# Asks git for the working-tree root that contains +analysis_path+.
# Returns the real path of that root, or nil when the git command exits unsuccessfully.
def git_toplevel(analysis_path)
  absolute_path = File.expand_path(analysis_path)
  output, _errors, status = Open3.capture3('git', '-C', absolute_path, 'rev-parse', '--show-toplevel')
  return nil unless status.success?

  File.realpath(output.strip)
end
515
+
516
# When a non-empty output filter matches none of the scored files, prints a note to stderr
# and returns a copy of the report with the 'diff_no_scored_files' warning added.
# Otherwise the report is returned as given.
def warn_if_no_scored_files(analysis)
  filter = @options[:filter_set]
  return analysis if !filter || filter.empty?

  scored_paths = Set.new(analysis.ruby.files + analysis.javascript.files)
  return analysis if filter.intersect?(scored_paths)

  @stderr.puts 'Note: no scored files matched the diff. ' \
               'The PR may only touch unscorable files (docs, config, migrations, etc.).'
  extended_warnings = (analysis.warnings + ['diff_no_scored_files']).uniq
  Report.new(ruby: analysis.ruby, javascript: analysis.javascript, warnings: extended_warnings)
end
527
+
528
# Narrows +rows+ for output: keeps only rows whose :path is in the output filter (when one
# is set), then keeps only the first --top rows (when a limit is set).
def limited_rows(rows)
  allowed_paths = @options[:filter_set]
  limit = @options[:top]

  rows = rows.select { |row| allowed_paths.include?(row[:path]) } if allowed_paths
  rows = rows.first(limit) if limit
  rows
end
532
+
533
# One-line description of the active output filter, or nil when no filter is set.
def filter_note
  return nil unless @options[:filter_set]

  base_ref = @options[:diff_base]
  return 'Filtered to --only paths (ranks are against the full repo).' unless base_ref

  "Filtered to files changed vs #{base_ref} (ranks are against the full repo)."
end
542
+
543
# Placeholder Analysis for a language group with no files: empty collections, no coverage,
# and nil weights.
def empty_analysis
  blank_fields = {
    files: [], fan_in: {}, fan_out: {}, edges: {}, complexity: {}, churn_commits: {}, churn_lines: {},
    coverage: nil, coverage_available: false, skipped_files: [], warnings: [], rows: [], weights: nil
  }
  Analysis.new(**blank_fields)
end
547
+
548
# Writes one Markdown section to stdout: a level-3 heading, a blank line, the table header
# and divider built from MARKDOWN_COLUMNS, one line per row, and a trailing blank line.
def emit_markdown_section(title, rows)
  header_cells = MARKDOWN_COLUMNS.join(' | ')
  divider_cells = Array.new(MARKDOWN_COLUMNS.length, '---').join(' | ')

  @stdout.puts "### #{title}"
  @stdout.puts
  @stdout.puts "| #{header_cells} |"
  @stdout.puts "| #{divider_cells} |"
  rows.each do |row|
    @stdout.puts markdown_row(row)
  end
  @stdout.puts
end
556
+
557
# Writes one fixed-width table section to stdout: the title, a column header, one line per
# row (see #table_row), and a trailing blank line.
#
# The header is padded with the same column widths and alignment that #table_row uses for
# its values, so every heading sits directly above its column.
def emit_table_section(title, rows)
  @stdout.puts title
  @stdout.puts format(
    '%5s %-10s %-45s %6s %-6s %6s %7s %11s %10s %13s %11s %9s %12s %-40s %17s %12s %8s',
    'rank', 'language', 'file', 'score', 'class', 'fan_in', 'fan_out', 'instability', 'complexity',
    'churn_commits', 'churn_lines', 'churn_pct', 'max_coupling', 'max_coupling_partner',
    'coupling_partners', 'coupling_pct', 'coverage'
  )
  rows.each { |row| @stdout.puts table_row(row) }
  @stdout.puts
end
565
+
566
# Writes the CSV report to stdout: the RESULT_COLUMNS header followed by one line per row.
def emit_csv(rows)
  write_line = ->(fields) { @stdout << CSV.generate_line(fields) }

  write_line.call(RESULT_COLUMNS)
  rows.each { |row| write_line.call(csv_file(row)) }
end
572
+
573
# Writes the JSON report to stdout as a single line: run metadata, the report warnings, and
# the Ruby and JavaScript rows converted with #json_file.
def emit_json(path, analysis, ruby_rows, javascript_rows)
  document = {
    meta: json_meta(path, analysis),
    warnings: analysis.warnings,
    ruby: ruby_rows.map { |row| json_file(row) },
    javascript: javascript_rows.map { |row| json_file(row) }
  }
  @stdout.puts JSON.generate(document)
end
581
+
582
# Builds the "meta" object of the JSON report: repo path, UTC timestamp, churn window, file
# and skip counts across both language groups, formula name, weights, and warnings.
# :filtered, :diff_base, and :only_paths are added only when the matching option is set.
def json_meta(path, analysis)
  ruby_group = analysis.ruby
  js_group = analysis.javascript
  formula = report_coverage_available?(analysis) ? '5-factor' : '4-factor (no coverage)'

  meta = {
    repo: path,
    analyzed_at: Time.now.utc.iso8601,
    churn_days: @options[:churn_days],
    file_count: ruby_group.files.length + js_group.files.length,
    files_skipped: ruby_group.skipped_files.length + js_group.skipped_files.length,
    formula: formula,
    # Ruby's weights are preferred; JavaScript's are used when the Ruby group has none.
    weights: json_weights(ruby_group.weights || js_group.weights),
    warnings: analysis.warnings
  }
  meta[:filtered] = true if @options[:filter_set]
  %i[diff_base only_paths].each do |key|
    meta[key] = @options[key] if @options[key]
  end
  meta
end
598
+
599
# Writes the Markdown report to stdout: a dated heading, a summary quote (formula, churn
# window, file count), the filter note when a filter is active, one section per language
# group, and a closing caveat about fan_in.
def emit_markdown(analysis, ruby_rows, javascript_rows)
  today = Time.now.utc.strftime('%Y-%m-%d')
  @stdout.puts "## stud-finder — #{today}"
  @stdout.puts

  file_count = analysis.ruby.files.length + analysis.javascript.files.length
  formula = report_coverage_available?(analysis) ? '5-factor score' : '4-factor score (no coverage)'
  @stdout.puts "> #{formula}. Churn window: #{@options[:churn_days]} days. #{file_count} files analyzed."

  note = filter_note
  if note
    @stdout.puts
    @stdout.puts "> #{note}"
  end

  { 'Ruby' => ruby_rows, 'JavaScript/TypeScript' => javascript_rows }.each do |title, rows|
    emit_markdown_section(title, rows)
  end
  @stdout.puts
  @stdout.puts '*fan_in is a static approximation — dynamic references not counted.*'
end
615
+
616
# Renders one result row as a Markdown table line, with values in MARKDOWN_COLUMNS order.
# Ratio columns go through #format_score and coverage through #format_coverage.
def markdown_row(row)
  ordered_keys = %i[
    rank language path score classification fan_in fan_out fan_out_pct instability complexity
    churn_commits churn_lines churn_pct max_coupling max_coupling_partner coupling_partners coupling_pct
  ]
  score_keys = %i[score fan_out_pct instability churn_pct max_coupling coupling_pct]

  cells = ordered_keys.map do |key|
    score_keys.include?(key) ? format_score(row[key]) : row[key]
  end
  cells << format_coverage(row[:coverage])
  "| #{cells.join(' | ')} |"
end
626
+
627
# Writes the plain-text report to stdout: a title line, the no-coverage scoring note when
# coverage is unavailable, the filter note when a filter is active, one section per
# language group, the footer, and a closing caveat about fan_in.
def emit_table(path, result, analysis, ruby_rows, javascript_rows)
  if report_coverage_available?(analysis)
    @stdout.puts "stud-finder — #{path} (#{@options[:churn_days]}-day churn, 5-factor score)"
  else
    @stdout.puts "stud-finder — #{path} (#{@options[:churn_days]}-day churn, 4-factor score)"
    # Ruby's weights are preferred; JavaScript's are used when the Ruby group has none.
    weights = analysis.ruby.weights || analysis.javascript.weights
    @stdout.puts scoring_note(weights: weights, stderr: false)
  end

  note = filter_note
  @stdout.puts note if note
  @stdout.puts

  { 'Ruby' => ruby_rows, 'JavaScript/TypeScript' => javascript_rows }.each do |title, rows|
    emit_table_section(title, rows)
  end
  @stdout.puts
  @stdout.puts footer(result, analysis)
  @stdout.puts 'fan_in is a static approximation — dynamic references (const_get, send, metaprogramming) ' \
               'not counted.'
end
645
+
646
# Converts one result row into the Hash emitted per file in the JSON report. Values are
# copied unformatted; the row's :classification is published under the :class key.
def json_file(row)
  copy = ->(keys) { keys.to_h { |key| [key, row[key]] } }

  leading = copy.call(%i[rank language path score])
  trailing = copy.call(%i[
    fan_in fan_in_pct fan_out fan_out_pct instability instability_pct complexity complexity_pct
    churn_commits churn_lines churn_pct max_coupling max_coupling_partner coupling_partners coupling_pct coverage
  ])
  leading.merge(class: row[:classification]).merge(trailing)
end
671
+
672
# Converts one result row into the array of CSV fields, in RESULT_COLUMNS order. Score and
# percentile columns go through #format_score; a nil coverage becomes an empty string.
def csv_file(row)
  ordered_keys = %i[
    rank language path score classification fan_in fan_in_pct fan_out fan_out_pct instability instability_pct
    complexity complexity_pct churn_commits churn_lines churn_pct max_coupling max_coupling_partner
    coupling_partners coupling_pct
  ]
  score_keys = %i[
    score fan_in_pct fan_out_pct instability instability_pct complexity_pct churn_pct max_coupling coupling_pct
  ]

  fields = ordered_keys.map do |key|
    score_keys.include?(key) ? format_score(row[key]) : row[key]
  end
  fields << (row[:coverage] || '')
end
697
+
698
# Whether either language group of the report was scored with coverage data.
def report_coverage_available?(analysis)
  ruby_available = analysis.ruby.coverage_available
  return ruby_available if ruby_available

  analysis.javascript.coverage_available
end
701
+
702
# Rounds each weight to four decimals for the JSON report. A missing coverage weight is
# emitted as nil.
#
# Returns nil when +weights+ itself is nil. That happens when neither language group was
# scored, because both groups then carry the nil weights of #empty_analysis; the report
# gets a null "weights" entry instead of failing with NoMethodError.
def json_weights(weights)
  return nil if weights.nil?

  {
    fan_in: weights[:fan_in].round(4),
    fan_out: weights[:fan_out].round(4),
    complexity: weights[:complexity].round(4),
    churn: weights[:churn].round(4),
    coverage: weights[:coverage]&.round(4)
  }
end
711
+
712
# Builds the "coverage data not available" note, listing the four active weights to two
# decimals. The stderr wording names the 4-factor formula; the stdout wording does not.
def scoring_note(weights:, stderr:)
  template =
    if stderr
      'Note: coverage data not available. Score uses 4-factor formula (fan_in %<fan_in>.2f, ' \
        'fan_out %<fan_out>.2f, complexity %<complexity>.2f, churn %<churn>.2f).'
    else
      'Note: coverage data not available. Score uses fan_in %<fan_in>.2f, fan_out %<fan_out>.2f, ' \
        'complexity %<complexity>.2f, churn %<churn>.2f.'
    end
  format(template, **weights)
end
721
+
722
# Writes a progress line, prefixed with the tool name, to stderr.
def progress(message)
  line = format('stud-finder → %s', message)
  @stderr.puts line
end
725
+
726
# Builds the table footer: files analyzed, files skipped (only when there are any), and
# files excluded by the collector's default rules, joined into one line.
def footer(result, analysis)
  groups = [analysis.ruby, analysis.javascript]
  file_count = groups.sum { |group| group.files.length }
  skipped_count = groups.sum { |group| group.skipped_files.length }

  skipped_sentence =
    ("#{skipped_count} files skipped (parse errors — run --verbose to see)." if skipped_count.positive?)
  [
    "#{file_count} files analyzed.",
    skipped_sentence,
    "#{result.default_excluded_count} files excluded by default rules."
  ].compact.join(' ')
end
734
+
735
# Formats a numeric score with exactly four decimal places.
def format_score(score)
  sprintf('%.4f', score)
end
738
+
739
# Formats a coverage ratio as a whole-number percentage, or 'n/a' when there is none.
def format_coverage(coverage)
  coverage.nil? ? 'n/a' : "#{(coverage * 100).round}%"
end
744
+
745
# Renders one result row as a fixed-width table line. Ratios go through #format_score,
# coverage through #format_coverage, and the partner path through #truncate_partner so it
# fits its 40-character column.
def table_row(row)
  cells = {
    rank: row[:rank],
    language: row[:language],
    path: row[:path],
    score: format_score(row[:score]),
    classification: row[:classification],
    fan_in: row[:fan_in],
    fan_out: row[:fan_out],
    instability: format_score(row[:instability]),
    complexity: row[:complexity],
    churn_commits: row[:churn_commits],
    churn_lines: row[:churn_lines],
    churn_pct: format_score(row[:churn_pct]),
    max_coupling: format_score(row[:max_coupling]),
    max_coupling_partner: truncate_partner(row[:max_coupling_partner]),
    coupling_partners: row[:coupling_partners],
    coupling_pct: format_score(row[:coupling_pct]),
    coverage: format_coverage(row[:coverage])
  }
  layout = '%<rank>5d %<language>-10s %<path>-45s %<score>6s %<classification>-6s %<fan_in>6d ' \
           '%<fan_out>7d %<instability>11s %<complexity>10d %<churn_commits>13d %<churn_lines>11d ' \
           '%<churn_pct>9s %<max_coupling>12s %<max_coupling_partner>-40s %<coupling_partners>17d ' \
           '%<coupling_pct>12s %<coverage>8s'
  format(layout, cells)
end
759
+
760
# Fits a partner path into the table's 40-character column. The end of the path (basename
# and nearest parent directories) is the useful part, so on overflow the last 37
# characters are kept behind a leading "..." marker. nil becomes an empty string.
def truncate_partner(partner)
  text = partner.to_s
  overflow = text.length - 40
  overflow.positive? ? "...#{text[-37, 37]}" : text
end
769
+ end
770
+ # rubocop:enable Metrics/ClassLength
771
+ end