tina4ruby 3.13.37 → 3.13.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tina4/metrics.rb CHANGED
@@ -353,6 +353,124 @@ module Tina4
353
353
  result
354
354
  end
355
355
 
356
+ # ── Top Offenders (CLI + dashboard) ──────────────────────────
357
+
358
# Severity ranking for sorting (higher = more severe).
SEVERITY_RANK = { "error" => 2, "warn" => 1, "info" => 0 }.freeze

# Rank the worst code-quality issues into a single "top offenders" list.
#
# Reuses full_analysis (does NOT re-analyze). Each offender is a hash:
#   {"file", "line", "kind", "severity", "score", "detail"}
#
# Rules (one offender per matching condition):
#   - function complexity > 10  -> kind "complexity"
#       severity "error" if > 20 else "warn"; score = complexity
#   - file loc > 500            -> kind "large_file" (warn); score = loc / 100
#   - file functions > 20       -> kind "too_many_functions" (warn); score = functions / 4
#   - file maintainability < 40 -> kind "low_maintainability"
#       severity "error" if < 20 else "warn"; score = (50 - mi)
#   - file has_tests == false   -> kind "untested" (info); score = loc / 100
#
# A rule whose metric is missing (nil / non-numeric) is skipped instead of
# raising, so one incomplete record cannot abort the whole report.
#
# root - directory handed to full_analysis (default 'src').
# top  - maximum number of offenders returned; nil means "no limit",
#        negative values are treated as 0.
#
# Returns {"offenders" => [...], "summary" => {...}}. Offenders are sorted by
# (severity rank, score) DESCENDING with insertion order as the tie-breaker.
# The summary carries files_analyzed, total_functions, avg_complexity,
# avg_maintainability, scan_mode, scan_root and total_offenders (the count
# BEFORE truncation). When the analysis reports an error the result is
# {"offenders" => [], "summary" => {"error" => message}}.
def self.offenders(root = 'src', top = 20)
  analysis = full_analysis(root)
  if analysis.key?("error")
    return { "offenders" => [], "summary" => { "error" => analysis["error"] } }
  end

  items = []

  # Function-level: cyclomatic complexity.
  (analysis["most_complex_functions"] || []).each do |fn|
    cc = fn["complexity"]
    next unless cc.is_a?(Numeric) && cc > 10

    items << {
      "file" => fn["file"],
      "line" => fn["line"],
      "kind" => "complexity",
      "severity" => cc > 20 ? "error" : "warn",
      "score" => cc.to_f,
      "detail" => "#{fn['name']} — cyclomatic complexity #{cc}"
    }
  end

  # File-level rules.
  (analysis["file_metrics"] || []).each do |fm|
    path = fm["path"]
    loc = fm["loc"]
    funcs = fm["functions"]
    mi = fm["maintainability"]

    if loc.is_a?(Numeric) && loc > 500
      items << {
        "file" => path,
        "line" => 1,
        "kind" => "large_file",
        "severity" => "warn",
        "score" => loc / 100.0,
        "detail" => "#{loc} LOC (max 500)"
      }
    end

    if funcs.is_a?(Numeric) && funcs > 20
      items << {
        "file" => path,
        "line" => 1,
        "kind" => "too_many_functions",
        "severity" => "warn",
        "score" => funcs / 4.0,
        "detail" => "#{funcs} functions (max 20)"
      }
    end

    if mi.is_a?(Numeric) && mi < 40
      items << {
        "file" => path,
        "line" => 1,
        "kind" => "low_maintainability",
        "severity" => mi < 20 ? "error" : "warn",
        "score" => 50 - mi,
        "detail" => "maintainability index #{mi} (min 40)"
      }
    end

    # Explicit `== false`: a missing has_tests value means "unknown", which
    # must not be reported as untested.
    next unless fm["has_tests"] == false

    items << {
      "file" => path,
      "line" => 1,
      "kind" => "untested",
      "severity" => "info",
      "score" => loc.is_a?(Numeric) ? loc / 100.0 : 0.0,
      "detail" => "no referencing test"
    }
  end

  # Sort by (severity rank, score) DESCENDING; the original index is the last
  # key so ties keep insertion order deterministically.
  ranked = items.each_with_index.sort_by do |offender, idx|
    [-SEVERITY_RANK.fetch(offender["severity"], 0), -offender["score"], idx]
  end.map(&:first)

  summary = {
    "files_analyzed" => analysis["files_analyzed"],
    "total_functions" => analysis["total_functions"],
    "avg_complexity" => analysis["avg_complexity"],
    "avg_maintainability" => analysis["avg_maintainability"],
    "scan_mode" => analysis["scan_mode"],
    "scan_root" => analysis["scan_root"],
    "total_offenders" => ranked.length
  }

  limit = top.nil? ? ranked.length : [top.to_i, 0].max
  { "offenders" => ranked.first(limit), "summary" => summary }
end
473
+
356
474
  # ── File Detail ─────────────────────────────────────────────
357
475
 
358
476
  def self.file_detail(file_path)
@@ -423,64 +541,140 @@ module Tina4
423
541
 
424
542
  private_class_method
425
543
 
544
# Check whether a source file has a test that actually exercises it.
#
# PRECISE detection — a bare word-mention of the module name is NOT enough
# (that over-reported badly: `sqlite3_adapter.rb` looked "tested" because
# some spec merely said "sqlite3_adapter"). Signals, in order:
#
#   1. Filename — a dedicated `<module>_spec.rb` / `<module>s_spec.rb` /
#      `<module>_test.rb` / `test_<module>.rb` anywhere under spec/, test/
#      or tests/ (nested layouts such as spec/tina4/<module>_spec.rb
#      included). The PARENT directory name is deliberately not used: one
#      `database_spec.rb` must not mark every file under `database/` tested.
#   2. Require — a test file whose `require` / `require_relative` string is
#      this file's require path, or ends in `/<module>`, optionally with a
#      `.rb` suffix. The path must start at the opening quote or right after
#      a `/`, so `orm.rb` is not matched by `require "tina4/form"`.
#   3. Constant — a test file that mentions (whole word) a constant returned
#      by _defined_constants for this file.
#
# Test directories are searched under the current directory and, when
# @last_scan_root is set, under the nearest directory at or above it (at most
# 5 levels) that contains spec/, test/ or tests/.
#
# rel_path - path of the source file (a leading "lib/" is stripped when
#            building the require path).
#
# Returns true only when one of the signals above fires, false otherwise.
# Unreadable test files are skipped.
def self._has_matching_test(rel_path)
  require 'set'

  name = File.basename(rel_path, '.rb')

  # Require path WITHOUT extension, leading lib/ stripped:
  #   "lib/tina4/database/sqlite.rb" -> "tina4/database/sqlite"
  require_path = rel_path.sub(/\.rb$/, '').sub(%r{^lib/}, '')

  # Class/module names defined in this file (see _defined_constants; names
  # longer than 2 characters).
  defined_symbols = _defined_constants(rel_path)

  # Search roots: CWD plus the directory that owns the test folders, found by
  # walking up from the last scan root.
  search_roots = ['.']
  if @last_scan_root && !@last_scan_root.empty?
    scan_root = @last_scan_root
    5.times do
      if %w[spec test tests].any? { |d| Dir.exist?(File.join(scan_root, d)) }
        search_roots << scan_root
        break
      end
      parent = File.dirname(scan_root)
      break if parent == scan_root
      scan_root = parent
    end
  end
  search_roots.uniq!

  # Every Ruby file below any test directory. `base:` keeps glob
  # metacharacters in the directory name from being interpreted.
  test_files = search_roots.flat_map do |root|
    %w[spec test tests].flat_map do |td|
      dir = File.join(root, td)
      next [] unless Dir.exist?(dir)

      Dir.glob(File.join('**', '*.rb'), base: dir).map { |f| File.join(dir, f) }
    end
  end

  # Stage 1: a dedicated spec/test FILE named for THIS module.
  dedicated = [
    "#{name}_spec.rb",
    "#{name}s_spec.rb",
    "#{name}_test.rb",
    "test_#{name}.rb"
  ]
  return true if test_files.any? { |f| dedicated.include?(File.basename(f)) }

  # Stage 2: a test that REQUIRES this module or references one of its
  # constants.
  patterns = []
  unless require_path.empty?
    rp = Regexp.escape(require_path)
    nm = Regexp.escape(name)
    # Full require path, anchored at the quote or a path separator.
    patterns << %r{(?:require|require_relative)\s+['"](?:[^'"]*/)?#{rp}(?:\.rb)?['"]}
    # Any require string whose last path segment is this module.
    patterns << %r{(?:require|require_relative)\s+['"][^'"]*/#{nm}(?:\.rb)?['"]}
  end
  unless defined_symbols.empty?
    sym_alt = defined_symbols.map { |s| Regexp.escape(s) }.join('|')
    patterns << /\b(?:#{sym_alt})\b/
  end

  return false if patterns.empty?

  test_files.any? do |test_file|
    content = begin
      # scrub: a stray invalid byte must not make the regexp match raise.
      File.read(test_file, encoding: 'utf-8').scrub
    rescue StandardError
      next false
    end
    patterns.any? { |re| content.match?(re) }
  end
end
483
649
 
650
# Names of the classes/modules defined in the file at rel_path.
#
# Every line that (after stripping whitespace) starts with `class` or
# `module` followed by a constant name contributes one name — at ANY nesting
# level, not only the top level. For a qualified definition such as
# `class Tina4::ORM` the constant actually being defined (`ORM`, the last
# segment) is recorded, not the namespace it lives in. `class << self` never
# matches.
#
# Only names longer than 2 characters are kept, so genuine 3-character
# constants like ORM / API are included.
#
# NOTE(review): a wrapping namespace written as `module Tina4` is recorded
# as well, so any spec mentioning `Tina4` counts as referencing the file —
# confirm that breadth is intended.
#
# rel_path - path to the source file; when it does not exist as given and
#            @last_scan_root is set, it is resolved against that root.
#
# Returns a Set of constant-name Strings (empty when the file cannot be read).
def self._defined_constants(rel_path)
  # Loaded here as well so the helper works when called on its own.
  require 'set'

  src_file = if @last_scan_root && !@last_scan_root.empty? && !File.exist?(rel_path)
               File.join(@last_scan_root, rel_path)
             else
               rel_path
             end

  symbols = Set.new
  content = begin
    # scrub: invalid bytes must not make strip/match raise further down.
    File.read(src_file, encoding: 'utf-8').scrub
  rescue StandardError
    return symbols
  end

  definition = /\A(?:class|module)\s+((?:::)?[A-Z][A-Za-z0-9_]*(?:::[A-Z][A-Za-z0-9_]*)*)/
  content.each_line do |line|
    m = line.strip.match(definition)
    next unless m

    const = m[1].split('::').last
    symbols.add(const) if const.length > 2
  end
  symbols
end
677
+
484
678
  def self._files_hash(root)
485
679
  md5 = Digest::MD5.new
486
680
  root_path = Pathname.new(root)
@@ -511,8 +705,61 @@ module Tina4
511
705
  imports
512
706
  end
513
707
 
514
- def self._extract_functions(source, tokens, lines)
708
# Token types whose text is blanked by _clean_source: string/heredoc/regex
# bodies (:on_tstring_content), line comments (:on_comment) and =begin/=end
# block comments (:on_embdoc*).
NOISE_TOKEN_TYPES = %i[
  on_tstring_content on_comment on_embdoc on_embdoc_beg on_embdoc_end
].freeze

# Replace the CONTENT of Ruby string literals, regex literals and comments
# with spaces — keeping every line's character length and the line count
# identical to the original — so decision-point keywords and method-shaped
# text that live INSIDE strings/comments are never miscounted. Delimiters
# (quotes, slashes) and all surrounding code are left intact.
#
# Ruby's own lexer (Ripper) finds the noise tokens. Ripper reports token
# columns as BYTE offsets, so each column is converted to a character index
# before blanking; otherwise a multibyte character earlier on the line would
# shift the blanked range and wipe real code.
#
# source - the Ruby source text.
#
# Returns an Array of cleaned lines (chomped), aligned 1:1 with
# source.lines. If lexing raises, the original chomped lines are returned.
def self._clean_source(source)
  require 'ripper'

  lines = source.lines.map(&:chomp)
  # Mutable per-line copies that get blanked by character range.
  buffers = lines.map(&:dup)

  tokens = begin
    Ripper.lex(source)
  rescue StandardError
    return lines
  end

  tokens.each do |(pos, type, token)|
    next unless NOISE_TOKEN_TYPES.include?(type)

    row = pos[0] - 1
    byte_col = pos[1]
    # A noise token may span several physical lines (heredocs, block
    # comments, multi-line strings). Blank each covered line segment.
    token.to_s.each_line.with_index do |seg, offset|
      line_idx = row + offset
      next if line_idx.negative? || line_idx >= buffers.length

      buf = buffers[line_idx]
      # First line: translate the byte column into a character index using
      # the ORIGINAL line (the buffer may already contain blanked text).
      # Continuation lines start at column 0.
      start = offset.zero? ? lines[line_idx].byteslice(0, byte_col).to_s.length : 0
      stop = [start + seg.chomp.length, buf.length].min
      buf[start...stop] = ' ' * (stop - start) if stop > start
    end
  end

  buffers
end
756
+
757
+ def self._extract_functions(source, _tokens, _lines)
515
758
  functions = []
759
+ # Operate on a neutralised copy: string/regex/comment CONTENT is blanked
760
+ # so keywords inside them are never read as real code (line numbers, line
761
+ # count and column widths are preserved).
762
+ lines = _clean_source(source)
516
763
  # Track class/module nesting for method names
517
764
  context_stack = []
518
765
  i = 0
@@ -527,7 +774,8 @@ module Tina4
527
774
  context_stack.push(class_name) unless class_name.empty?
528
775
  end
529
776
 
530
- # Detect method definitions
777
+ # Detect method definitions — require a real `def ` declaration so a
778
+ # `def`-shaped substring inside a (now-blanked) string is never a method.
531
779
  if stripped.match?(/\Adef\s+/)
532
780
  method_match = stripped.match(/\Adef\s+(self\.)?(\S+?)(\(.*\))?\s*$/)
533
781
  if method_match
@@ -588,43 +836,73 @@ module Tina4
588
836
  functions
589
837
  end
590
838
 
839
# Keywords that ALWAYS open a block needing a matching `end`
# (`def` is the one exception: an endless `def name = expr` has no `end`).
BLOCK_OPENERS = %w[def class module begin case].freeze
# Keywords that open a block only when they START an expression; in trailing
# position they are modifiers (`return x if y`) and need no `end`.
CONDITIONAL_OPENERS = %w[if unless while until for].freeze

# Find the line index where the method that starts at `start_index` ends.
#
# Token-driven (Ripper), tracking block depth:
#   * `self.class` — `class` after a `.` is lexed as an identifier, so it
#     never bumps depth.
#   * Keywords used as NAMES (`:end`, `:class`, `def begin`) are ignored.
#   * `if/unless/while/until/for` count as openers at the start of a
#     statement AND in value position (`x = if cond ... end`); a trailing
#     modifier (`return x if y`) does not. Value vs. modifier is read from
#     Ripper's lexer state: a modifier keyword carries EXPR_LABEL.
#   * The optional `do` of `while/until/for ... do` belongs to the loop and
#     is not counted a second time.
#   * An endless `def name = expr` is not counted as an opener.
#   * `lines` are expected to be string/comment-cleaned already, so keywords
#     inside string bodies are gone too.
#
# lines       - Array of source lines (no trailing newlines).
# start_index - index in `lines` of the method's `def` line.
#
# Returns the index (into `lines`) of the line holding the matching `end`;
# for an endless def starting at start_index, the line where its statement
# ends. Falls back to the last line when no matching `end` is found.
def self._find_method_end(lines, start_index)
  require 'ripper'

  source = lines[start_index..].join("\n")
  tokens = begin
    Ripper.lex(source)
  rescue StandardError
    return lines.length - 1
  end

  depth = 0
  seen_opener = false
  at_statement_start = true
  awaiting_loop_do = false # a counted while/until/for may still be followed by `do`
  endless_root = false     # the def at start_index itself is an endless def
  prev_type = nil
  prev_token = nil

  tokens.each_with_index do |(pos, type, token, state), idx|
    case type
    when :on_kw
      # `:end`, `:if`, `def class` … — the keyword is a name here, not syntax.
      used_as_name = prev_type == :on_symbeg || (prev_type == :on_kw && prev_token == 'def')
      unless used_as_name
        if token == 'def'
          if _endless_def?(tokens, idx)
            endless_root = true unless seen_opener
          else
            depth += 1
            seen_opener = true
          end
        elsif BLOCK_OPENERS.include?(token)
          depth += 1
          seen_opener = true
        elsif token == 'do'
          if awaiting_loop_do
            awaiting_loop_do = false
          else
            depth += 1
            seen_opener = true
          end
        elsif CONDITIONAL_OPENERS.include?(token)
          opens_block = at_statement_start ||
                        (state.respond_to?(:allbits?) && !state.allbits?(Ripper::EXPR_LABEL))
          if opens_block
            depth += 1
            seen_opener = true
            awaiting_loop_do = %w[while until for].include?(token)
          end
        elsif token == 'end'
          depth -= 1
          return start_index + (pos[0] - 1) if seen_opener && depth <= 0
        end
      end
      at_statement_start = false
    when :on_nl, :on_semicolon
      # Statement terminator: an endless root def is complete here.
      return start_index + (pos[0] - 1) if endless_root && depth <= 0

      at_statement_start = true
      awaiting_loop_do = false
    when :on_ignored_nl
      # Line break inside an unfinished expression (or a blank line): a
      # pending loop `do` may still follow on the next line.
      at_statement_start = true
    when :on_sp, :on_comment, :on_embdoc, :on_embdoc_beg, :on_embdoc_end
      # whitespace/comments don't change statement-start state
    else
      at_statement_start = false
    end

    unless type == :on_sp
      prev_type = type
      prev_token = token
    end
  end

  # If we never found the end, return last line
  lines.length - 1
end

# True when the `def` keyword at tokens[def_index] starts an endless method
# (`def name = expr` / `def name(args) = expr`): the method name, or the
# parameter list that directly follows it, is immediately followed by `=`.
def self._endless_def?(tokens, def_index)
  i = def_index + 1
  i += 1 while tokens[i] && tokens[i][1] == :on_sp
  return false unless tokens[i]

  # `def self.name` / `def Const.name`: step over receiver and dot.
  i += 2 if tokens[i + 1] && tokens[i + 1][1] == :on_period
  return false unless tokens[i]

  i += 1 # past the method name
  i += 1 while tokens[i] && tokens[i][1] == :on_sp

  if tokens[i] && tokens[i][1] == :on_lparen
    nesting = 0
    while tokens[i]
      case tokens[i][1]
      when :on_lparen
        nesting += 1
      when :on_rparen
        nesting -= 1
        break if nesting.zero?
      end
      i += 1
    end
    i += 1 # past the closing paren
    i += 1 while tokens[i] && tokens[i][1] == :on_sp
  end

  !tokens[i].nil? && tokens[i][1] == :on_op && tokens[i][2] == '='
end
628
906
  def self._cyclomatic_complexity_from_source(source)
629
907
  cc = 1
630
908