RubyGems - testprune - Versions diffs - 0.1.0 - Mend

testprune 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

checksums.yaml +7 -0
data/LICENSE +21 -0
data/README.md +552 -0
data/assets/quickstart.svg +70 -0
data/exe/testprune +6 -0
data/lib/testprune/adapters/minitest.rb +42 -0
data/lib/testprune/adapters/rspec.rb +31 -0
data/lib/testprune/analysis.rb +53 -0
data/lib/testprune/autostart.rb +40 -0
data/lib/testprune/baseline.rb +23 -0
data/lib/testprune/cli.rb +136 -0
data/lib/testprune/configuration.rb +86 -0
data/lib/testprune/coverage_delta.rb +82 -0
data/lib/testprune/duplication_detector.rb +203 -0
data/lib/testprune/footprint.rb +87 -0
data/lib/testprune/patch_writer.rb +117 -0
data/lib/testprune/recorder.rb +102 -0
data/lib/testprune/report.rb +127 -0
data/lib/testprune/runner.rb +76 -0
data/lib/testprune/safety_check.rb +45 -0
data/lib/testprune/savings_estimator.rb +30 -0
data/lib/testprune/semantic_map.rb +185 -0
data/lib/testprune/test_body.rb +61 -0
data/lib/testprune/version.rb +5 -0
data/lib/testprune.rb +18 -0
metadata +90 -0

data/lib/testprune/adapters/minitest.rb ADDED Viewed

@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+require_relative '../recorder'
+module Testprune
+  module Adapters
+    # Minitest integration. Brackets each test via the before_setup/after_teardown
+    # lifecycle hooks rather than wrapping #run. Wrapping #run is unsafe here:
+    # minitest-reporters does `alias_method :run_without_hooks, :run` after we'd
+    # prepend, capturing our method into its alias and causing infinite recursion.
+    # The lifecycle hooks are called exactly once per test and are not aliased.
+    module Minitest
+      module Hook
+        def before_setup
+          Testprune::Recorder.instance.start_test
+          super
+        end
+        def after_teardown
+          super
+        ensure
+          recorder = Testprune::Recorder.instance
+          id = "#{self.class}##{name}"
+          file, line = location
+          recorder.finish_test(id: id, description: id, file: file, line: line)
+        end
+        private
+        def location
+          method(name).source_location
+        rescue NameError
+          [nil, nil]
+        end
+      end
+      def self.install
+        ::Minitest::Test.prepend(Hook)
+      end
+    end
+  end
+end

data/lib/testprune/adapters/rspec.rb ADDED Viewed

@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+require_relative '../recorder'
+module Testprune
+  module Adapters
+    # RSpec integration. Installed by autostart once ::RSpec is defined. Wraps each
+    # example to capture its coverage delta + timing, and dumps run.json after the
+    # suite.
+    module RSpec
+      def self.install
+        recorder = Testprune::Recorder.instance
+        recorder.framework = 'rspec'
+        ::RSpec.configure do |config|
+          config.around(:each) do |example|
+            md = example.metadata
+            Testprune::Recorder.instance.around(
+              id:          example.id,
+              description: example.full_description,
+              file:        md[:file_path],
+              line:        md[:line_number]
+            ) { example.run }
+          end
+          config.after(:suite) { Testprune::Recorder.instance.dump }
+        end
+      end
+    end
+  end
+end

data/lib/testprune/analysis.rb ADDED Viewed

@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+require 'json'
+require_relative '../testprune'
+require_relative 'footprint'
+require_relative 'duplication_detector'
+require_relative 'savings_estimator'
+module Testprune
+  # Loads run.json, builds semantic footprints, runs detection + safety, and
+  # bundles everything the report and patch writer need.
+  class Analysis
+    Result = Struct.new(:detector_result, :index, :savings, :run, keyword_init: true) do
+      def candidates       = detector_result.candidates
+      def approved_removals = detector_result.approved_removals
+      def label_for(id)    = index.label_for(id)
+      def ambient_units    = detector_result.ambient_units
+      def setup_only       = detector_result.setup_only
+    end
+    def initialize(config)
+      @config = config
+    end
+    def call
+      unless File.directory?(@config.root)
+        raise Error, "root directory #{@config.root.inspect} does not exist. " \
+                     "Check TESTPRUNE_ROOT or --root."
+      end
+      unless File.exist?(@config.run_file)
+        raise Error, "no captured data at #{@config.run_file}. Run `testprune run` first."
+      end
+      run = begin
+        JSON.parse(File.read(@config.run_file))
+      rescue JSON::ParserError => e
+        raise Error, "run.json is not valid JSON (#{e.message}) — it may be truncated. " \
+                     "Re-run 'testprune run'."
+      end
+      index = SemanticIndex.new(run['root'] || @config.root)
+      footprints = index.build_footprints(run['tests'] || [])
+      detector = DuplicationDetector.new(
+        footprints,
+        overlap_threshold: @config.overlap_threshold,
+        baseline_fraction: @config.baseline_fraction
+      ).call
+      Result.new(detector_result: detector, index: index,
+                 savings: SavingsEstimator.new(run, detector), run: run)
+    end
+  end
+end

data/lib/testprune/autostart.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+# Loaded via `RUBYOPT=-r testprune/autostart` in the instrumented subprocess.
+# Starts Coverage (lines + branches + methods) immediately and keeps it running
+# for the entire suite. Starting early means every file compiled after this point
+# is measurable. Keeping Coverage always-on means `Coverage.running?` is true
+# before SimpleCov or any other coverage tool loads, so they skip their own
+# Coverage.start and no guard is needed in test_helper.rb. Then watches for the
+# test framework to be defined and installs the matching adapter.
+require_relative 'recorder'
+# Optional breadcrumb on stderr (only when TESTPRUNE_DEBUG is set) showing which
+# process loaded this file.
+warn "[testprune-debug] autostart loaded in pid #{Process.pid}" if ENV['TESTPRUNE_DEBUG']
+Testprune::Recorder.instance.start_coverage
+# Fallback: ensure run.json is written even if the suite crashes before the
+# framework's after-suite hook fires (unhandled exception, SIGTERM, etc.).
+# The @dumped flag in Recorder prevents double-writes on clean exits.
+at_exit { Testprune::Recorder.instance.dump }
+# Latch shared with the TracePoint block below; set once an adapter has been
+# installed so any further events are ignored.
+installed = false
+# An :end event fires each time a class or module definition finishes. On every
+# such event we check whether a supported test framework has become defined yet.
+tracepoint = TracePoint.new(:end) do
+  next if installed
+  # Minitest is checked first, so it is the adapter chosen if both frameworks
+  # are already defined when an event fires.
+  if defined?(::Minitest::Test)
+    # Flip the latch before requiring/installing anything.
+    installed = true
+    require_relative 'adapters/minitest'
+    recorder = Testprune::Recorder.instance
+    recorder.framework = 'minitest'
+    Testprune::Adapters::Minitest.install
+    # Dump through Minitest's own after-run hook; the at_exit above remains as
+    # the fallback.
+    ::Minitest.after_run { Testprune::Recorder.instance.dump }
+  elsif defined?(::RSpec) && ::RSpec.respond_to?(:configure)
+    # The respond_to? guard skips an ::RSpec constant that does not (yet)
+    # offer .configure.
+    installed = true
+    require_relative 'adapters/rspec'
+    Testprune::Adapters::RSpec.install
+  end
+  # Once an adapter is in place the trace is no longer needed.
+  tracepoint.disable if installed
+end
+tracepoint.enable

data/lib/testprune/baseline.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+require 'set'
+module Testprune
+  # Identifies "ambient" coverage units — ones executed by so many tests that they
+  # carry no signal about what a given test is *for*. In real suites these come
+  # from shared `setup`/fixture code (e.g. creating a User fires the same callbacks
+  # in hundreds of tests). Left in, they make unrelated tests look identical and
+  # produce false "redundant" clusters, so the detector subtracts them first.
+  module Baseline
+    # A unit is ambient if it appears in >= fraction of all tests. fraction nil,
+    # <= 0, or >= 1.0 disables subtraction entirely.
+    def self.ambient_units(footprints, fraction)
+      return Set.new if fraction.nil? || fraction <= 0.0 || fraction >= 1.0 || footprints.empty?
+      threshold = (footprints.size * fraction).ceil
+      counts = Hash.new(0)
+      footprints.each { |fp| fp.units.each { |unit| counts[unit] += 1 } }
+      counts.select { |_unit, count| count >= threshold }.keys.to_set
+    end
+  end
+end

data/lib/testprune/cli.rb ADDED Viewed

@@ -0,0 +1,136 @@
+# frozen_string_literal: true
+require 'optparse'
+require_relative '../testprune'
+module Testprune
+  # Command-line front end. Three real commands:
+  #   run     boots the target suite under coverage instrumentation -> run.json
+  #   report  analyzes run.json and prints grouped candidates (read-only)
+  #   apply   prompts for approval, then writes a removal patch (never edits in place)
+  class CLI
+    BANNER = <<~TXT
+      testprune — audit a Ruby test suite for redundant coverage
+      Usage:
+        testprune run [options] [-- <test command>]
+        testprune report [options]
+        testprune apply [options]
+      Commands:
+        run      Run the target suite instrumented; capture per-test coverage + timing
+        report   Analyze captured data and print removal candidates (read-only)
+        apply    Review candidates, ask for approval, emit a git-applyable patch
+      Options:
+        -s, --source PATH      Source dir to analyze (repeatable; default: app, lib)
+        -o, --output DIR       Output dir for captured data (default: .testprune)
+            --baseline FRAC    Treat units run by >= FRAC of tests as shared-setup
+                               noise and subtract them (0..1; default 0.5; 0 to disable)
+            --json             Emit machine-readable JSON (report only)
+        -h, --help             Show this help
+        -v, --version          Show version
+    TXT
+    def self.start(argv)
+      new.run(argv)
+    end
+    def run(argv)
+      argv = argv.dup
+      command = argv.shift
+      case command
+      when 'run'            then cmd_run(argv)
+      when 'report'         then cmd_report(argv)
+      when 'apply'          then cmd_apply(argv)
+      when '-v', '--version' then puts(Testprune::VERSION)
+      when nil, '-h', '--help' then puts(BANNER)
+      else
+        warn("testprune: unknown command #{command.inspect}\n\n#{BANNER}")
+        return 1
+      end
+      0
+    rescue Testprune::Error => e
+      warn("testprune: #{e.message}")
+      1
+    end
+    private
+    # Splits argv at a literal `--`; everything after is the user's test command.
+    def split_test_command(argv)
+      idx = argv.index('--')
+      return [argv, nil] unless idx
+      [argv[0...idx], argv[(idx + 1)..]]
+    end
+    def parse_options(argv)
+      sources = []
+      opts = { json: false }
+      parser = OptionParser.new do |o|
+        o.on('-s', '--source PATH') { |v| sources << v }
+        o.on('-o', '--output DIR')  { |v| opts[:output] = v }
+        o.on('--baseline FRAC', Float) { |v| opts[:baseline] = v }
+        o.on('--json')              { opts[:json] = true }
+        o.on('-h', '--help')        { puts(BANNER); exit(0) }
+      end
+      rest = parser.parse(argv)
+      opts[:sources] = sources unless sources.empty?
+      [opts, rest]
+    end
+    def apply_config(opts)
+      Testprune.configure do |c|
+        c.source_paths = opts[:sources] if opts[:sources]
+        c.output_dir   = File.expand_path(opts[:output], c.root) if opts[:output]
+        c.baseline_fraction = (opts[:baseline]).positive? ? opts[:baseline] : nil if opts.key?(:baseline)
+      end
+    end
+    def cmd_run(argv)
+      cmd_argv, test_command = split_test_command(argv)
+      opts, = parse_options(cmd_argv)
+      apply_config(opts)
+      require_relative 'runner'
+      Runner.new(Testprune.config).call(test_command)
+    end
+    def cmd_report(argv)
+      opts, = parse_options(argv)
+      apply_config(opts)
+      require_relative 'analysis'
+      result = Analysis.new(Testprune.config).call
+      require_relative 'report'
+      puts(Report.new(result, json: opts[:json]).render)
+    end
+    def cmd_apply(argv)
+      opts, = parse_options(argv)
+      apply_config(opts)
+      require_relative 'analysis'
+      result = Analysis.new(Testprune.config).call
+      require_relative 'report'
+      puts(Report.new(result).render)
+      approved = result.approved_removals
+      if approved.empty?
+        puts("\nNothing safe to remove. No patch written.")
+        return
+      end
+      print("\nApply #{approved.size} HIGH-confidence, safety-verified removal(s) as a patch?\n" \
+            "(MEDIUM/LOW review-only candidates are NOT patched automatically.) [y/N] ")
+      answer = $stdin.gets&.strip&.downcase
+      unless %w[y yes].include?(answer)
+        puts('Aborted. No patch written.')
+        return
+      end
+      require_relative 'patch_writer'
+      path = PatchWriter.new(Testprune.config).write(approved)
+      puts("Wrote #{path}")
+      puts("Review it, then apply with:  git apply #{path}")
+    end
+  end
+end

data/lib/testprune/configuration.rb ADDED Viewed

@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+module Testprune
+  # Holds run/analysis settings. `source_paths` define which files count as the
+  # system-under-test — coverage in any other file (test helpers, vendored gems,
+  # the bundle) is ignored so footprints stay precise and cheap to diff.
+  class Configuration
+    attr_reader :source_paths, :exclude_globs, :output_dir,
+                :overlap_threshold, :baseline_fraction, :root
+    attr_writer :output_dir, :overlap_threshold, :baseline_fraction
+    def source_paths=(val)
+      @source_paths = val
+      @source_roots = nil
+    end
+    def exclude_globs=(val)
+      @exclude_globs = val
+      @source_roots = nil
+    end
+    def root=(val)
+      @root = File.expand_path(val)
+      @source_roots = nil
+    end
+    def initialize(root: Dir.pwd)
+      @root             = File.expand_path(root)
+      @source_paths     = %w[app lib]
+      @exclude_globs    = %w[**/vendor/** **/node_modules/** **/db/** **/config/**]
+      @output_dir       = File.join(@root, 'tmp', '.testprune')
+      @overlap_threshold = 0.9 # Jaccard cutoff for LOW-confidence overlap pairs
+      # Units executed by >= this fraction of tests are treated as ambient
+      # shared-setup noise and subtracted before detection. nil disables it.
+      @baseline_fraction = 0.5
+    end
+    # Rebuild config inside the instrumented subprocess from env vars set by Runner.
+    def self.from_env(env = ENV)
+      cfg = new(root: env.fetch('TESTPRUNE_ROOT', Dir.pwd))
+      cfg.source_paths  = split_env(env['TESTPRUNE_SOURCE_PATHS']) || cfg.source_paths
+      cfg.exclude_globs = split_env(env['TESTPRUNE_EXCLUDE']) || cfg.exclude_globs
+      cfg.output_dir    = env['TESTPRUNE_OUTPUT_DIR'] || cfg.output_dir
+      cfg
+    end
+    def self.split_env(value)
+      return nil if value.nil? || value.empty?
+      value.split(File::PATH_SEPARATOR)
+    end
+    # Env vars the Runner must export so the subprocess reconstructs this config.
+    def to_env
+      {
+        'TESTPRUNE_ROOT'         => @root,
+        'TESTPRUNE_SOURCE_PATHS' => @source_paths.join(File::PATH_SEPARATOR),
+        'TESTPRUNE_EXCLUDE'      => @exclude_globs.join(File::PATH_SEPARATOR),
+        'TESTPRUNE_OUTPUT_DIR'   => @output_dir
+      }
+    end
+    # Absolute, existing source roots under which coverage is considered in-scope.
+    # Memoized: re-allocating this array on every source_file? call is O(tests * files).
+    def source_roots
+      @source_roots ||= @source_paths.map { |p| File.expand_path(p, @root) }
+                                     .select { |p| File.directory?(p) }
+    end
+    # True when an absolute file path is part of the system-under-test.
+    def source_file?(path)
+      abs = File.expand_path(path)
+      return false unless source_roots.any? { |root| abs.start_with?("#{root}/") }
+      @exclude_globs.none? { |glob| File.fnmatch?(glob, abs, File::FNM_PATHNAME) }
+    end
+    def run_file
+      File.join(@output_dir, 'run.json')
+    end
+    def patch_file
+      File.join(@output_dir, 'removal.patch')
+    end
+  end
+end

data/lib/testprune/coverage_delta.rb ADDED Viewed

@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+module Testprune
+  # Computes what a single test executed by diffing two `Coverage.peek_result`
+  # snapshots. A unit (line / branch arm / method) belongs to the test iff its
+  # execution count *increased* between the snapshots — this is order-independent,
+  # so coverage shared with earlier tests is still attributed correctly.
+  #
+  # Output (per in-scope source file):
+  #   { "lines" => [Integer, ...],
+  #     "branches" => [[type, sl, sc, el, ec], ...],
+  #     "methods"  => [[name, sl, sc, el, ec], ...] }
+  # Locations come straight from Coverage's keys so the analysis phase can map
+  # them onto Prism AST nodes without re-deriving positions.
+  module CoverageDelta
+    module_function
+    def compute(before, after, config)
+      result = {}
+      after.each do |file, aft|
+        next unless config.source_file?(file)
+        bef = before[file]
+        lines    = delta_lines(bef && bef[:lines], aft[:lines])
+        branches = delta_branches(bef && bef[:branches], aft[:branches])
+        methods  = delta_methods(bef && bef[:methods], aft[:methods])
+        next if lines.empty? && branches.empty? && methods.empty?
+        result[file] = { 'lines' => lines, 'branches' => branches, 'methods' => methods }
+      end
+      result
+    end
+    def delta_lines(before, after)
+      return [] unless after
+      newly = []
+      after.each_with_index do |after_count, idx|
+        next if after_count.nil? # non-executable line
+        before_count = before && before[idx] ? before[idx] : 0
+        newly << (idx + 1) if after_count > before_count
+      end
+      newly
+    end
+    # Coverage branch shape: { node_key => { branch_key => count } }.
+    # We record each *branch arm* (then/else/when/...) that newly executed,
+    # keyed by the arm's own location + type.
+    def delta_branches(before, after)
+      return [] unless after
+      newly = []
+      after.each do |node_key, arms|
+        before_arms = before && before[node_key]
+        arms.each do |arm_key, after_count|
+          before_count = before_arms && before_arms[arm_key] ? before_arms[arm_key] : 0
+          next unless after_count > before_count
+          type, _id, sl, sc, el, ec = arm_key
+          newly << [type.to_s, sl, sc, el, ec]
+        end
+      end
+      newly
+    end
+    # Coverage method shape: { [class, name, sl, sc, el, ec] => count }.
+    def delta_methods(before, after)
+      return [] unless after
+      newly = []
+      after.each do |method_key, after_count|
+        before_count = before && before[method_key] ? before[method_key] : 0
+        next unless after_count > before_count
+        _klass, name, sl, sc, el, ec = method_key
+        newly << [name.to_s, sl, sc, el, ec]
+      end
+      newly
+    end
+  end
+end