RubyGems - moult - Versions diffs - 0.1.0 - Mend

moult 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +44 -0
data/LICENSE.txt +201 -0
data/NOTICE +4 -0
data/README.md +331 -0
data/exe/moult +6 -0
data/lib/moult/abc.rb +133 -0
data/lib/moult/boundaries/packwerk.rb +114 -0
data/lib/moult/boundaries/severity.rb +87 -0
data/lib/moult/boundaries.rb +77 -0
data/lib/moult/boundaries_report.rb +106 -0
data/lib/moult/churn.rb +52 -0
data/lib/moult/cli/boundaries_command.rb +83 -0
data/lib/moult/cli/coverage_command.rb +101 -0
data/lib/moult/cli/dead_code_command.rb +112 -0
data/lib/moult/cli/duplication_command.rb +92 -0
data/lib/moult/cli/flags_command.rb +95 -0
data/lib/moult/cli/gate_command.rb +113 -0
data/lib/moult/cli/health_command.rb +117 -0
data/lib/moult/cli/hotspots_command.rb +104 -0
data/lib/moult/cli.rb +102 -0
data/lib/moult/clones.rb +91 -0
data/lib/moult/cloud_upload.rb +29 -0
data/lib/moult/confidence/rules.rb +128 -0
data/lib/moult/confidence.rb +106 -0
data/lib/moult/coverage/resolver.rb +56 -0
data/lib/moult/coverage.rb +176 -0
data/lib/moult/coverage_report.rb +98 -0
data/lib/moult/dead_code.rb +119 -0
data/lib/moult/dead_code_report.rb +65 -0
data/lib/moult/diff.rb +177 -0
data/lib/moult/discovery.rb +38 -0
data/lib/moult/duplication/confidence.rb +92 -0
data/lib/moult/duplication.rb +112 -0
data/lib/moult/duplication_report.rb +89 -0
data/lib/moult/flag_scanner.rb +150 -0
data/lib/moult/flags/classification.rb +79 -0
data/lib/moult/flags/snapshot.rb +162 -0
data/lib/moult/flags/staleness.rb +145 -0
data/lib/moult/flags.rb +131 -0
data/lib/moult/flags_report.rb +136 -0
data/lib/moult/formatters/boundaries_json.rb +20 -0
data/lib/moult/formatters/boundaries_table.rb +53 -0
data/lib/moult/formatters/coverage_json.rb +19 -0
data/lib/moult/formatters/coverage_table.rb +60 -0
data/lib/moult/formatters/dead_code_json.rb +20 -0
data/lib/moult/formatters/dead_code_table.rb +66 -0
data/lib/moult/formatters/duplication_json.rb +20 -0
data/lib/moult/formatters/duplication_table.rb +55 -0
data/lib/moult/formatters/flags_json.rb +20 -0
data/lib/moult/formatters/flags_table.rb +76 -0
data/lib/moult/formatters/gate_github.rb +52 -0
data/lib/moult/formatters/gate_json.rb +20 -0
data/lib/moult/formatters/gate_message.rb +19 -0
data/lib/moult/formatters/gate_sarif.rb +78 -0
data/lib/moult/formatters/gate_table.rb +71 -0
data/lib/moult/formatters/health_json.rb +20 -0
data/lib/moult/formatters/health_table.rb +80 -0
data/lib/moult/formatters/json.rb +23 -0
data/lib/moult/formatters/table.rb +70 -0
data/lib/moult/formatters/text_table.rb +39 -0
data/lib/moult/gate/config.rb +55 -0
data/lib/moult/gate/evaluation.rb +172 -0
data/lib/moult/gate/policy.rb +103 -0
data/lib/moult/gate.rb +199 -0
data/lib/moult/gate_report.rb +97 -0
data/lib/moult/git.rb +83 -0
data/lib/moult/health/score.rb +291 -0
data/lib/moult/health.rb +320 -0
data/lib/moult/health_report.rb +97 -0
data/lib/moult/index.rb +228 -0
data/lib/moult/parser.rb +101 -0
data/lib/moult/rails_conventions.rb +124 -0
data/lib/moult/report.rb +114 -0
data/lib/moult/scoring.rb +82 -0
data/lib/moult/span.rb +17 -0
data/lib/moult/symbol_id.rb +30 -0
data/lib/moult/symbol_scanner.rb +100 -0
data/lib/moult/version.rb +5 -0
data/lib/moult.rb +84 -0
data/schema/boundaries.schema.json +125 -0
data/schema/common.schema.json +76 -0
data/schema/coverage.schema.json +83 -0
data/schema/deadcode.schema.json +106 -0
data/schema/duplication.schema.json +128 -0
data/schema/flags.schema.json +157 -0
data/schema/gate.schema.json +165 -0
data/schema/health.schema.json +157 -0
data/schema/hotspots.schema.json +106 -0
metadata +185 -0

data/lib/moult/coverage.rb ADDED Viewed

@@ -0,0 +1,176 @@
+# frozen_string_literal: true
+require "json"
+require "time"
+require_relative "symbol_id"
module Moult
  # Ingests line-keyed code coverage from a LOCAL FILE and normalises it into one
  # Moult-owned value object ({Dataset}) the {Resolver} can read. This is the
  # runtime-layer analogue of {Index}: external formats (SimpleCov, stdlib
  # +Coverage+) come in, only Moult types go out, so the input is swappable.
  #
  # Two on-disk formats are understood (auto-detected, or forced via +format:+):
  #
  # * +:simplecov+ — SimpleCov's +coverage/.resultset.json+:
  #   <tt>{command => {"coverage" => {abs_path => {"lines" => [...]}}, "timestamp" => epoch}}</tt>.
  #   Multiple command runs are merged element-wise.
  # * +:coverage+ — a JSON dump of stdlib <tt>Coverage.result(lines: true)</tt>:
  #   <tt>{abs_path => {"lines" => [...]}}</tt> or the legacy bare <tt>{abs_path => [...]}</tt>.
  #
  # Line arrays are 0-indexed (index 0 = line 1) with the shared convention:
  # +nil+ = non-executable, +0+ = executable but never run, +N+ = hit count.
  # +oneshot_lines+ is intentionally unsupported: it cannot distinguish 0 from
  # nil, so runtime-cold could not be detected.
  module Coverage
    module_function

    # Provenance of a merged coverage dataset. Captured into the protected
    # contract so a consumer can see where the runtime evidence came from. The
    # +collected_at+ slot also seeds a future stale-detection slice (deferred).
    Source = Struct.new(:backend, :version, :collected_at) do
      # @return [Hash{Symbol=>Object}] the three provenance fields, keyed by name
      def to_h
        {backend: backend, version: version, collected_at: collected_at}
      end
    end

    # Normalised coverage: per (root-relative) path, the 0-indexed line array.
    Dataset = Struct.new(:entries, :source, :unmatched_count) do
      # @return [Boolean] whether this file appeared in the coverage dataset
      def tracked?(path)
        entries.key?(path)
      end

      # @param line [Integer] 1-based line number
      # @return [Integer, nil] coverage value at that line, or nil if untracked
      #   (also nil for a non-positive or non-Integer line: index -1 would
      #   otherwise silently read the LAST line of the file)
      def line_value(path, line)
        return nil unless line.is_a?(Integer) && line.positive?

        arr = entries[path]
        arr && arr[line - 1]
      end
    end

    # @param path [String] path to the coverage file
    # @param root [String] absolute analysis root (findings are relative to it)
    # @param format [Symbol] :auto, :simplecov, or :coverage
    # @raise [Moult::Error] when the file is missing, is not valid JSON, is not
    #   a JSON object, or the format is unknown / cannot be detected
    # @return [Dataset]
    def load(path, root:, format: :auto)
      raw = JSON.parse(File.read(path))
      # Checked here (not only in detect_format) so a forced format fed a JSON
      # array/scalar fails with a Moult::Error instead of a NoMethodError.
      raise Moult::Error, "coverage file is not a JSON object" unless raw.is_a?(Hash)

      fmt = (format == :auto) ? detect_format(raw) : format
      abs_entries, source =
        case fmt
        when :simplecov then from_simplecov(raw, path)
        when :coverage then from_coverage(raw, path)
        else raise Moult::Error, "unknown coverage format: #{fmt}"
        end
      entries, unmatched = relativize(abs_entries, root)
      # Positional construction behaves the same on every Ruby version (keyword
      # construction of a plain Struct only works from Ruby 3.2).
      Dataset.new(entries, source, unmatched)
    rescue JSON::ParserError => e
      raise Moult::Error, "could not parse coverage file #{path}: #{e.message}"
    rescue Errno::ENOENT
      raise Moult::Error, "no such coverage file: #{path}"
    end

    # SimpleCov nests file coverage under a command name and a "coverage" key;
    # stdlib dumps key files at the top level. The presence of "coverage" on the
    # first value is the unambiguous discriminator.
    #
    # @raise [Moult::Error] when +raw+ is not a Hash or matches neither shape
    # @return [Symbol] :simplecov or :coverage
    def detect_format(raw)
      raise Moult::Error, "coverage file is not a JSON object" unless raw.is_a?(Hash)

      sample = raw.values.first
      if sample.is_a?(Hash) && sample.key?("coverage")
        :simplecov
      elsif sample.is_a?(Array) || (sample.is_a?(Hash) && sample.key?("lines"))
        :coverage
      else
        raise Moult::Error, "could not auto-detect coverage format; pass --coverage-format simplecov|coverage"
      end
    end

    # Merges every command run of a SimpleCov resultset. Files that carry no
    # line array are skipped (as {from_coverage} does), so {Dataset#tracked?}
    # is never true for a file without line data.
    #
    # @return [[Hash{String=>Array}, Source]] abs-path line arrays + provenance
    def from_simplecov(raw, _path)
      merged = {}
      timestamps = []
      raw.each_value do |run|
        next unless run.is_a?(Hash)

        # Only numeric epochs are usable by Time.at; anything else is ignored
        # rather than aborting the whole load.
        timestamps << run["timestamp"] if run["timestamp"].is_a?(Numeric)
        files = run["coverage"]
        next unless files.is_a?(Hash)

        files.each do |file, data|
          lines = extract_lines(data)
          next unless lines

          merged[file] = merge_lines(merged[file], lines)
        end
      end
      collected = timestamps.max
      source = Source.new(
        "simplecov",
        nil, # version: not recorded in the resultset
        collected && Time.at(collected).utc.iso8601
      )
      [merged, source]
    end

    # @return [[Hash{String=>Array}, Source]] abs-path line arrays + provenance
    def from_coverage(raw, path)
      entries = {}
      raw.each do |file, data|
        lines = extract_lines(data)
        entries[file] = lines if lines
      end
      # The raw dump carries no timestamp, so the file mtime is the best-effort
      # collected_at (noted as a fallback; only matters for deferred staleness).
      source = Source.new("coverage", RUBY_VERSION, File.mtime(path).utc.iso8601)
      [entries, source]
    end

    # Accepts both the wrapped ({"lines" => [...]}) and legacy bare-array forms;
    # ignores sibling :methods/:branches data.
    #
    # @return [Array, nil] the line array, or nil when +data+ holds none
    def extract_lines(data)
      case data
      when Array then data
      when Hash
        lines = data["lines"]
        lines if lines.is_a?(Array)
      end
    end

    # Element-wise merge of two coverage runs: a value is hit if hit in either
    # run (max of the non-nil values), non-executable only if nil in both.
    #
    # @return [Array, nil] merged array; the other operand when one is nil
    def merge_lines(a, b)
      return b if a.nil?
      return a if b.nil?

      Array.new([a.length, b.length].max) do |i|
        x, y = a[i], b[i]
        if x.nil? then y
        elsif y.nil? then x
        else [x, y].max
        end
      end
    end

    # Map absolute coverage paths to the root-relative paths Phase 2 emits, so
    # the join lands on the same symbol_id components. Files outside the root are
    # dropped and counted (a different checkout layout, vendored code, etc.).
    # Two absolute paths that canonicalise to the same file (symlinked and real
    # spelling) are merged rather than one overwriting the other.
    #
    # @return [[Hash{String=>Array}, Integer]] relative entries + dropped count
    def relativize(abs_entries, root)
      real_root = canonicalize(root)
      entries = {}
      unmatched = 0
      abs_entries.each do |abs, lines|
        full = canonicalize(abs)
        if full == real_root || full.start_with?(real_root + File::SEPARATOR)
          rel = SymbolId.relative_path(full, real_root)
          entries[rel] = merge_lines(entries[rel], lines)
        else
          unmatched += 1
        end
      end
      [entries, unmatched]
    end

    # realpath resolves /tmp -> /private/tmp style symlinks so coverage paths
    # line up with rubydex's canonical paths; falls back when the file is absent
    # locally (coverage collected on another machine).
    def canonicalize(p)
      File.realpath(p)
    rescue
      File.expand_path(p)
    end
  end
end
+require_relative "coverage/resolver"

data/lib/moult/coverage_report.rb ADDED Viewed

@@ -0,0 +1,98 @@
+# frozen_string_literal: true
module Moult
  # The serialized result model for `moult coverage` (schema/coverage.schema.json):
  # a per-symbol hot/cold/untracked map. It is a diagnostic view over the same
  # runtime evidence `moult deadcode --coverage` folds into confidence — it makes
  # no dead-code claim, it only reports what ran.
  #
  # {build} is the orchestration: ask the {Index} for every definition and
  # classify each through {Coverage::Resolver}, joined on the same path + span
  # that make up its symbol_id.
  class CoverageReport
    SCHEMA_VERSION = 1

    # Primary sort rank per runtime classification; anything else sorts last.
    RUNTIME_ORDER = {hot: 0, cold: 1, untracked: 2}.freeze

    # One classified definition. Carries the symbol_id so the map joins to the
    # hotspots and deadcode contracts.
    Entry = Struct.new(:symbol_id, :kind, :name, :span, :runtime) do
      # @return [Hash{Symbol=>Object}] serialisable form (kind/runtime as strings)
      def to_h
        {symbol_id: symbol_id, kind: kind.to_s, name: name, span: span.to_h, runtime: runtime.to_s}
      end
    end

    attr_reader :root, :entries, :git_ref, :generated_at,
      :backend, :backend_version, :resolved, :diagnostics, :coverage_source

    # @param index [Index] resolved definition index
    # @param coverage [Coverage::Dataset] the runtime dataset to resolve against
    # @param root [String] analysis root recorded in the envelope
    # @return [CoverageReport]
    def self.build(index:, coverage:, root:, git_ref: nil, generated_at: nil, backend_version: nil)
      entries = index.definitions.map do |d|
        Entry.new(
          d.symbol_id,
          d.kind,
          d.name,
          d.span,
          Coverage::Resolver.classify(coverage, path: d.path, span: d.span, kind: d.kind)
        )
      end
      # Hot first (most surprising/actionable), then cold, then untracked; name
      # as the tie-break. Many definitions share a name (initialize, to_h, ...)
      # and sort_by is not stable, so symbol_id is the final key: the output
      # order no longer depends on the order the index yields definitions.
      entries.sort_by! { |e| [RUNTIME_ORDER.fetch(e.runtime, 3), e.name.to_s, e.symbol_id.to_s] }
      new(
        root: root,
        entries: entries,
        git_ref: git_ref,
        generated_at: generated_at,
        backend: "rubydex",
        backend_version: backend_version,
        resolved: index.resolved?,
        diagnostics: index.diagnostics,
        coverage_source: coverage.source
      )
    end

    # @param root [String] analysis root
    # @param entries [Array<Entry>] classified definitions, already ordered
    # @param coverage_source [#to_h, nil] provenance of the coverage dataset
    def initialize(root:, entries:, git_ref: nil, generated_at: nil,
      backend: "rubydex", backend_version: nil, resolved: true, diagnostics: [], coverage_source: nil)
      @root = root
      @entries = entries
      @git_ref = git_ref
      @generated_at = generated_at
      @backend = backend
      @backend_version = backend_version
      @resolved = resolved
      @diagnostics = diagnostics
      @coverage_source = coverage_source
    end

    # @return [Hash{Symbol=>Integer}] counts keyed :hot, :cold, :untracked
    #   (an unexpected runtime value gets its own key rather than being lost)
    def summary
      counts = {hot: 0, cold: 0, untracked: 0}
      entries.each { |e| counts[e.runtime] = counts.fetch(e.runtime, 0) + 1 }
      counts
    end

    # @return [Hash] the full JSON-ready envelope
    def to_h
      {
        schema_version: SCHEMA_VERSION,
        tool: {name: "moult", version: Moult::VERSION},
        analysis: {
          root: root,
          git_ref: git_ref,
          generated_at: generated_at,
          coverage: coverage_source&.to_h,
          index: {
            backend: backend,
            backend_version: backend_version,
            resolved: resolved,
            diagnostics: diagnostics
          }
        },
        summary: summary,
        symbols: entries.map(&:to_h)
      }
    end
  end
end

data/lib/moult/dead_code.rb ADDED Viewed

@@ -0,0 +1,119 @@
+# frozen_string_literal: true
module Moult
  # Orchestrates the dead-code analysis: it asks the {Index} for every definition,
  # keeps the ones with no production reference, gathers the facts each finding is
  # judged on, and runs them through the pure {Confidence} model. The result is a
  # ranked {DeadCodeReport} of confidence-graded candidates — never assertions of
  # certain death.
  #
  # This is the only layer that knows how the facts are sourced (the index, the
  # Rails conventions, a metaprogramming scan of the owning file); {Confidence}
  # stays a pure function of those facts so it can be tested in isolation.
  module DeadCode
    # A path counts as test code when any segment is a test/ or spec/ directory.
    TEST_PATH = %r{(\A|/)(test|spec)/}

    # Tokens that indicate dynamic dispatch / metaprogramming in a file. Their
    # mere presence lowers confidence for definitions in that file: such code can
    # be reached in ways static analysis cannot see. Matched conservatively (a
    # false match only lowers confidence, never hides a finding).
    #
    # respond_to_missing? is matched outside the \b...\b group: a trailing \b
    # after "?" needs a word character to follow, so it could never match
    # "respond_to_missing?(" or "respond_to_missing? name".
    DYNAMIC_TOKENS = /
      \b(?:
        send | public_send | __send__ |
        method_missing |
        define_method | define_singleton_method |
        class_eval | module_eval | instance_eval | instance_exec |
        const_get | const_set | constantize |
        eval
      )\b
      |
      \brespond_to_missing\?
    /x

    module_function

    # @param root [String] absolute analysis root
    # @param files [Array<String>] absolute Ruby file paths analysed
    # @param index [Index] resolved definition/reference index
    # @param rails [RailsConventions] Rails entrypoint awareness
    # @param min_confidence [Float] drop findings below this confidence
    # @param coverage [Coverage::Dataset, nil] runtime coverage to merge (Phase 3)
    # @return [DeadCodeReport]
    def build_report(root:, files:, index:, rails:, min_confidence: 0.0,
      git_ref: nil, generated_at: nil, backend_version: nil, coverage: nil)
      dynamic_files = dynamic_dispatch_files(files, root)
      findings = index.definitions.filter_map do |definition|
        next unless candidate?(definition)

        Confidence.score(
          context_for(definition, index: index, rails: rails, dynamic_files: dynamic_files, coverage: coverage)
        )
      end
      findings.select! { |f| f.confidence >= min_confidence }
      # Most-likely-dead first; name breaks ties between equal confidences.
      findings.sort_by! { |f| [-f.confidence, f.name.to_s] }
      DeadCodeReport.new(
        root: root,
        findings: findings,
        git_ref: git_ref,
        generated_at: generated_at,
        backend: "rubydex",
        backend_version: backend_version,
        resolved: index.resolved?,
        rails: rails.rails?,
        diagnostics: index.diagnostics,
        coverage_source: coverage&.source
      )
    end

    # A definition is a candidate when nothing outside of tests references it.
    def candidate?(definition)
      non_test_reference_paths(definition).empty?
    end

    # Gathers every fact {Confidence} judges a definition on into one Context.
    # @return [Confidence::Context]
    def context_for(definition, index:, rails:, dynamic_files:, coverage: nil)
      Confidence::Context.new(
        symbol_id: definition.symbol_id,
        kind: definition.kind,
        name: definition.name,
        span: definition.span,
        path: definition.path,
        visibility: definition.visibility,
        reference_count: definition.reference_count,
        test_only: test_only?(definition),
        rails_signals: rails.signals_for(definition),
        dynamic_dispatch: dynamic_files.include?(definition.path),
        override_of: definition.override_of,
        deprecated: false,
        index_resolved: index.resolved?,
        runtime: runtime_for(definition, coverage)
      )
    end

    # The runtime classification for this definition, joined on the same path +
    # span that make up its symbol_id. nil when no coverage was supplied.
    def runtime_for(definition, coverage)
      return nil unless coverage

      Coverage::Resolver.classify(
        coverage, path: definition.path, span: definition.span, kind: definition.kind
      )
    end

    # Referenced only from test/spec files: it is exercised, but possibly only to
    # keep otherwise-dead production code alive — a weaker candidate, not excluded.
    def test_only?(definition)
      definition.reference_count.to_i.positive? && non_test_reference_paths(definition).empty?
    end

    # @return [Array] the definition's reference paths that are not test/spec paths
    def non_test_reference_paths(definition)
      Array(definition.reference_paths).reject { |path| path.to_s.match?(TEST_PATH) }
    end

    # Scans each file for {DYNAMIC_TOKENS}. Files are read as raw bytes: the
    # tokens are pure ASCII, and a text-mode read of a source containing bytes
    # that are invalid in the default encoding makes the regexp match raise —
    # which the best-effort rescue below would turn into "no dynamic dispatch",
    # the non-conservative answer. Unreadable files are still skipped.
    #
    # @return [Set<String>] root-relative paths whose source contains dynamic dispatch
    def dynamic_dispatch_files(files, root)
      files.each_with_object(Set.new) do |abs, set|
        source = File.binread(abs)
        set << SymbolId.relative_path(abs, root) if source.match?(DYNAMIC_TOKENS)
      rescue
        next
      end
    end
  end
end

data/lib/moult/dead_code_report.rb ADDED Viewed

@@ -0,0 +1,65 @@
+# frozen_string_literal: true
module Moult
  # Report-level envelope for `moult deadcode`, the sibling of {Report}. It
  # serialises to the shape described by schema/deadcode.schema.json and does
  # not touch the protected hotspots {Report}. Its findings are
  # {Confidence::Finding} objects: the per-finding confidence model is the
  # protected API, and this class merely wraps it with analysis metadata.
  class DeadCodeReport
    # Incremented only for a breaking change to the serialized shape. Version 2
    # introduced the Phase 3 runtime block: analysis.coverage provenance plus a
    # per-finding runtime classification (both null when no coverage was merged).
    SCHEMA_VERSION = 2

    attr_reader :root, :findings, :git_ref, :generated_at,
      :backend, :backend_version, :resolved, :rails, :diagnostics, :coverage_source

    # @param root [String] absolute analysis root
    # @param findings [Array<Confidence::Finding>] ranked, most-likely-dead first
    # @param git_ref [String, nil] HEAD sha when run inside a repo
    # @param generated_at [String, nil] ISO8601 timestamp
    # @param backend [String] index backend name (e.g. "rubydex")
    # @param backend_version [String, nil] backend gem version
    # @param resolved [Boolean] whether the index fully resolved
    # @param rails [Boolean] whether Rails entrypoint awareness was applied
    # @param diagnostics [Array<String>] non-fatal index diagnostics
    # @param coverage_source [Coverage::Source, nil] provenance of merged runtime
    #   coverage; nil when `moult deadcode` was run without --coverage
    def initialize(root:, findings:, git_ref: nil, generated_at: nil,
      backend: "rubydex", backend_version: nil, resolved: true, rails: false, diagnostics: [],
      coverage_source: nil)
      @root, @findings = root, findings
      @git_ref, @generated_at = git_ref, generated_at
      @backend, @backend_version = backend, backend_version
      @resolved, @rails = resolved, rails
      @diagnostics = diagnostics
      @coverage_source = coverage_source
    end

    # @return [Hash] the JSON-ready envelope: schema version, tool identity,
    #   analysis metadata and the serialised findings
    def to_h
      {
        schema_version: SCHEMA_VERSION,
        tool: tool_section,
        analysis: analysis_section,
        findings: findings.map { |finding| finding.to_h }
      }
    end

    private

    # Identifies the producing tool and its version.
    def tool_section
      {name: "moult", version: Moult::VERSION}
    end

    # Where and when the analysis ran, plus coverage provenance (nil if none).
    def analysis_section
      source = coverage_source
      {
        root: root,
        git_ref: git_ref,
        generated_at: generated_at,
        coverage: source.nil? ? nil : source.to_h,
        index: index_section
      }
    end

    # How the definition index was produced.
    def index_section
      {
        backend: backend,
        backend_version: backend_version,
        resolved: resolved,
        rails: rails,
        diagnostics: diagnostics
      }
    end
  end
end

data/lib/moult/diff.rb ADDED Viewed

@@ -0,0 +1,177 @@
+# frozen_string_literal: true
module Moult
  # A Moult-owned value object describing what changed between a base ref and the
  # working tree, plus the pure filter the gate uses to decide whether a finding
  # is "in the diff". This is the genuinely novel component of the PR gate — it is
  # pinned against hand-built git output exactly like the coverage {Resolver} and
  # the ABC metric; drift is a bug.
  #
  # {Git} is the only file that shells git; it hands this class raw
  # `--name-status` and `--unified=0` text. {parse} turns that text into a Diff
  # with no IO, so it is trivially unit-testable. {compute} is the thin IO wrapper
  # that calls git then {parse}.
  #
  # Line ranges are taken from the NEW side of each `--unified=0` hunk header
  # (`@@ -a,b +c,d @@`): with zero context they are precisely the added/changed
  # lines. Paths are repo-root-relative (git's own framing); the gate is meant to
  # run at the repository root, where they line up with Moult's root-relative
  # finding paths.
  class Diff
    # One changed file. +status+ is git's single-letter code (A/M/D/R/C/...);
    # +line_ranges+ are the new-side changed line ranges (empty for a deletion, a
    # pure-deletion hunk, or a content-less rename).
    ChangedFile = Struct.new(:path, :status, :line_ranges) do
      # Does +line+ fall on a changed/added line of this file?
      def changed_line?(line)
        line_ranges.any? { |r| r.cover?(line) }
      end

      # Does the inclusive line range [lo, hi] intersect any changed range?
      def changed_range?(lo, hi)
        line_ranges.any? { |r| r.begin <= hi && r.end >= lo }
      end
    end

    # Old-side and new-side line counts of a two-way hunk header; a missing
    # count means 1.
    HUNK_COUNTS = /\A@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/

    # Single-character escapes git uses inside a C-quoted path.
    C_ESCAPES = {
      "a" => "\a", "b" => "\b", "f" => "\f", "n" => "\n",
      "r" => "\r", "t" => "\t", "v" => "\v"
    }.freeze

    attr_reader :base_ref, :merge_base, :scope, :files

    # @param base_ref [String, nil] the requested base ref (nil for :all scope)
    # @param merge_base [String, nil] resolved merge-base sha (nil for :all scope)
    # @param scope [Symbol] :diff (gate the changed lines) or :all (gate everything)
    # @param files [Array<ChangedFile>]
    def initialize(base_ref:, merge_base:, scope:, files:)
      @base_ref = base_ref
      @merge_base = merge_base
      @scope = scope
      @files = files
      @by_path = files.to_h { |f| [f.path, f] }
    end

    # Line-level membership: is the span [start_line, end_line] inside the diff?
    # Used where an analysis has lines (complexity methods, dead-code spans,
    # duplication/flag occurrences). With +start_line+ nil this falls back to
    # path-level. Always true under :all scope.
    # @return [Boolean]
    def in_diff?(path:, start_line: nil, end_line: nil)
      return true if scope == :all
      return includes_path?(path) if start_line.nil?

      file = @by_path[path]
      return false unless file

      file.changed_range?(start_line, end_line || start_line)
    end

    # Path-level membership: did this file change at all? The fallback where an
    # analysis is file-keyed with no line numbers (boundaries — null symbol_id).
    # Always true under :all scope.
    # @return [Boolean]
    def includes_path?(path)
      return true if scope == :all

      @by_path.key?(path)
    end

    class << self
      # Build a Diff from raw git text. PURE — no IO. Pinned in test/test_diff.rb.
      # @param name_status [String] `git diff --name-status REF` output
      # @param unified_diff [String] `git diff --unified=0 REF` output
      # @return [Diff]
      def parse(name_status:, unified_diff:, base_ref:, merge_base:, scope: :diff)
        ranges = parse_unified(utf8(unified_diff))
        files = parse_name_status(utf8(name_status)).map do |path, status|
          ChangedFile.new(path, status, ranges[path] || [])
        end
        new(base_ref: base_ref, merge_base: merge_base, scope: scope, files: files)
      end

      # Resolve the diff for +root+ against +base_ref+ via {Git}, then {parse}.
      # @param scope [Symbol] :diff or :all (:all yields an all-inclusive Diff)
      # @raise [Moult::Error] when the merge-base cannot be resolved
      # @return [Diff]
      def compute(root:, base_ref:, scope: :diff)
        return new(base_ref: nil, merge_base: nil, scope: :all, files: []) if scope == :all

        mb = Git.merge_base(root, base_ref)
        unless mb
          raise Moult::Error,
            "could not resolve a merge-base between #{base_ref.inspect} and HEAD " \
            "(unknown ref, shallow clone, or not a git repository); " \
            "pass --base REF or --scope all"
        end
        parse(
          name_status: Git.diff_name_status(root, mb) || "",
          unified_diff: Git.diff_unified_zero(root, mb) || "",
          base_ref: base_ref,
          merge_base: mb,
          scope: :diff
        )
      end

      private

      # git emits UTF-8; reinterpret as such (scrubbing any stray bytes) so string
      # ops never raise "invalid byte sequence" under a non-UTF-8 locale, where
      # Open3 tags git's output with the ASCII default external encoding.
      def utf8(text)
        text.to_s.dup.force_encoding(Encoding::UTF_8).scrub
      end

      # path => [Range, ...] of new-side changed lines, from `--unified=0` hunks.
      #
      # Hunk BODY lines are skipped by counting them off against the header's
      # old/new line counts. Without that, an added line whose content starts
      # with "++ " (rendered "+++ ...") would be mistaken for a file header and
      # every later hunk would be attributed to a bogus path.
      def parse_unified(text)
        ranges = Hash.new { |h, k| h[k] = [] }
        current = nil
        old_left = 0
        new_left = 0
        text.each_line do |raw|
          line = raw.chomp
          if old_left.positive? || new_left.positive?
            marker = line[0]
            if marker == "\\"
              next # "\ No newline at end of file" counts toward neither side
            elsif marker == "+" && new_left.positive?
              new_left -= 1
              next
            elsif marker == "-" && old_left.positive?
              old_left -= 1
              next
            elsif marker != "+" && marker != "-" && old_left.positive? && new_left.positive?
              # context line (only present when the diff was not --unified=0)
              old_left -= 1
              new_left -= 1
              next
            end
            # The line cannot belong to this hunk: the header counts were off.
            # Drop the counters and re-synchronise on the headers below.
            old_left = 0
            new_left = 0
          end

          if line.start_with?("+++ ")
            current = strip_diff_prefix(line[4..])
          elsif line.start_with?("@@")
            old_left, new_left = hunk_counts(line)
            next unless current

            range = hunk_new_range(line)
            ranges[current] << range if range
          end
        end
        ranges.default_proc = nil
        ranges
      end

      # "@@ -a,b +c,d @@" -> [b, d], each defaulting to 1; [0, 0] when the
      # header is not a plain two-way hunk header (no body tracking then).
      def hunk_counts(header)
        m = HUNK_COUNTS.match(header)
        return [0, 0] unless m

        [m[1] ? m[1].to_i : 1, m[2] ? m[2].to_i : 1]
      end

      # "@@ -a,b +c,d @@" -> (c..c+d-1); d defaults to 1; d==0 (deletion) -> nil.
      def hunk_new_range(header)
        m = header.match(/\+(\d+)(?:,(\d+))?/)
        return nil unless m

        start = m[1].to_i
        count = m[2] ? m[2].to_i : 1
        return nil if count.zero?

        start..(start + count - 1)
      end

      # Strip the "b/" (or "a/") prefix git puts on diff paths; drop a trailing
      # tab metadata field; undo git's C-style quoting; nil for /dev/null
      # (added/deleted side).
      def strip_diff_prefix(path)
        path = unquote(path.split("\t", 2).first.to_s)
        # git emits the literal "/dev/null" marker for an absent side on every
        # platform; this is git's convention, not the OS null device (File::NULL
        # would wrongly be "NUL" on Windows), so match the literal.
        return nil if path == "/dev/null" # standard:disable Style/FileNull

        path.sub(%r{\A[ab]/}, "")
      end

      # Undo git's C-style path quoting: with core.quotePath on (the default) a
      # path containing non-ASCII or special bytes is emitted as
      # "caf\303\251.rb". Left quoted, such a path can never equal a finding's
      # path. Unquoted input is returned unchanged.
      def unquote(path)
        return path unless path.length >= 2 && path.start_with?('"') && path.end_with?('"')

        # Decode on raw bytes: each \NNN escape is one byte of the UTF-8 name.
        decoded = path[1..-2].b.gsub(/\\([0-3][0-7]{2}|.)/) do
          esc = Regexp.last_match(1)
          (esc.length == 3) ? esc.to_i(8).chr : C_ESCAPES.fetch(esc, esc)
        end
        decoded.force_encoding(Encoding::UTF_8).scrub
      end

      # "<status>\t<path>" lines -> [[path, status_code], ...]. Renames/copies
      # ("R100\told\tnew") resolve to the NEW path. Quoted paths are unquoted so
      # they agree with the paths {parse_unified} produces.
      def parse_name_status(text)
        text.each_line.filter_map do |raw|
          line = raw.chomp
          next if line.empty?

          fields = line.split("\t")
          code = fields[0].to_s[0]
          path = (code == "R" || code == "C") ? fields[2] : fields[1]
          [unquote(path), code] if path
        end
      end
    end
  end
end

data/lib/moult/discovery.rb ADDED Viewed

@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+require_relative "git"
module Moult
  # Finds the Ruby files to analyse under a root directory.
  #
  # Inside a git repository we use `git ls-files` so .gitignore is respected for
  # free (vendored and generated code is excluded as the repo intends).
  # Otherwise we glob, explicitly skipping the usual non-source directories.
  module Discovery
    # Directory names that are never descended into by the glob fallback.
    SKIP_DIRS = %w[vendor tmp node_modules .git].freeze

    module_function

    # @param root [String] absolute directory to search
    # @return [Array<String>] absolute paths to .rb files, sorted
    def ruby_files(root)
      files = Git.repo?(root) ? from_git(root) : from_glob(root)
      files.sort
    end

    # @return [Array<String>] the .rb files {Git.listed_files} reports, made absolute
    def from_git(root)
      Git.listed_files(root)
        .select { |rel| rel.end_with?(".rb") }
        .map { |rel| File.join(root, rel) }
    end

    # Glob fallback for a root that is not a git repository.
    #
    # The pattern is evaluated relative to +base: root+ rather than spliced onto
    # the root path, so a root whose name contains glob metacharacters
    # ("proj[1]", "{a}", "*") is searched literally instead of being
    # interpreted as part of the pattern. Directories that merely end in ".rb"
    # are not files to analyse and are dropped.
    #
    # @return [Array<String>] absolute .rb file paths outside {SKIP_DIRS}
    def from_glob(root)
      Dir.glob(File.join("**", "*.rb"), base: root)
        .map { |rel| File.join(root, rel) }
        .reject { |abs| skip?(abs, root) }
        .select { |abs| File.file?(abs) }
    end

    # @param abs [String] absolute path under +root+
    # @return [Boolean] whether any path segment below +root+ is in {SKIP_DIRS}
    def skip?(abs, root)
      relative = abs.delete_prefix(root).delete_prefix(File::SEPARATOR)
      relative.split(File::SEPARATOR).any? { |segment| SKIP_DIRS.include?(segment) }
    end
  end
end