moult 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +44 -0
  3. data/LICENSE.txt +201 -0
  4. data/NOTICE +4 -0
  5. data/README.md +331 -0
  6. data/exe/moult +6 -0
  7. data/lib/moult/abc.rb +133 -0
  8. data/lib/moult/boundaries/packwerk.rb +114 -0
  9. data/lib/moult/boundaries/severity.rb +87 -0
  10. data/lib/moult/boundaries.rb +77 -0
  11. data/lib/moult/boundaries_report.rb +106 -0
  12. data/lib/moult/churn.rb +52 -0
  13. data/lib/moult/cli/boundaries_command.rb +83 -0
  14. data/lib/moult/cli/coverage_command.rb +101 -0
  15. data/lib/moult/cli/dead_code_command.rb +112 -0
  16. data/lib/moult/cli/duplication_command.rb +92 -0
  17. data/lib/moult/cli/flags_command.rb +95 -0
  18. data/lib/moult/cli/gate_command.rb +113 -0
  19. data/lib/moult/cli/health_command.rb +117 -0
  20. data/lib/moult/cli/hotspots_command.rb +104 -0
  21. data/lib/moult/cli.rb +102 -0
  22. data/lib/moult/clones.rb +91 -0
  23. data/lib/moult/cloud_upload.rb +29 -0
  24. data/lib/moult/confidence/rules.rb +128 -0
  25. data/lib/moult/confidence.rb +106 -0
  26. data/lib/moult/coverage/resolver.rb +56 -0
  27. data/lib/moult/coverage.rb +176 -0
  28. data/lib/moult/coverage_report.rb +98 -0
  29. data/lib/moult/dead_code.rb +119 -0
  30. data/lib/moult/dead_code_report.rb +65 -0
  31. data/lib/moult/diff.rb +177 -0
  32. data/lib/moult/discovery.rb +38 -0
  33. data/lib/moult/duplication/confidence.rb +92 -0
  34. data/lib/moult/duplication.rb +112 -0
  35. data/lib/moult/duplication_report.rb +89 -0
  36. data/lib/moult/flag_scanner.rb +150 -0
  37. data/lib/moult/flags/classification.rb +79 -0
  38. data/lib/moult/flags/snapshot.rb +162 -0
  39. data/lib/moult/flags/staleness.rb +145 -0
  40. data/lib/moult/flags.rb +131 -0
  41. data/lib/moult/flags_report.rb +136 -0
  42. data/lib/moult/formatters/boundaries_json.rb +20 -0
  43. data/lib/moult/formatters/boundaries_table.rb +53 -0
  44. data/lib/moult/formatters/coverage_json.rb +19 -0
  45. data/lib/moult/formatters/coverage_table.rb +60 -0
  46. data/lib/moult/formatters/dead_code_json.rb +20 -0
  47. data/lib/moult/formatters/dead_code_table.rb +66 -0
  48. data/lib/moult/formatters/duplication_json.rb +20 -0
  49. data/lib/moult/formatters/duplication_table.rb +55 -0
  50. data/lib/moult/formatters/flags_json.rb +20 -0
  51. data/lib/moult/formatters/flags_table.rb +76 -0
  52. data/lib/moult/formatters/gate_github.rb +52 -0
  53. data/lib/moult/formatters/gate_json.rb +20 -0
  54. data/lib/moult/formatters/gate_message.rb +19 -0
  55. data/lib/moult/formatters/gate_sarif.rb +78 -0
  56. data/lib/moult/formatters/gate_table.rb +71 -0
  57. data/lib/moult/formatters/health_json.rb +20 -0
  58. data/lib/moult/formatters/health_table.rb +80 -0
  59. data/lib/moult/formatters/json.rb +23 -0
  60. data/lib/moult/formatters/table.rb +70 -0
  61. data/lib/moult/formatters/text_table.rb +39 -0
  62. data/lib/moult/gate/config.rb +55 -0
  63. data/lib/moult/gate/evaluation.rb +172 -0
  64. data/lib/moult/gate/policy.rb +103 -0
  65. data/lib/moult/gate.rb +199 -0
  66. data/lib/moult/gate_report.rb +97 -0
  67. data/lib/moult/git.rb +83 -0
  68. data/lib/moult/health/score.rb +291 -0
  69. data/lib/moult/health.rb +320 -0
  70. data/lib/moult/health_report.rb +97 -0
  71. data/lib/moult/index.rb +228 -0
  72. data/lib/moult/parser.rb +101 -0
  73. data/lib/moult/rails_conventions.rb +124 -0
  74. data/lib/moult/report.rb +114 -0
  75. data/lib/moult/scoring.rb +82 -0
  76. data/lib/moult/span.rb +17 -0
  77. data/lib/moult/symbol_id.rb +30 -0
  78. data/lib/moult/symbol_scanner.rb +100 -0
  79. data/lib/moult/version.rb +5 -0
  80. data/lib/moult.rb +84 -0
  81. data/schema/boundaries.schema.json +125 -0
  82. data/schema/common.schema.json +76 -0
  83. data/schema/coverage.schema.json +83 -0
  84. data/schema/deadcode.schema.json +106 -0
  85. data/schema/duplication.schema.json +128 -0
  86. data/schema/flags.schema.json +157 -0
  87. data/schema/gate.schema.json +165 -0
  88. data/schema/health.schema.json +157 -0
  89. data/schema/hotspots.schema.json +106 -0
  90. metadata +185 -0
@@ -0,0 +1,176 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "time"
5
+ require_relative "symbol_id"
6
+
7
module Moult
  # Ingests line-keyed code coverage from a LOCAL FILE and normalises it into one
  # Moult-owned value object ({Dataset}) the {Resolver} can read. This is the
  # runtime-layer analogue of {Index}: external formats (SimpleCov, stdlib
  # +Coverage+) come in, only Moult types go out, so the input is swappable.
  #
  # Two on-disk formats are understood (auto-detected, or forced via +format:+):
  #
  # * +:simplecov+ — SimpleCov's +coverage/.resultset.json+:
  #   <tt>{command => {"coverage" => {abs_path => {"lines" => [...]}}, "timestamp" => epoch}}</tt>.
  #   Multiple command runs are merged element-wise.
  # * +:coverage+ — a JSON dump of stdlib <tt>Coverage.result(lines: true)</tt>:
  #   <tt>{abs_path => {"lines" => [...]}}</tt> or the legacy bare <tt>{abs_path => [...]}</tt>.
  #
  # Line arrays are 0-indexed (index 0 = line 1) with the shared convention:
  # +nil+ = non-executable, +0+ = executable but never run, +N+ = hit count.
  # +oneshot_lines+ is intentionally unsupported: it cannot distinguish 0 from
  # nil, so runtime-cold could not be detected.
  module Coverage
    module_function

    # Provenance of a merged coverage dataset. Captured into the protected
    # contract so a consumer can see where the runtime evidence came from. The
    # +collected_at+ slot also seeds a future stale-detection slice (deferred).
    Source = Struct.new(:backend, :version, :collected_at) do
      # @return [Hash{Symbol=>Object}] the three provenance fields, symbol-keyed
      def to_h
        {backend: backend, version: version, collected_at: collected_at}
      end
    end

    # Normalised coverage: per (root-relative) path, the 0-indexed line array.
    Dataset = Struct.new(:entries, :source, :unmatched_count) do
      # @return [Boolean] whether this file appeared in the coverage dataset
      def tracked?(path)
        entries.key?(path)
      end

      # @param line [Integer] 1-based line number
      # @return [Integer, nil] coverage value at that line, or nil if untracked
      def line_value(path, line)
        arr = entries[path]
        # A line below 1 is not a real line; without this guard a zero or
        # negative index would silently read from the END of the array.
        return nil unless arr && line.positive?

        arr[line - 1]
      end
    end

    # Reads +path+, detects (or is told) its format, and returns the normalised
    # dataset with paths made relative to +root+.
    #
    # @param path [String] path to the coverage file
    # @param root [String] absolute analysis root (findings are relative to it)
    # @param format [Symbol] :auto, :simplecov, or :coverage
    # @raise [Moult::Error] when the file is missing, unreadable, not valid
    #   JSON, or of an unknown/undetectable format
    # @return [Dataset]
    def load(path, root:, format: :auto)
      raw = JSON.parse(File.read(path))
      fmt = (format == :auto) ? detect_format(raw) : format
      abs_entries, source =
        case fmt
        when :simplecov then from_simplecov(raw, path)
        when :coverage then from_coverage(raw, path)
        else raise Moult::Error, "unknown coverage format: #{fmt}"
        end
      entries, unmatched = relativize(abs_entries, root)
      Dataset.new(entries: entries, source: source, unmatched_count: unmatched)
    rescue JSON::ParserError => e
      raise Moult::Error, "could not parse coverage file #{path}: #{e.message}"
    rescue Errno::ENOENT
      raise Moult::Error, "no such coverage file: #{path}"
    rescue SystemCallError => e
      # Any other OS-level failure (path is a directory, permission denied, ...)
      # is reported the same way as the cases above instead of leaking a raw Errno.
      raise Moult::Error, "could not read coverage file #{path}: #{e.message}"
    end

    # SimpleCov nests file coverage under a command name and a "coverage" key;
    # stdlib dumps key files at the top level. The presence of "coverage" on the
    # first value is the unambiguous discriminator.
    #
    # @param raw [Object] the parsed JSON document
    # @raise [Moult::Error] when +raw+ is not a Hash or matches neither shape
    # @return [Symbol] :simplecov or :coverage
    def detect_format(raw)
      raise Moult::Error, "coverage file is not a JSON object" unless raw.is_a?(Hash)
      sample = raw.values.first
      if sample.is_a?(Hash) && sample.key?("coverage")
        :simplecov
      elsif sample.is_a?(Array) || (sample.is_a?(Hash) && sample.key?("lines"))
        :coverage
      else
        raise Moult::Error, "could not auto-detect coverage format; pass --coverage-format simplecov|coverage"
      end
    end

    # Merges every command run in a SimpleCov resultset into one line array per
    # file. Files whose payload carries no line array are skipped (exactly as
    # {from_coverage} does) so they never show up as tracked-but-empty entries.
    #
    # @return [[Hash{String=>Array}, Source]] abs-path line arrays + provenance
    def from_simplecov(raw, _path)
      merged = {}
      timestamps = []
      raw.each_value do |run|
        next unless run.is_a?(Hash)
        stamp = run["timestamp"]
        # Only numeric epochs are usable; anything else would make Time.at raise.
        timestamps << stamp if stamp.is_a?(Numeric)
        (run["coverage"] || {}).each do |file, data|
          lines = extract_lines(data)
          next unless lines
          merged[file] = merge_lines(merged[file], lines)
        end
      end
      collected = timestamps.max
      source = Source.new(
        backend: "simplecov",
        version: nil, # not recorded in the resultset
        collected_at: collected && Time.at(collected).utc.iso8601
      )
      [merged, source]
    end

    # @return [[Hash{String=>Array}, Source]] abs-path line arrays + provenance
    def from_coverage(raw, path)
      entries = {}
      raw.each do |file, data|
        lines = extract_lines(data)
        entries[file] = lines if lines
      end
      # The raw dump carries no timestamp, so the file mtime is the best-effort
      # collected_at (noted as a fallback; only matters for deferred staleness).
      source = Source.new(
        backend: "coverage",
        version: RUBY_VERSION,
        collected_at: File.mtime(path).utc.iso8601
      )
      [entries, source]
    end

    # Accepts both the wrapped ({"lines" => [...]}) and legacy bare-array forms;
    # ignores sibling :methods/:branches data.
    #
    # @return [Array, nil] the line array, or nil when +data+ is neither form
    def extract_lines(data)
      case data
      when Array then data
      when Hash then data["lines"]
      end
    end

    # Element-wise merge of two coverage runs: a value is hit if hit in either
    # run (max of the non-nil values), non-executable only if nil in both.
    #
    # @param a [Array, nil] line array from the runs merged so far
    # @param b [Array, nil] line array from the next run
    # @return [Array, nil] merged array, as long as the longer input
    def merge_lines(a, b)
      return b if a.nil?
      return a if b.nil?
      Array.new([a.length, b.length].max) do |i|
        x, y = a[i], b[i]
        if x.nil? then y
        elsif y.nil? then x
        else [x, y].max
        end
      end
    end

    # Map absolute coverage paths to the root-relative paths Phase 2 emits, so
    # the join lands on the same symbol_id components. Files outside the root are
    # dropped and counted (a different checkout layout, vendored code, etc.).
    #
    # @return [[Hash{String=>Array}, Integer]] relative entries + dropped count
    def relativize(abs_entries, root)
      real_root = canonicalize(root)
      entries = {}
      unmatched = 0
      abs_entries.each do |abs, lines|
        full = canonicalize(abs)
        if full == real_root || full.start_with?(real_root + File::SEPARATOR)
          entries[SymbolId.relative_path(full, real_root)] = lines
        else
          unmatched += 1
        end
      end
      [entries, unmatched]
    end

    # realpath resolves /tmp -> /private/tmp style symlinks so coverage paths
    # line up with rubydex's canonical paths; falls back when the file is absent
    # locally (coverage collected on another machine).
    def canonicalize(p)
      File.realpath(p)
    rescue
      File.expand_path(p)
    end
  end
end
175
+
176
+ require_relative "coverage/resolver"
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
module Moult
  # Serialized result model behind `moult coverage`
  # (schema/coverage.schema.json): every definition labelled hot, cold or
  # untracked. It is a diagnostic view over the same runtime evidence
  # `moult deadcode --coverage` folds into confidence; it reports what ran and
  # makes no dead-code claim.
  #
  # {build} does the orchestration: it walks the {Index} definitions and
  # classifies each one through {Coverage::Resolver}, joined on the path + span
  # that make up the symbol_id.
  class CoverageReport
    SCHEMA_VERSION = 1

    # A single classified definition. The symbol_id is carried along so this
    # map can be joined to the hotspots and deadcode contracts.
    Entry = Struct.new(:symbol_id, :kind, :name, :span, :runtime) do
      # @return [Hash{Symbol=>Object}] serialized form; kind and runtime as strings
      def to_h
        {
          symbol_id: symbol_id,
          kind: kind.to_s,
          name: name,
          span: span.to_h,
          runtime: runtime.to_s
        }
      end
    end

    attr_reader :root, :entries, :git_ref, :generated_at,
      :backend, :backend_version, :resolved, :diagnostics, :coverage_source

    # Classifies every indexed definition and wraps the result in a report.
    #
    # @param index [Index] resolved definition index
    # @param coverage [Coverage::Dataset] the runtime dataset to resolve against
    # @return [CoverageReport]
    def self.build(index:, coverage:, root:, git_ref: nil, generated_at: nil, backend_version: nil)
      classified = index.definitions.map do |definition|
        Entry.new(
          symbol_id: definition.symbol_id,
          kind: definition.kind,
          name: definition.name,
          span: definition.span,
          runtime: Coverage::Resolver.classify(
            coverage, path: definition.path, span: definition.span, kind: definition.kind
          )
        )
      end

      # Hot first (most surprising/actionable), then cold, then untracked; any
      # other runtime value sorts last. Name is the deterministic tie-break.
      rank = {hot: 0, cold: 1, untracked: 2}
      classified.sort_by! { |entry| [rank.fetch(entry.runtime, 3), entry.name.to_s] }

      new(
        root: root,
        entries: classified,
        git_ref: git_ref,
        generated_at: generated_at,
        backend: "rubydex",
        backend_version: backend_version,
        resolved: index.resolved?,
        diagnostics: index.diagnostics,
        coverage_source: coverage.source
      )
    end

    # Plain value constructor; every argument is stored as given.
    def initialize(root:, entries:, git_ref: nil, generated_at: nil,
      backend: "rubydex", backend_version: nil, resolved: true, diagnostics: [], coverage_source: nil)
      @root, @entries = root, entries
      @git_ref, @generated_at = git_ref, generated_at
      @backend, @backend_version = backend, backend_version
      @resolved, @diagnostics = resolved, diagnostics
      @coverage_source = coverage_source
    end

    # @return [Hash{Symbol=>Integer}] counts keyed :hot, :cold, :untracked
    def summary
      entries.each_with_object({hot: 0, cold: 0, untracked: 0}) do |entry, tally|
        tally[entry.runtime] = tally.fetch(entry.runtime, 0) + 1
      end
    end

    # @return [Hash] the full JSON envelope: schema version, tool, analysis
    #   metadata, summary counts and the serialized entries
    def to_h
      index_info = {
        backend: backend,
        backend_version: backend_version,
        resolved: resolved,
        diagnostics: diagnostics
      }
      analysis = {
        root: root,
        git_ref: git_ref,
        generated_at: generated_at,
        coverage: coverage_source&.to_h,
        index: index_info
      }

      {
        schema_version: SCHEMA_VERSION,
        tool: {name: "moult", version: Moult::VERSION},
        analysis: analysis,
        summary: summary,
        symbols: entries.map { |entry| entry.to_h }
      }
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
module Moult
  # Orchestrates the dead-code analysis: it asks the {Index} for every definition,
  # keeps the ones with no production reference, gathers the facts each finding is
  # judged on, and runs them through the pure {Confidence} model. The result is a
  # ranked {DeadCodeReport} of confidence-graded candidates — never assertions of
  # certain death.
  #
  # This is the only layer that knows how the facts are sourced (the index, the
  # Rails conventions, a metaprogramming scan of the owning file); {Confidence}
  # stays a pure function of those facts so it can be tested in isolation.
  module DeadCode
    # Matches a path that has a `test/` or `spec/` directory segment.
    TEST_PATH = %r{(\A|/)(test|spec)/}

    # Tokens that indicate dynamic dispatch / metaprogramming in a file. Their
    # mere presence lowers confidence for definitions in that file: such code can
    # be reached in ways static analysis cannot see. Matched conservatively (a
    # false match only lowers confidence, never hides a finding).
    #
    # `respond_to_missing` is listed WITHOUT its trailing `?`: the closing `\b`
    # needs a word character on one side, so a pattern ending in `\?` could
    # never match `respond_to_missing?(` (both `?` and `(` are non-word
    # characters). The boundary now falls between the `g` and the `?`.
    DYNAMIC_TOKENS = /
      \b(
        send | public_send | __send__ |
        method_missing | respond_to_missing |
        define_method | define_singleton_method |
        class_eval | module_eval | instance_eval | instance_exec |
        const_get | const_set | constantize |
        eval
      )\b
    /x

    module_function

    # Builds the ranked dead-code report for one analysis run.
    #
    # @param root [String] absolute analysis root
    # @param files [Array<String>] absolute Ruby file paths analysed
    # @param index [Index] resolved definition/reference index
    # @param rails [RailsConventions] Rails entrypoint awareness
    # @param min_confidence [Float] drop findings below this confidence
    # @param coverage [Coverage::Dataset, nil] runtime coverage to merge (Phase 3)
    # @return [DeadCodeReport]
    def build_report(root:, files:, index:, rails:, min_confidence: 0.0,
      git_ref: nil, generated_at: nil, backend_version: nil, coverage: nil)
      dynamic_files = dynamic_dispatch_files(files, root)

      findings = index.definitions.filter_map do |definition|
        next unless candidate?(definition)
        Confidence.score(context_for(definition, index: index, rails: rails, dynamic_files: dynamic_files, coverage: coverage))
      end

      findings.select! { |f| f.confidence >= min_confidence }
      # Highest confidence first; name keeps the order deterministic on ties.
      findings.sort_by! { |f| [-f.confidence, f.name.to_s] }

      DeadCodeReport.new(
        root: root,
        findings: findings,
        git_ref: git_ref,
        generated_at: generated_at,
        backend: "rubydex",
        backend_version: backend_version,
        resolved: index.resolved?,
        rails: rails.rails?,
        diagnostics: index.diagnostics,
        coverage_source: coverage&.source
      )
    end

    # A definition is a candidate when nothing outside of tests references it.
    #
    # @return [Boolean]
    def candidate?(definition)
      non_test_reference_paths(definition).empty?
    end

    # Gathers every fact {Confidence} judges a finding on into one Context.
    #
    # @param dynamic_files [Set<String>] root-relative paths flagged by
    #   {dynamic_dispatch_files}
    # @return [Confidence::Context]
    def context_for(definition, index:, rails:, dynamic_files:, coverage: nil)
      Confidence::Context.new(
        symbol_id: definition.symbol_id,
        kind: definition.kind,
        name: definition.name,
        span: definition.span,
        path: definition.path,
        visibility: definition.visibility,
        reference_count: definition.reference_count,
        test_only: test_only?(definition),
        rails_signals: rails.signals_for(definition),
        dynamic_dispatch: dynamic_files.include?(definition.path),
        override_of: definition.override_of,
        deprecated: false,
        index_resolved: index.resolved?,
        runtime: runtime_for(definition, coverage)
      )
    end

    # The runtime classification for this definition, joined on the same path +
    # span that make up its symbol_id. nil when no coverage was supplied.
    def runtime_for(definition, coverage)
      return nil unless coverage
      Coverage::Resolver.classify(
        coverage, path: definition.path, span: definition.span, kind: definition.kind
      )
    end

    # Referenced only from test/spec files: it is exercised, but possibly only to
    # keep otherwise-dead production code alive — a weaker candidate, not excluded.
    #
    # @return [Boolean]
    def test_only?(definition)
      definition.reference_count.to_i.positive? && non_test_reference_paths(definition).empty?
    end

    # @return [Array] the definition's reference paths that do not match TEST_PATH
    def non_test_reference_paths(definition)
      Array(definition.reference_paths).reject { |path| path.to_s.match?(TEST_PATH) }
    end

    # Scans each file's source for DYNAMIC_TOKENS.
    #
    # @return [Set<String>] root-relative paths whose source contains dynamic dispatch
    def dynamic_dispatch_files(files, root)
      files.each_with_object(Set.new) do |abs, set|
        source = File.read(abs)
        set << SymbolId.relative_path(abs, root) if source.match?(DYNAMIC_TOKENS)
      rescue
        # Best-effort: a file that cannot be read or matched is simply not
        # flagged, which only affects the confidence of its definitions.
        next
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
module Moult
  # Serialized result model behind `moult deadcode`, a sibling of {Report}. It
  # owns the JSON envelope (schema/deadcode.schema.json) and leaves the
  # protected hotspots {Report} alone. The findings it holds are
  # {Confidence::Finding} objects: the per-finding confidence model is the
  # protected API, and this class only wraps the report-level envelope around it.
  class DeadCodeReport
    # Bump only on a breaking change to the serialized shape. v2 adds the
    # Phase 3 runtime block: analysis.coverage provenance and a per-finding
    # runtime classification (both null when no coverage was merged).
    SCHEMA_VERSION = 2

    attr_reader :root, :findings, :git_ref, :generated_at,
      :backend, :backend_version, :resolved, :rails, :diagnostics, :coverage_source

    # @param root [String] absolute analysis root
    # @param findings [Array<Confidence::Finding>] ranked, most-likely-dead first
    # @param git_ref [String, nil] HEAD sha when run inside a repo
    # @param generated_at [String, nil] ISO8601 timestamp
    # @param backend [String] index backend name (e.g. "rubydex")
    # @param backend_version [String, nil] backend gem version
    # @param resolved [Boolean] whether the index fully resolved
    # @param rails [Boolean] whether Rails entrypoint awareness was applied
    # @param diagnostics [Array<String>] non-fatal index diagnostics
    # @param coverage_source [Coverage::Source, nil] provenance of merged runtime
    #   coverage; nil when `moult deadcode` was run without --coverage
    def initialize(root:, findings:, git_ref: nil, generated_at: nil,
      backend: "rubydex", backend_version: nil, resolved: true, rails: false, diagnostics: [],
      coverage_source: nil)
      @root, @findings = root, findings
      @git_ref, @generated_at = git_ref, generated_at
      @backend, @backend_version = backend, backend_version
      @resolved, @rails = resolved, rails
      @diagnostics = diagnostics
      @coverage_source = coverage_source
    end

    # @return [Hash] the JSON envelope: schema version, tool, analysis metadata
    #   and the serialized findings
    def to_h
      {
        schema_version: SCHEMA_VERSION,
        tool: {name: "moult", version: Moult::VERSION},
        analysis: analysis_section,
        findings: findings.map { |finding| finding.to_h }
      }
    end

    private

    # The "analysis" object: where and when the run happened, plus provenance.
    def analysis_section
      {
        root: root,
        git_ref: git_ref,
        generated_at: generated_at,
        coverage: coverage_source&.to_h,
        index: index_section
      }
    end

    # The "analysis.index" object: which backend produced the index and how
    # completely it resolved.
    def index_section
      {
        backend: backend,
        backend_version: backend_version,
        resolved: resolved,
        rails: rails,
        diagnostics: diagnostics
      }
    end
  end
end
data/lib/moult/diff.rb ADDED
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
module Moult
  # A Moult-owned value object describing what changed between a base ref and the
  # working tree, plus the pure filter the gate uses to decide whether a finding
  # is "in the diff". This is the genuinely novel component of the PR gate — it is
  # pinned against hand-built git output exactly like the coverage {Resolver} and
  # the ABC metric; drift is a bug.
  #
  # {Git} is the only file that shells git; it hands this class raw
  # `--name-status` and `--unified=0` text. {parse} turns that text into a Diff
  # with no IO, so it is trivially unit-testable. {compute} is the thin IO wrapper
  # that calls git then {parse}.
  #
  # Line ranges are taken from the NEW side of each `--unified=0` hunk header
  # (`@@ -a,b +c,d @@`): with zero context they are precisely the added/changed
  # lines. Paths are repo-root-relative (git's own framing); the gate is meant to
  # run at the repository root, where they line up with Moult's root-relative
  # finding paths.
  class Diff
    # One changed file. +status+ is git's single-letter code (A/M/D/R/C/...);
    # +line_ranges+ are the new-side changed line ranges (empty for a deletion, a
    # pure-deletion hunk, or a content-less rename).
    ChangedFile = Struct.new(:path, :status, :line_ranges) do
      # Does +line+ fall on a changed/added line of this file?
      def changed_line?(line)
        line_ranges.any? { |r| r.cover?(line) }
      end

      # Does the inclusive line range [lo, hi] intersect any changed range?
      def changed_range?(lo, hi)
        line_ranges.any? { |r| r.begin <= hi && r.end >= lo }
      end
    end

    attr_reader :base_ref, :merge_base, :scope, :files

    # @param base_ref [String, nil] the requested base ref (nil for :all scope)
    # @param merge_base [String, nil] resolved merge-base sha (nil for :all scope)
    # @param scope [Symbol] :diff (gate the changed lines) or :all (gate everything)
    # @param files [Array<ChangedFile>]
    def initialize(base_ref:, merge_base:, scope:, files:)
      @base_ref = base_ref
      @merge_base = merge_base
      @scope = scope
      @files = files
      # Path lookup table so membership checks do not scan +files+.
      @by_path = files.to_h { |f| [f.path, f] }
    end

    # Line-level membership: is the span [start_line, end_line] inside the diff?
    # Used where an analysis has lines (complexity methods, dead-code spans,
    # duplication/flag occurrences). With +start_line+ nil this falls back to
    # path-level. Always true under :all scope.
    # @return [Boolean]
    def in_diff?(path:, start_line: nil, end_line: nil)
      return true if scope == :all
      return includes_path?(path) if start_line.nil?

      file = @by_path[path]
      return false unless file

      file.changed_range?(start_line, end_line || start_line)
    end

    # Path-level membership: did this file change at all? The fallback where an
    # analysis is file-keyed with no line numbers (boundaries — null symbol_id).
    # Always true under :all scope.
    # @return [Boolean]
    def includes_path?(path)
      return true if scope == :all

      @by_path.key?(path)
    end

    class << self
      # Build a Diff from raw git text. PURE — no IO. Pinned in test/test_diff.rb.
      # @param name_status [String] `git diff --name-status REF` output
      # @param unified_diff [String] `git diff --unified=0 REF` output
      # @return [Diff]
      def parse(name_status:, unified_diff:, base_ref:, merge_base:, scope: :diff)
        ranges = parse_unified(utf8(unified_diff))
        files = parse_name_status(utf8(name_status)).map do |path, status|
          ChangedFile.new(path: path, status: status, line_ranges: ranges[path] || [])
        end
        new(base_ref: base_ref, merge_base: merge_base, scope: scope, files: files)
      end

      # Resolve the diff for +root+ against +base_ref+ via {Git}, then {parse}.
      # @param scope [Symbol] :diff or :all (:all yields an all-inclusive Diff)
      # @raise [Moult::Error] when the merge-base cannot be resolved
      # @return [Diff]
      def compute(root:, base_ref:, scope: :diff)
        return new(base_ref: nil, merge_base: nil, scope: :all, files: []) if scope == :all

        mb = Git.merge_base(root, base_ref)
        unless mb
          raise Moult::Error,
            "could not resolve a merge-base between #{base_ref.inspect} and HEAD " \
            "(unknown ref, shallow clone, or not a git repository); " \
            "pass --base REF or --scope all"
        end

        parse(
          name_status: Git.diff_name_status(root, mb) || "",
          unified_diff: Git.diff_unified_zero(root, mb) || "",
          base_ref: base_ref,
          merge_base: mb,
          scope: :diff
        )
      end

      private

      # git emits UTF-8; reinterpret as such (scrubbing any stray bytes) so string
      # ops never raise "invalid byte sequence" under a non-UTF-8 locale, where
      # Open3 tags git's output with the ASCII default external encoding.
      def utf8(text)
        text.to_s.dup.force_encoding(Encoding::UTF_8).scrub
      end

      # path => [Range, ...] of new-side changed lines, from `--unified=0` hunks.
      #
      # Hunk BODY lines are skipped by count (taken from the `@@` header) rather
      # than inspected: an added line whose content starts with "++ " is printed
      # as "+++ ...", and must not be mistaken for a file header — that would
      # attribute every later hunk of the file to a bogus path.
      #
      # NOTE(review): paths that git C-quotes (non-ASCII names under the default
      # core.quotePath) are not unquoted here — confirm {Git} disables quoting.
      def parse_unified(text)
        ranges = Hash.new { |h, k| h[k] = [] }
        current = nil
        body_left = 0 # "+"/"-" content lines still expected in the current hunk
        text.each_line do |raw|
          line = raw.chomp
          if body_left.positive?
            if line.start_with?("+", "-")
              body_left -= 1
              next
            elsif line.start_with?("\\")
              # "\ No newline at end of file" marker: not counted by the header.
              next
            end
            # Anything else means the hunk was shorter than announced (truncated
            # or hand-built input): resynchronise and treat this line normally.
            body_left = 0
          end

          if line.start_with?("+++ ")
            current = strip_diff_prefix(line[4..])
          elsif line.start_with?("@@")
            body_left = hunk_body_size(line)
            next unless current

            range = hunk_new_range(line)
            ranges[current] << range if range
          end
        end
        ranges.default_proc = nil
        ranges
      end

      # Number of content lines that follow a `--unified=0` hunk header: with no
      # context lines that is old-count + new-count (each defaulting to 1).
      # Returns 0 for a header that does not have the expected shape.
      def hunk_body_size(header)
        m = header.match(/\A@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/)
        return 0 unless m

        old_count = m[1] ? m[1].to_i : 1
        new_count = m[2] ? m[2].to_i : 1
        old_count + new_count
      end

      # "@@ -a,b +c,d @@" -> (c..c+d-1); d defaults to 1; d==0 (deletion) -> nil.
      def hunk_new_range(header)
        m = header.match(/\+(\d+)(?:,(\d+))?/)
        return nil unless m

        start = m[1].to_i
        count = m[2] ? m[2].to_i : 1
        return nil if count.zero?

        start..(start + count - 1)
      end

      # Strip the "b/" (or "a/") prefix git puts on diff paths; drop a trailing
      # tab metadata field; nil for /dev/null (added/deleted side).
      def strip_diff_prefix(path)
        path = path.split("\t", 2).first.to_s
        # git emits the literal "/dev/null" marker for an absent side on every
        # platform; this is git's convention, not the OS null device (File::NULL
        # would wrongly be "NUL" on Windows), so match the literal.
        return nil if path == "/dev/null" # standard:disable Style/FileNull

        path.sub(%r{\A[ab]/}, "")
      end

      # "<status>\t<path>" lines -> [[path, status_code], ...]. Renames/copies
      # ("R100\told\tnew") resolve to the NEW path.
      def parse_name_status(text)
        text.each_line.filter_map do |raw|
          line = raw.chomp
          next if line.empty?

          fields = line.split("\t")
          code = fields[0].to_s[0]
          path = (code == "R" || code == "C") ? fields[2] : fields[1]
          [path, code] if path
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "git"
4
+
5
module Moult
  # Finds the Ruby files to analyse under a root directory.
  #
  # Inside a git repository we use `git ls-files` so .gitignore is respected for
  # free (vendored and generated code is excluded as the repo intends).
  # Otherwise we glob, explicitly skipping the usual non-source directories.
  module Discovery
    # Directory names that exclude a file when they appear anywhere in its
    # root-relative path.
    SKIP_DIRS = %w[vendor tmp node_modules .git].freeze

    module_function

    # @param root [String] absolute directory to search
    # @return [Array<String>] absolute paths to .rb files, sorted
    def ruby_files(root)
      files = Git.repo?(root) ? from_git(root) : from_glob(root)
      files.sort
    end

    # @param root [String] absolute repository directory
    # @return [Array<String>] the `.rb` entries of {Git.listed_files}, joined
    #   onto +root+
    def from_git(root)
      Git.listed_files(root)
        .select { |rel| rel.end_with?(".rb") }
        .map { |rel| File.join(root, rel) }
    end

    # Globs for `.rb` files below +root+, dropping anything under SKIP_DIRS.
    #
    # The pattern is matched relative to +root+ via +base:+ instead of being
    # concatenated onto it, so a root whose name contains glob metacharacters
    # (`[`, `{`, `*`, `?`) is taken literally rather than silently matching
    # nothing.
    #
    # @param root [String] absolute directory to search
    # @return [Array<String>] paths to .rb files, each joined onto +root+
    def from_glob(root)
      Dir.glob(File.join("**", "*.rb"), base: root)
        .map { |rel| File.join(root, rel) }
        .reject { |abs| skip?(abs, root) }
    end

    # @param abs [String] a path located under +root+
    # @param root [String] the directory +abs+ was discovered under
    # @return [Boolean] whether any segment of the root-relative path is in SKIP_DIRS
    def skip?(abs, root)
      relative = abs.delete_prefix(root).delete_prefix(File::SEPARATOR)
      relative.split(File::SEPARATOR).any? { |segment| SKIP_DIRS.include?(segment) }
    end
  end
end