moult 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +44 -0
  3. data/LICENSE.txt +201 -0
  4. data/NOTICE +4 -0
  5. data/README.md +331 -0
  6. data/exe/moult +6 -0
  7. data/lib/moult/abc.rb +133 -0
  8. data/lib/moult/boundaries/packwerk.rb +114 -0
  9. data/lib/moult/boundaries/severity.rb +87 -0
  10. data/lib/moult/boundaries.rb +77 -0
  11. data/lib/moult/boundaries_report.rb +106 -0
  12. data/lib/moult/churn.rb +52 -0
  13. data/lib/moult/cli/boundaries_command.rb +83 -0
  14. data/lib/moult/cli/coverage_command.rb +101 -0
  15. data/lib/moult/cli/dead_code_command.rb +112 -0
  16. data/lib/moult/cli/duplication_command.rb +92 -0
  17. data/lib/moult/cli/flags_command.rb +95 -0
  18. data/lib/moult/cli/gate_command.rb +113 -0
  19. data/lib/moult/cli/health_command.rb +117 -0
  20. data/lib/moult/cli/hotspots_command.rb +104 -0
  21. data/lib/moult/cli.rb +102 -0
  22. data/lib/moult/clones.rb +91 -0
  23. data/lib/moult/cloud_upload.rb +29 -0
  24. data/lib/moult/confidence/rules.rb +128 -0
  25. data/lib/moult/confidence.rb +106 -0
  26. data/lib/moult/coverage/resolver.rb +56 -0
  27. data/lib/moult/coverage.rb +176 -0
  28. data/lib/moult/coverage_report.rb +98 -0
  29. data/lib/moult/dead_code.rb +119 -0
  30. data/lib/moult/dead_code_report.rb +65 -0
  31. data/lib/moult/diff.rb +177 -0
  32. data/lib/moult/discovery.rb +38 -0
  33. data/lib/moult/duplication/confidence.rb +92 -0
  34. data/lib/moult/duplication.rb +112 -0
  35. data/lib/moult/duplication_report.rb +89 -0
  36. data/lib/moult/flag_scanner.rb +150 -0
  37. data/lib/moult/flags/classification.rb +79 -0
  38. data/lib/moult/flags/snapshot.rb +162 -0
  39. data/lib/moult/flags/staleness.rb +145 -0
  40. data/lib/moult/flags.rb +131 -0
  41. data/lib/moult/flags_report.rb +136 -0
  42. data/lib/moult/formatters/boundaries_json.rb +20 -0
  43. data/lib/moult/formatters/boundaries_table.rb +53 -0
  44. data/lib/moult/formatters/coverage_json.rb +19 -0
  45. data/lib/moult/formatters/coverage_table.rb +60 -0
  46. data/lib/moult/formatters/dead_code_json.rb +20 -0
  47. data/lib/moult/formatters/dead_code_table.rb +66 -0
  48. data/lib/moult/formatters/duplication_json.rb +20 -0
  49. data/lib/moult/formatters/duplication_table.rb +55 -0
  50. data/lib/moult/formatters/flags_json.rb +20 -0
  51. data/lib/moult/formatters/flags_table.rb +76 -0
  52. data/lib/moult/formatters/gate_github.rb +52 -0
  53. data/lib/moult/formatters/gate_json.rb +20 -0
  54. data/lib/moult/formatters/gate_message.rb +19 -0
  55. data/lib/moult/formatters/gate_sarif.rb +78 -0
  56. data/lib/moult/formatters/gate_table.rb +71 -0
  57. data/lib/moult/formatters/health_json.rb +20 -0
  58. data/lib/moult/formatters/health_table.rb +80 -0
  59. data/lib/moult/formatters/json.rb +23 -0
  60. data/lib/moult/formatters/table.rb +70 -0
  61. data/lib/moult/formatters/text_table.rb +39 -0
  62. data/lib/moult/gate/config.rb +55 -0
  63. data/lib/moult/gate/evaluation.rb +172 -0
  64. data/lib/moult/gate/policy.rb +103 -0
  65. data/lib/moult/gate.rb +199 -0
  66. data/lib/moult/gate_report.rb +97 -0
  67. data/lib/moult/git.rb +83 -0
  68. data/lib/moult/health/score.rb +291 -0
  69. data/lib/moult/health.rb +320 -0
  70. data/lib/moult/health_report.rb +97 -0
  71. data/lib/moult/index.rb +228 -0
  72. data/lib/moult/parser.rb +101 -0
  73. data/lib/moult/rails_conventions.rb +124 -0
  74. data/lib/moult/report.rb +114 -0
  75. data/lib/moult/scoring.rb +82 -0
  76. data/lib/moult/span.rb +17 -0
  77. data/lib/moult/symbol_id.rb +30 -0
  78. data/lib/moult/symbol_scanner.rb +100 -0
  79. data/lib/moult/version.rb +5 -0
  80. data/lib/moult.rb +84 -0
  81. data/schema/boundaries.schema.json +125 -0
  82. data/schema/common.schema.json +76 -0
  83. data/schema/coverage.schema.json +83 -0
  84. data/schema/deadcode.schema.json +106 -0
  85. data/schema/duplication.schema.json +128 -0
  86. data/schema/flags.schema.json +157 -0
  87. data/schema/gate.schema.json +165 -0
  88. data/schema/health.schema.json +157 -0
  89. data/schema/hotspots.schema.json +106 -0
  90. metadata +185 -0
@@ -0,0 +1,291 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moult
4
+ module Health
5
+ # The pure model that turns the other analyses' signals into one composite
6
+ # health score. This slice's realisation of Moult's protected confidence API:
7
+ # it answers a deliberately humble question — *how healthy does this codebase
8
+ # look, given the signals we have* — and it is never a verdict. Every component
9
+ # records the observation behind its sub-score as a {Reason}, and the composite
10
+ # records which components contributed, so the number is auditable.
11
+ #
12
+ # {assess} is a pure function of small numeric inputs ({Inputs} and the per-
13
+ # analysis +*Input+ structs) — no IO, no report objects. That keeps it trivially
14
+ # unit-testable and lets the scoring be pinned against hand-built inputs: drift
15
+ # is a bug, the same treatment {ABC}, the coverage {Resolver}, and the
16
+ # duplication {Confidence} model get.
17
+ #
18
+ # The single inversion to keep in mind: every input analysis scores
19
+ # *badness* (higher = worse). Health scores *goodness* (1.0 = healthy). Every
20
+ # normalisation converts a bounded badness ratio b in [0, 1] to a health
21
+ # sub-score via {health_from_badness} — the one audited inversion point.
22
+ module Score
23
+ # ---- pinned weights -----------------------------------------------------
24
+ # Static weight of each built-in component; they sum to 1.0 and are
25
+ # renormalised over whatever components are actually present. Complexity
26
+ # anchors the composite — it is the only signal that means something with no
27
+ # git history and no coverage. Coverage and dead code tie: both are strong
28
+ # "is this code used" signals but each is conditional. Duplication is the
29
+ # softest health signal (sometimes deliberate), so it gets the smallest share.
30
+ # Boundaries (conditional: only packwerk projects) joins as a structural signal;
31
+ # the original four kept their RELATIVE proportions (each scaled by 0.8) so a
32
+ # repo without boundaries scores and renormalises exactly as before.
33
+ WEIGHTS = {
34
+ "complexity" => 0.24,
35
+ "dead_code" => 0.20,
36
+ "duplication" => 0.16,
37
+ "coverage" => 0.20,
38
+ "boundaries" => 0.20
39
+ }.freeze
40
+
41
+ # ---- pinned grade thresholds (inclusive lower bounds on the composite) ---
42
+ # Letter grades on a normalised score follow the conventions of established
43
+ # code-health tools (Code Climate's A–F maintainability grade, SonarQube's
44
+ # A–E maintainability rating, CodeScene's 1–10 Code Health). The density/ratio
45
+ # normalisation below mirrors SonarQube's debt-RATIO approach (debt relative
46
+ # to size) rather than absolute counts. NOTE: the knees and weights here are
47
+ # v1 judgement-based heuristics chosen for sane, monotonic behaviour — they
48
+ # are NOT yet calibrated against a real-world baseline corpus the way CodeScene
49
+ # calibrates its factors; corpus calibration is deliberate future work. They
50
+ # are pinned so the SIGNAL is deterministic and auditable; treat drift as a bug.
51
+ GRADE_THRESHOLDS = [
52
+ ["A", 0.90],
53
+ ["B", 0.80],
54
+ ["C", 0.70],
55
+ ["D", 0.60],
56
+ ["F", 0.0]
57
+ ].freeze
58
+
59
+ # ---- pinned complexity normalisation ------------------------------------
60
+ # Health falls linearly as the MEAN per-file risk approaches a knee. Averaging
61
+ # over files already dilutes single outliers, so a plain ratio (à la SonarQube's
62
+ # debt ratio) is honest and predictable — no extra log compression, which would
63
+ # double-penalise moderate code.
64
+ COMPLEXITY_CHURN_KNEE = 300.0 # mean complexity*churn per file at which health hits the floor
65
+ COMPLEXITY_ONLY_KNEE = 150.0 # mean summed-ABC per file at which health hits the floor (no churn signal)
66
+ COMPLEXITY_FLOOR = 0.30 # complexity alone is a soft signal: never reads as 0.0 catastrophic
67
+
68
+ # ---- pinned dead-code normalisation -------------------------------------
69
+ DEADCODE_DENSITY_KNEE = 0.12 # confidence-weighted dead density at which health hits 0
70
+ DEADCODE_UNRESOLVED_CAP = 0.95 # an unresolved index cannot certify perfect health
71
+
72
+ # ---- pinned duplication normalisation -----------------------------------
73
+ DUPLICATION_BURDEN_KNEE = 40.0 # confidence-weighted duplicated mass per file at which health hits 0
74
+
75
+ # ---- pinned boundaries normalisation ------------------------------------
76
+ BOUNDARY_BURDEN_KNEE = 4.0 # severity-weighted boundary violations per file at which health hits 0
77
+
# A single auditable observation backing a sub-score. It follows the
# rule/.../detail reason shape used across Moult, except that health
# sub-scores are RATIOS rather than signed delta-sums, so the struct stores the
# observed +value+ (a [0, 1] quantity) in place of a +delta+. It is defined
# locally so the health slice does not couple to the dead-code or duplication
# Reason structs.
Reason = Struct.new(:rule, :value, :detail) do
  # @return [Hash] the three members as a hash, with +rule+ stringified
  def to_h
    {
      rule: rule.to_s,
      value: value,
      detail: detail
    }
  end
end
88
+
89
+ # A graded component: a health sub-score in [0, 1] (1.0 = healthy), the stats
90
+ # backing it, and the reasons behind it.
91
+ Component = Struct.new(:name, :category, :score, :stats, :reasons)
92
+
93
+ # The whole-codebase (or per-file) result: the composite + the present
94
+ # components. +score+/+grade+ are nil only when every component is absent.
95
+ Composite = Struct.new(:score, :grade, :components)
96
+
97
+ # IO-free numeric inputs. The orchestrator extracts one of each from the
98
+ # matching analysis report; the model never sees a report object. A nil slot
99
+ # means the analysis was absent or errored — it is dropped from the composite.
100
+ Inputs = Struct.new(:complexity, :dead_code, :duplication, :coverage, :boundaries)
101
+
102
+ ComplexityInput = Struct.new(:file_count, :total_complexity, :total_score, :churn_present) # totals are divided by file_count; churn_present picks total_score (churn knee) over total_complexity (complexity-only knee)
103
+ DeadCodeInput = Struct.new(:symbol_count, :confidence_sum, :finding_count, :resolved) # confidence_sum / symbol_count is the dead density; a falsy resolved caps the sub-score
104
+ DuplicationInput = Struct.new(:file_count, :weighted_dup_mass, :set_count) # weighted_dup_mass / file_count is the burden; set_count is reported in stats and the reason detail
105
+ CoverageInput = Struct.new(:hot, :cold) # only hot + cold form the denominator; untracked symbols are excluded
106
+ BoundariesInput = Struct.new(:file_count, :weighted_violations, :violation_count) # weighted_violations / file_count is the burden; violation_count is reported in stats and the reason detail
107
+
108
+ module_function
109
+
# Compose the present components into one weighted composite. A component
# whose input is nil is dropped, and the pinned WEIGHTS are renormalised over
# the components that remain.
# @param inputs [Inputs]
# @return [Composite] +score+ and +grade+ are nil when no component is present
def assess(inputs)
  present = [
    complexity_component(inputs.complexity),
    dead_code_component(inputs.dead_code),
    duplication_component(inputs.duplication),
    coverage_component(inputs.coverage),
    boundaries_component(inputs.boundaries)
  ].compact

  if present.empty?
    return Composite.new(score: nil, grade: nil, components: [])
  end

  weights = present.map { |component| WEIGHTS.fetch(component.name) }
  weighted_scores = present.zip(weights).map { |component, weight| component.score * weight }
  composite_score = (weighted_scores.sum / weights.sum).round(2)

  Composite.new(score: composite_score, grade: grade_for(composite_score), components: present)
end
129
+
# Map a composite score to its letter grade: the first GRADE_THRESHOLDS row,
# in declared order, whose inclusive lower bound the score meets.
# @param score [Float] composite in [0, 1]
# @return [String] letter grade
def grade_for(score)
  matching_row = GRADE_THRESHOLDS.find { |row| score >= row.last }
  matching_row.first
end
135
+
# The renormalised share of the composite carried by one present component:
# its static weight divided by the summed static weights of every component
# that contributed.
# @param name [String] component name
# @param present_names [Array<String>] names of the components that contributed
# @return [Float] rounded to 4 decimals; 0.0 when the present weights sum to zero
def normalized_weight(name, present_names)
  denominator = present_names.sum { |present_name| WEIGHTS.fetch(present_name) }

  if denominator.zero?
    0.0
  else
    (WEIGHTS.fetch(name) / denominator).round(4)
  end
end
145
+
# The one inversion point: turn a bounded badness ratio into a health
# sub-score. The optional floor compresses the result into [floor, 1.0] so a
# soft signal never reads as a catastrophic 0.0.
# @param badness [Float] in [0, 1] (clamped); higher = worse
# @param floor [Float] lowest the sub-score may reach
# @return [Float] rounded to 2 decimals
def health_from_badness(badness, floor: 0.0)
  clamped_badness = badness.clamp(0.0, 1.0)
  goodness = 1.0 - clamped_badness
  rescaled = goodness * (1.0 - floor) + floor
  rescaled.clamp(0.0, 1.0).round(2)
end
155
+
# Grade complexity from the mean per-file risk. With a churn signal the mean
# complexity*churn score is compared with COMPLEXITY_CHURN_KNEE; without one
# the mean summed complexity is compared with COMPLEXITY_ONLY_KNEE. Either way
# the sub-score never drops below COMPLEXITY_FLOOR.
# @param input [ComplexityInput, nil]
# @return [Component, nil] nil when the input is absent
def complexity_component(input)
  return nil unless input

  if input.file_count.to_i.zero?
    return healthy_by_absence("complexity", "no methods with complexity to score")
  end

  files = input.file_count.to_f
  rule, badness, detail =
    if input.churn_present
      risk = input.total_score / files
      [
        :complexity_churn_density,
        risk / COMPLEXITY_CHURN_KNEE,
        "mean complexity*churn per file #{risk.round(1)} vs knee #{COMPLEXITY_CHURN_KNEE}"
      ]
    else
      abc = input.total_complexity / files
      [
        :complexity_only_density,
        abc / COMPLEXITY_ONLY_KNEE,
        "no churn signal; mean ABC per file #{abc.round(1)} vs knee #{COMPLEXITY_ONLY_KNEE}"
      ]
    end

  score = health_from_badness(badness, floor: COMPLEXITY_FLOOR)
  stats = {
    file_count: input.file_count,
    # Reported in both branches, even when the churn score drove the grade.
    mean_complexity: (input.total_complexity / files).round(2),
    churn_present: input.churn_present
  }

  Component.new(
    name: "complexity", category: "complexity", score: score,
    stats: stats,
    reasons: [Reason.new(rule: rule, value: score, detail: detail)]
  )
end
186
+
# Grade dead code from the confidence-weighted dead density (summed finding
# confidence over symbol count) against DEADCODE_DENSITY_KNEE. When the index
# did not resolve, the sub-score is capped at DEADCODE_UNRESOLVED_CAP and a
# second reason records the cap.
# @param input [DeadCodeInput, nil]
# @return [Component, nil] nil when the input is absent
def dead_code_component(input)
  return nil unless input

  symbols = input.symbol_count
  return healthy_by_absence("dead_code", "no symbols to score") if symbols.to_i.zero?

  density = input.confidence_sum / symbols.to_f
  score = health_from_badness(density / DEADCODE_DENSITY_KNEE)
  reasons = [
    Reason.new(
      rule: :dead_density, value: score,
      detail: "confidence-weighted dead density #{density.round(4)} vs knee #{DEADCODE_DENSITY_KNEE} " \
              "(#{input.finding_count} candidates / #{symbols} symbols)"
    )
  ]

  # The cap only bites (and is only reported) when it actually lowers the score.
  if !input.resolved && score > DEADCODE_UNRESOLVED_CAP
    score = DEADCODE_UNRESOLVED_CAP
    reasons << Reason.new(
      rule: :index_unresolved, value: score,
      detail: "index did not fully resolve; capped at #{DEADCODE_UNRESOLVED_CAP}"
    )
  end

  stats = {
    symbol_count: symbols,
    candidate_count: input.finding_count,
    confidence_sum: input.confidence_sum.round(2),
    resolved: input.resolved
  }
  Component.new(name: "dead_code", category: "dead_code", score: score, stats: stats, reasons: reasons)
end
219
+
# Grade duplication from the confidence-weighted duplicated mass per file,
# compared with DUPLICATION_BURDEN_KNEE.
# @param input [DuplicationInput, nil]
# @return [Component, nil] nil when the input is absent
def duplication_component(input)
  return nil unless input

  files = input.file_count
  return healthy_by_absence("duplication", "no files to score") if files.to_i.zero?

  per_file_mass = input.weighted_dup_mass / files.to_f
  score = health_from_badness(per_file_mass / DUPLICATION_BURDEN_KNEE)

  stats = {
    file_count: files,
    weighted_dup_mass: input.weighted_dup_mass.round(1),
    clone_sets: input.set_count
  }
  explanation = Reason.new(
    rule: :duplication_burden, value: score,
    detail: "confidence-weighted duplicated mass per file #{per_file_mass.round(2)} vs knee #{DUPLICATION_BURDEN_KNEE} " \
            "(#{input.set_count} clone sets)"
  )

  Component.new(name: "duplication", category: "duplication", score: score, stats: stats, reasons: [explanation])
end
240
+
# Grade runtime coverage from the share of tracked symbols that are cold.
# Untracked symbols are deliberately NOT in the denominator: they carry no
# signal, so they must never count as either healthy or unhealthy.
#
# Both counts are normalised with +to_i+ once, up front, so a missing (nil)
# count is treated as zero everywhere: in the guard, in the ratio, and in the
# reported stats. Previously only the guard coerced, so a nil +cold+ alongside
# a positive +hot+ raised NoMethodError on the division.
# @param input [CoverageInput, nil]
# @return [Component, nil] nil when the input is absent
def coverage_component(input)
  return nil unless input

  hot = input.hot.to_i
  cold = input.cold.to_i
  tracked = hot + cold
  return healthy_by_absence("coverage", "no tracked symbols (untracked carries no signal)") if tracked.zero?

  score = health_from_badness(cold / tracked.to_f)
  Component.new(
    name: "coverage", category: "coverage", score: score,
    stats: {hot: hot, cold: cold, tracked: tracked},
    reasons: [Reason.new(rule: :cold_ratio, value: score,
                         detail: "#{cold} cold of #{tracked} tracked symbols (untracked excluded)")]
  )
end
259
+
# Grade architecture boundaries from the severity-weighted violations per
# file, compared with BOUNDARY_BURDEN_KNEE.
# @param input [BoundariesInput, nil]
# @return [Component, nil] nil when the input is absent
def boundaries_component(input)
  return nil unless input

  files = input.file_count
  return healthy_by_absence("boundaries", "no files to score") if files.to_i.zero?

  per_file_burden = input.weighted_violations / files.to_f
  score = health_from_badness(per_file_burden / BOUNDARY_BURDEN_KNEE)

  stats = {
    file_count: files,
    weighted_violations: input.weighted_violations.round(2),
    violation_count: input.violation_count
  }
  explanation = Reason.new(
    rule: :boundary_burden, value: score,
    detail: "severity-weighted boundary violations per file #{per_file_burden.round(3)} vs knee #{BOUNDARY_BURDEN_KNEE} " \
            "(#{input.violation_count} violations)"
  )

  # The category differs from the component name here, unlike the other components.
  Component.new(name: "boundaries", category: "architecture_boundary", score: score,
                stats: stats, reasons: [explanation])
end
280
+
# Build a present component that is vacuously healthy because there was
# nothing to score. This is distinct from an absent (nil) component, and the
# single :no_signal reason says why.
# @param name [String] component name, also used as its category
# @param detail [String] explanation stored on the reason
# @return [Component] score 1.0 with empty stats
def healthy_by_absence(name, detail)
  no_signal = Reason.new(rule: :no_signal, value: 1.0, detail: detail)
  Component.new(name: name, category: name, score: 1.0, stats: {}, reasons: [no_signal])
end
289
+ end
290
+ end
291
+ end
@@ -0,0 +1,320 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moult
4
+ # Orchestrates the health score: it runs each existing analysis, extracts the
5
+ # numeric signals each one exposes, and composes them through the pure
6
+ # {Health::Score} model into one auditable {HealthReport}. There is no external
7
+ # tool here — the "adapter" is this composition of Moult's own reports.
8
+ #
9
+ # This is the only layer that does IO and knows how the signals are sourced;
10
+ # {Score} stays a pure function of the extracted numbers so it can be pinned in
11
+ # isolation. Every analysis is run inside its own rescue: a failure degrades that
12
+ # one component to `present: false` with a diagnostic, never crashing the whole
13
+ # health run.
14
+ module Health
15
+ # Fixed component order, so output is stable and every slot is accounted for
16
+ # (present, skipped, or errored).
17
+ KNOWN_COMPONENTS = %w[complexity dead_code duplication coverage boundaries].freeze
18
+
19
+ # Cap on join keys serialized per file, so the roll-up cannot balloon on a
20
+ # large file; the true total is recorded alongside.
21
+ SYMBOLS_PER_FILE = 20
22
+
# Outcome of one isolated analysis run: the produced +value+, or the +error+
# message when the analysis raised.
Run = Struct.new(:value, :error) do
  # @return [Boolean] true only when no error was recorded and a value exists
  def ok?
    return false unless error.nil?

    !value.nil?
  end
end
29
+
30
+ module_function
31
+
# Run every analysis in isolation, reduce each successful report to its pure
# numeric input, score them through {Score.assess}, and assemble the
# {HealthReport} (overall composite, per-component views, per-file roll-up).
#
# NOTE(review): Churn.collect is called outside the per-analysis rescue, so an
# exception raised there would propagate to the caller; confirm it cannot raise.
# @param root [String] absolute analysis root
# @param files [Array<String>] absolute Ruby file paths to analyse
# @param index [Index] resolved definition/reference index (drives dead-code + coverage)
# @param rails [RailsConventions] Rails entrypoint awareness for dead-code
# @param coverage [Coverage::Dataset, nil] runtime coverage to merge (adds the coverage component)
# @param since [String, nil] churn window start for the complexity component;
#   falls back to Churn::DEFAULT_SINCE
# @param git_ref passed through unchanged to the HealthReport
# @param generated_at passed through unchanged to the HealthReport
# @param churn_window passed through unchanged to the HealthReport
# @param churn_since passed through unchanged to the HealthReport
# @return [HealthReport]
def build_report(root:, files:, index:, rails:, coverage: nil, since: nil,
                 git_ref: nil, generated_at: nil, churn_window: nil, churn_since: nil)
  churn = Churn.collect(root: root, since: since || Churn::DEFAULT_SINCE)

  runs = {
    "complexity" => run { Scoring.build_report(root: root, files: files, churn: churn) },
    "dead_code" => run { DeadCode.build_report(root: root, files: files, index: index, rails: rails, coverage: coverage) },
    "duplication" => run { Duplication.build_report(root: root, files: files) },
    "coverage" => run { CoverageReport.build(index: index, coverage: coverage, root: root) if coverage },
    "boundaries" => run { Boundaries.build_report(root: root) }
  }
  complexity_run, dead_code_run, duplication_run, coverage_run, boundaries_run =
    runs.values_at("complexity", "dead_code", "duplication", "coverage", "boundaries")

  # Derive churn presence from the JOINED hotspots, not the raw churn hash: a
  # repo-relative churn map run against a subdir root won't join to the scored
  # files, so the honest signal is "did any scored file actually carry churn".
  churn_present = complexity_run.ok? &&
    complexity_run.value.hotspots.any? { |hotspot| hotspot.churn.to_i.positive? }

  inputs = Score::Inputs.new(
    complexity: (complexity_input(complexity_run.value, churn_present) if complexity_run.ok?),
    dead_code: (dead_code_input(dead_code_run.value, index) if dead_code_run.ok?),
    duplication: (duplication_input(duplication_run.value, files.size) if duplication_run.ok?),
    coverage: (coverage_input(coverage_run.value) if coverage_run.ok?),
    # Absent (skipped) unless the project is actually packwerk-configured: an
    # unconfigured repo has no boundary signal and must not read as healthy 1.0.
    boundaries: (boundaries_input(boundaries_run.value, files.size) if boundaries_present?(boundaries_run))
  )

  composite = Score.assess(inputs)
  component_list = component_views(composite, runs, coverage_requested: !coverage.nil?)
  per_file = file_rollup(runs, index, churn_present)

  HealthReport.new(
    root: root,
    score: composite.score,
    grade: composite.grade,
    components: component_list,
    files: per_file,
    git_ref: git_ref,
    generated_at: generated_at,
    coverage_source: coverage&.source,
    churn_window: churn_window,
    churn_since: churn_since
  )
end
84
+
# Execute the given block as one isolated analysis. On success the Run carries
# the block's result; if a StandardError is raised, the Run carries its message
# instead, so only that component degrades rather than the whole health run.
# @yieldreturn the analysis report (or nil when the analysis was skipped)
# @return [Run]
def run
  result = yield
  Run.new(value: result, error: nil)
rescue => failure
  Run.new(value: nil, error: failure.message)
end
92
+
93
+ # ---- signal extraction (heavy report -> pure numeric input) ---------------
94
+
# Reduce a complexity report to its numeric input: the number of hotspots and
# their summed complexity and score.
# @param report an object responding to +hotspots+
# @param churn_present [Boolean] whether any scored file carried churn
# @return [Score::ComplexityInput]
def complexity_input(report, churn_present)
  hotspots = report.hotspots
  Score::ComplexityInput.new(
    file_count: hotspots.size,
    total_complexity: hotspots.sum { |hotspot| hotspot.complexity },
    total_score: hotspots.sum { |hotspot| hotspot.score },
    churn_present: churn_present
  )
end
104
+
# Reduce a dead-code report to its numeric input. The symbol count comes from
# the index's definitions; the confidence sum and candidate count come from the
# report's findings.
# @param report an object responding to +findings+ and +resolved+
# @param index an object responding to +definitions+
# @return [Score::DeadCodeInput]
def dead_code_input(report, index)
  candidates = report.findings
  Score::DeadCodeInput.new(
    symbol_count: index.definitions.size,
    confidence_sum: candidates.sum { |finding| finding.confidence },
    finding_count: candidates.size,
    resolved: report.resolved
  )
end
113
+
# Reduce a duplication report to its numeric input.
# @param report an object responding to +findings+
# @param file_count [Integer] number of analysed files
# @return [Score::DuplicationInput]
def duplication_input(report, file_count)
  clone_sets = report.findings

  # Only the EXTRA copies are consolidatable mass; confidence-weight so a
  # low-confidence "similar" rhyme barely registers.
  consolidatable = clone_sets.sum do |finding|
    extra_copies = finding.occurrences.size - 1
    finding.confidence * finding.mass * extra_copies
  end

  Score::DuplicationInput.new(
    file_count: file_count,
    weighted_dup_mass: consolidatable,
    set_count: clone_sets.size
  )
end
124
+
# Reduce a coverage report to its numeric input: the :hot and :cold entries of
# the report's summary.
# @param report an object responding to +summary+
# @return [Score::CoverageInput]
def coverage_input(report)
  summary = report.summary
  Score::CoverageInput.new(
    hot: summary[:hot],
    cold: summary[:cold]
  )
end
129
+
# Whether a boundaries run should contribute a component. It must have run
# successfully AND the report must say the project is configured; an
# unconfigured repo yields a successful-but-empty report that must be SKIPPED,
# not scored as vacuously healthy.
# @param run [Run]
# @return false when the run is not ok, otherwise the report's +configured+ value
def boundaries_present?(run)
  return false unless run.ok?

  run.value.configured
end
136
+
# Reduce a boundaries report to its numeric input. Every occurrence of a
# finding counts as one violation, weighted by that finding's severity weight.
# @param report an object responding to +findings+
# @param file_count [Integer] number of analysed files
# @return [Score::BoundariesInput]
def boundaries_input(report, file_count)
  violations = report.findings
  severity_weighted = violations.sum { |finding| boundary_weight(finding) * finding.occurrences.size }
  occurrence_total = violations.sum { |finding| finding.occurrences.size }

  Score::BoundariesInput.new(
    file_count: file_count,
    weighted_violations: severity_weighted,
    violation_count: occurrence_total
  )
end
144
+
# Severity weight of one boundary finding; a severity missing from the weight
# table falls back to the "low" weight.
# @param finding an object responding to +severity+
# @return the weight stored in Boundaries::Severity::SEVERITY_WEIGHT
def boundary_weight(finding)
  weights = Boundaries::Severity::SEVERITY_WEIGHT
  fallback = weights.fetch("low")
  weights.fetch(finding.severity, fallback)
end
148
+
149
+ # ---- component views (every slot, present or not) -------------------------
150
+
# Build one view per KNOWN_COMPONENTS slot, in that fixed order. A component
# that contributed to the composite gets its score, stats, reasons and
# renormalised weight; a missing one is reported as not present with a
# diagnostic explaining why.
# @param composite [Score::Composite]
# @param runs [Hash{String => Run}] analysis runs keyed by component name
# @param coverage_requested [Boolean] whether coverage data was supplied
# @return [Array<HealthReport::ComponentView>]
def component_views(composite, runs, coverage_requested:)
  scored = composite.components
  by_name = scored.to_h { |component| [component.name, component] }
  contributing = scored.map(&:name)

  KNOWN_COMPONENTS.map do |name|
    component = by_name[name]
    shared = {name: name, weight: Score::WEIGHTS.fetch(name)}

    specifics =
      if component
        {
          category: component.category,
          present: true,
          score: component.score,
          normalized_weight: Score.normalized_weight(name, contributing),
          summary: component.stats,
          reasons: component.reasons,
          diagnostic: nil
        }
      else
        {
          category: nil,
          present: false,
          score: nil,
          normalized_weight: nil,
          summary: {},
          reasons: [],
          diagnostic: diagnostic_for(name, runs[name], coverage_requested)
        }
      end

    HealthReport::ComponentView.new(**shared, **specifics)
  end
end
184
+
# Explain why a component is not present. A recorded run error wins; otherwise
# the message depends on which component it is.
# @param name [String] component name
# @param run [Run, nil] the component's analysis run, if any
# @param coverage_requested [Boolean] whether coverage data was supplied
# @return [String]
def diagnostic_for(name, run, coverage_requested)
  failure = run&.error
  return failure if failure

  if name == "coverage"
    return coverage_requested ? "coverage produced no usable signal" : "no --coverage supplied"
  end
  return "not a packwerk project (no packwerk.yml)" if name == "boundaries"

  "analysis produced no result"
end
196
+
197
+ # ---- per-file roll-up (the cross-analysis join surface) -------------------
198
+
# Join the per-analysis results by file path and score each file on its own.
# A file is listed when it has a hotspot, a dead-code finding, a clone
# occurrence, a boundary violation, or at least one cold coverage entry.
# @param runs [Hash{String => Run}] analysis runs keyed by component name
# @param index an object responding to +definitions+
# @param churn_present [Boolean] whether any scored file carried churn
# @return [Array<HealthReport::FileView>] least-healthy first
def file_rollup(runs, index, churn_present)
  complexity_run, dead_code_run, duplication_run, coverage_run, boundaries_run =
    runs.values_at("complexity", "dead_code", "duplication", "coverage", "boundaries")

  hotspot_by_path = complexity_run.ok? ? complexity_run.value.hotspots.to_h { |hotspot| [hotspot.path, hotspot] } : {}
  dead_by_path = dead_code_run.ok? ? dead_code_run.value.findings.group_by(&:path) : {}
  clone_by_path = duplication_run.ok? ? clones_by_path(duplication_run.value) : {}
  cov_by_path = coverage_run.ok? ? coverage_by_path(coverage_run.value) : {}
  bnd_by_path = boundaries_present?(boundaries_run) ? boundaries_by_path(boundaries_run.value) : {}
  definitions_per_path = index.definitions.group_by(&:path).transform_values(&:size)

  # Coverage alone only lists a file when something in it is cold.
  cold_paths = cov_by_path.keys.select { |path| cov_by_path[path][:cold].positive? }

  paths = Set.new
  [hotspot_by_path.keys, dead_by_path.keys, clone_by_path.keys, cold_paths, bnd_by_path.keys].each do |group|
    paths.merge(group)
  end

  views = paths.map do |path|
    file_view(
      path, hotspot_by_path[path], dead_by_path[path], clone_by_path[path],
      cov_by_path[path], bnd_by_path[path], definitions_per_path[path], churn_present
    )
  end

  # Least-healthy first, path as a deterministic tie-break.
  views.sort_by { |view| [view.score, view.path] }
end
221
+
# Score a single file from whichever per-file signals it has; a missing signal
# leaves that component out of the file's composite.
# @param path [String] file path
# @param hotspot the file's complexity hotspot, or nil
# @param dead_findings [Array, nil] the file's dead-code findings
# @param clone [Hash, nil] {weighted_mass:, sets:, symbol_ids:} for the file
# @param coverage [Hash, nil] {hot:, cold:, cold_ids:} for the file
# @param boundaries [Hash, nil] {weighted:, count:} for the file
# @param symbol_count [Integer, nil] number of definitions indexed in the file
# @param churn_present [Boolean] whether any scored file carried churn
# @return [HealthReport::FileView]
def file_view(path, hotspot, dead_findings, clone, coverage, boundaries, symbol_count, churn_present)
  complexity_in = hotspot && Score::ComplexityInput.new(
    file_count: 1, total_complexity: hotspot.complexity,
    total_score: hotspot.score, churn_present: churn_present
  )
  dead_code_in = dead_findings && Score::DeadCodeInput.new(
    # Never fewer symbols than findings, whatever the index says for this file.
    symbol_count: [symbol_count.to_i, dead_findings.size].max,
    confidence_sum: dead_findings.sum(&:confidence),
    finding_count: dead_findings.size, resolved: true
  )
  duplication_in = clone && Score::DuplicationInput.new(
    file_count: 1, weighted_dup_mass: clone[:weighted_mass], set_count: clone[:sets]
  )
  coverage_in = coverage && tracked?(coverage) && Score::CoverageInput.new(
    hot: coverage[:hot], cold: coverage[:cold]
  )
  boundaries_in = boundaries && Score::BoundariesInput.new(
    file_count: 1, weighted_violations: boundaries[:weighted], violation_count: boundaries[:count]
  )

  composite = Score.assess(
    Score::Inputs.new(
      complexity: complexity_in,
      dead_code: dead_code_in,
      duplication: duplication_in,
      coverage: coverage_in,
      boundaries: boundaries_in
    )
  )
  contributing_ids = file_symbol_ids(hotspot, dead_findings, clone, coverage)

  HealthReport::FileView.new(
    path: path,
    score: composite.score,
    grade: composite.grade,
    components: composite.components.to_h { |component| [component.name, component.score] },
    # Serialise at most SYMBOLS_PER_FILE ids; the true total is kept alongside.
    symbol_ids: contributing_ids.first(SYMBOLS_PER_FILE),
    symbol_count: contributing_ids.size
  )
end
255
+
# Collect the join keys contributing to a file, in the order dead-finding /
# clone / coverage / hotspot, with nils removed and duplicates collapsed
# (first occurrence wins).
# @param hotspot the file's hotspot (responds to +methods+), or nil
# @param dead_findings [Array, nil] findings responding to +symbol_id+
# @param clone [Hash, nil] bucket with a :symbol_ids array
# @param coverage [Hash, nil] bucket with a :cold_ids array
# @return [Array] unique, non-nil symbol ids
def file_symbol_ids(hotspot, dead_findings, clone, coverage)
  groups = []
  groups << dead_findings.map(&:symbol_id) if dead_findings
  groups << clone[:symbol_ids] if clone
  groups << coverage[:cold_ids] if coverage
  groups << hotspot.methods.map(&:symbol_id) if hotspot

  groups.reduce([], :+).compact.uniq
end
266
+
# Whether a per-file coverage bucket has any tracked (hot or cold) symbol.
# @param coverage [Hash] bucket with :hot and :cold counts
# @return [Boolean]
def tracked?(coverage)
  tracked_total = coverage[:hot] + coverage[:cold]
  tracked_total.positive?
end
270
+
# Group clone occurrences by file: path => {weighted_mass:, sets:, symbol_ids:}.
# Every occurrence adds the finding's confidence * mass to its file's bucket
# and bumps that bucket's set counter; its symbol id is recorded when present.
# The result is a plain hash, so reading a missing path returns nil and never
# creates a bucket.
# @param report an object responding to +findings+
# @return [Hash{String => Hash}]
def clones_by_path(report)
  buckets = {}

  report.findings.each do |finding|
    finding.occurrences.each do |occurrence|
      bucket = (buckets[occurrence.path] ||= {weighted_mass: 0.0, sets: 0, symbol_ids: []})
      bucket[:weighted_mass] += finding.confidence * finding.mass
      bucket[:sets] += 1

      symbol_id = occurrence.symbol_id
      bucket[:symbol_ids] << symbol_id if symbol_id
    end
  end

  buckets
end
285
+
# Group boundary-violation occurrences by file: path => {weighted:, count:}.
# Boundary occurrences carry a null symbol_id (file-keyed), so they contribute
# to the per-file roll-up at PATH granularity only, never to file_symbol_ids.
# The result is a plain hash, so reading a missing path returns nil and never
# creates a bucket.
# @param report an object responding to +findings+
# @return [Hash{String => Hash}]
def boundaries_by_path(report)
  buckets = {}

  report.findings.each do |finding|
    severity_weight = boundary_weight(finding)
    finding.occurrences.each do |occurrence|
      bucket = (buckets[occurrence.path] ||= {weighted: 0.0, count: 0})
      bucket[:weighted] += severity_weight
      bucket[:count] += 1
    end
  end

  buckets
end
301
+
# Group coverage entries by file: path => {hot:, cold:, cold_ids:}. The path is
# the part of the entry's symbol_id before the first ":". Only :hot and :cold
# entries create or touch a bucket; cold entries also record their symbol id.
# The result is a plain hash, so reading a missing path returns nil and never
# creates a bucket.
# @param report an object responding to +entries+
# @return [Hash{String => Hash}]
def coverage_by_path(report)
  buckets = {}

  report.entries.each do |entry|
    path = entry.symbol_id.split(":", 3).first
    runtime = entry.runtime
    next unless runtime == :hot || runtime == :cold

    bucket = (buckets[path] ||= {hot: 0, cold: 0, cold_ids: []})
    if runtime == :hot
      bucket[:hot] += 1
    else
      bucket[:cold] += 1
      bucket[:cold_ids] << entry.symbol_id
    end
  end

  buckets
end
317
+ end
318
+ end
319
+
320
+ require_relative "health/score"