moult 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE.txt +201 -0
- data/NOTICE +4 -0
- data/README.md +331 -0
- data/exe/moult +6 -0
- data/lib/moult/abc.rb +133 -0
- data/lib/moult/boundaries/packwerk.rb +114 -0
- data/lib/moult/boundaries/severity.rb +87 -0
- data/lib/moult/boundaries.rb +77 -0
- data/lib/moult/boundaries_report.rb +106 -0
- data/lib/moult/churn.rb +52 -0
- data/lib/moult/cli/boundaries_command.rb +83 -0
- data/lib/moult/cli/coverage_command.rb +101 -0
- data/lib/moult/cli/dead_code_command.rb +112 -0
- data/lib/moult/cli/duplication_command.rb +92 -0
- data/lib/moult/cli/flags_command.rb +95 -0
- data/lib/moult/cli/gate_command.rb +113 -0
- data/lib/moult/cli/health_command.rb +117 -0
- data/lib/moult/cli/hotspots_command.rb +104 -0
- data/lib/moult/cli.rb +102 -0
- data/lib/moult/clones.rb +91 -0
- data/lib/moult/cloud_upload.rb +29 -0
- data/lib/moult/confidence/rules.rb +128 -0
- data/lib/moult/confidence.rb +106 -0
- data/lib/moult/coverage/resolver.rb +56 -0
- data/lib/moult/coverage.rb +176 -0
- data/lib/moult/coverage_report.rb +98 -0
- data/lib/moult/dead_code.rb +119 -0
- data/lib/moult/dead_code_report.rb +65 -0
- data/lib/moult/diff.rb +177 -0
- data/lib/moult/discovery.rb +38 -0
- data/lib/moult/duplication/confidence.rb +92 -0
- data/lib/moult/duplication.rb +112 -0
- data/lib/moult/duplication_report.rb +89 -0
- data/lib/moult/flag_scanner.rb +150 -0
- data/lib/moult/flags/classification.rb +79 -0
- data/lib/moult/flags/snapshot.rb +162 -0
- data/lib/moult/flags/staleness.rb +145 -0
- data/lib/moult/flags.rb +131 -0
- data/lib/moult/flags_report.rb +136 -0
- data/lib/moult/formatters/boundaries_json.rb +20 -0
- data/lib/moult/formatters/boundaries_table.rb +53 -0
- data/lib/moult/formatters/coverage_json.rb +19 -0
- data/lib/moult/formatters/coverage_table.rb +60 -0
- data/lib/moult/formatters/dead_code_json.rb +20 -0
- data/lib/moult/formatters/dead_code_table.rb +66 -0
- data/lib/moult/formatters/duplication_json.rb +20 -0
- data/lib/moult/formatters/duplication_table.rb +55 -0
- data/lib/moult/formatters/flags_json.rb +20 -0
- data/lib/moult/formatters/flags_table.rb +76 -0
- data/lib/moult/formatters/gate_github.rb +52 -0
- data/lib/moult/formatters/gate_json.rb +20 -0
- data/lib/moult/formatters/gate_message.rb +19 -0
- data/lib/moult/formatters/gate_sarif.rb +78 -0
- data/lib/moult/formatters/gate_table.rb +71 -0
- data/lib/moult/formatters/health_json.rb +20 -0
- data/lib/moult/formatters/health_table.rb +80 -0
- data/lib/moult/formatters/json.rb +23 -0
- data/lib/moult/formatters/table.rb +70 -0
- data/lib/moult/formatters/text_table.rb +39 -0
- data/lib/moult/gate/config.rb +55 -0
- data/lib/moult/gate/evaluation.rb +172 -0
- data/lib/moult/gate/policy.rb +103 -0
- data/lib/moult/gate.rb +199 -0
- data/lib/moult/gate_report.rb +97 -0
- data/lib/moult/git.rb +83 -0
- data/lib/moult/health/score.rb +291 -0
- data/lib/moult/health.rb +320 -0
- data/lib/moult/health_report.rb +97 -0
- data/lib/moult/index.rb +228 -0
- data/lib/moult/parser.rb +101 -0
- data/lib/moult/rails_conventions.rb +124 -0
- data/lib/moult/report.rb +114 -0
- data/lib/moult/scoring.rb +82 -0
- data/lib/moult/span.rb +17 -0
- data/lib/moult/symbol_id.rb +30 -0
- data/lib/moult/symbol_scanner.rb +100 -0
- data/lib/moult/version.rb +5 -0
- data/lib/moult.rb +84 -0
- data/schema/boundaries.schema.json +125 -0
- data/schema/common.schema.json +76 -0
- data/schema/coverage.schema.json +83 -0
- data/schema/deadcode.schema.json +106 -0
- data/schema/duplication.schema.json +128 -0
- data/schema/flags.schema.json +157 -0
- data/schema/gate.schema.json +165 -0
- data/schema/health.schema.json +157 -0
- data/schema/hotspots.schema.json +106 -0
- metadata +185 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
  module Health
    # The pure model that turns the other analyses' signals into one composite
    # health score. This slice's realisation of Moult's protected confidence API:
    # it answers a deliberately humble question — *how healthy does this codebase
    # look, given the signals we have* — and it is never a verdict. Every component
    # records the observation behind its sub-score as a {Reason}, and the composite
    # records which components contributed, so the number is auditable.
    #
    # {assess} is a pure function of small numeric inputs ({Inputs} and the per-
    # analysis +*Input+ structs) — no IO, no report objects. That keeps it trivially
    # unit-testable and lets the scoring be pinned against hand-built inputs: drift
    # is a bug, the same treatment {ABC}, the coverage {Resolver}, and the
    # duplication {Confidence} model get.
    #
    # The single inversion to keep in mind: the input analyses all score
    # *badness* (higher = worse). Health scores *goodness* (1.0 = healthy). Every
    # normalisation converts a bounded badness ratio b in [0, 1] to a health
    # sub-score via {health_from_badness} — the one audited inversion point.
    module Score
      # ---- pinned weights -----------------------------------------------------
      # Static weight of each built-in component; they sum to 1.0 and are
      # renormalised over whatever components are actually present. Complexity
      # anchors the composite — it is the only signal that means something with no
      # git history and no coverage. Coverage and dead code tie: both are strong
      # "is this code used" signals but each is conditional. Duplication is the
      # softest health signal (sometimes deliberate), so it gets the smallest share.
      # Boundaries (conditional: only packwerk projects) joins as a structural signal;
      # the original four kept their RELATIVE proportions (each scaled by 0.8) so a
      # repo without boundaries scores and renormalises exactly as before.
      WEIGHTS = {
        "complexity" => 0.24,
        "dead_code" => 0.20,
        "duplication" => 0.16,
        "coverage" => 0.20,
        "boundaries" => 0.20
      }.freeze

      # ---- pinned grade thresholds (inclusive lower bounds on the composite) ---
      # Letter grades on a normalised score follow the conventions of established
      # code-health tools (Code Climate's A–F maintainability grade, SonarQube's
      # A–E maintainability rating, CodeScene's 1–10 Code Health). The density/ratio
      # normalisation below mirrors SonarQube's debt-RATIO approach (debt relative
      # to size) rather than absolute counts. NOTE: the knees and weights here are
      # v1 judgement-based heuristics chosen for sane, monotonic behaviour — they
      # are NOT yet calibrated against a real-world baseline corpus the way CodeScene
      # calibrates its factors; corpus calibration is deliberate future work. They
      # are pinned so the SIGNAL is deterministic and auditable; treat drift as a bug.
      GRADE_THRESHOLDS = [
        ["A", 0.90],
        ["B", 0.80],
        ["C", 0.70],
        ["D", 0.60],
        ["F", 0.0]
      ].freeze

      # ---- pinned complexity normalisation ------------------------------------
      # Health falls linearly as the MEAN per-file risk approaches a knee. Averaging
      # over files already dilutes single outliers, so a plain ratio (à la SonarQube's
      # debt ratio) is honest and predictable — no extra log compression, which would
      # double-penalise moderate code.
      COMPLEXITY_CHURN_KNEE = 300.0 # mean complexity*churn per file at which health hits the floor
      COMPLEXITY_ONLY_KNEE = 150.0 # mean summed-ABC per file at which health hits the floor (no churn signal)
      COMPLEXITY_FLOOR = 0.30 # complexity alone is a soft signal: never reads as 0.0 catastrophic

      # ---- pinned dead-code normalisation -------------------------------------
      DEADCODE_DENSITY_KNEE = 0.12 # confidence-weighted dead density at which health hits 0
      DEADCODE_UNRESOLVED_CAP = 0.95 # an unresolved index cannot certify perfect health

      # ---- pinned duplication normalisation -----------------------------------
      DUPLICATION_BURDEN_KNEE = 40.0 # confidence-weighted duplicated mass per file at which health hits 0

      # ---- pinned boundaries normalisation ------------------------------------
      BOUNDARY_BURDEN_KNEE = 4.0 # severity-weighted boundary violations per file at which health hits 0

      # Category reported for the boundaries component on EVERY path (scored or
      # vacuously healthy), so consumers never see two spellings for one component.
      BOUNDARIES_CATEGORY = "architecture_boundary"

      # One auditable observation behind a sub-score. Mirrors the rule/.../detail
      # reason shape used across Moult, but health sub-scores are RATIOS not signed
      # delta-sums, so it carries the observed +value+ (a [0, 1] quantity) rather
      # than a +delta+. Kept local so the health slice does not couple to the
      # dead-code or duplication Reason structs.
      Reason = Struct.new(:rule, :value, :detail) do
        # @return [Hash] serialisable form; the rule symbol is stringified
        def to_h
          {rule: rule.to_s, value: value, detail: detail}
        end
      end

      # A graded component: a health sub-score in [0, 1] (1.0 = healthy), the stats
      # backing it, and the reasons behind it.
      Component = Struct.new(:name, :category, :score, :stats, :reasons)

      # The whole-codebase (or per-file) result: the composite + the present
      # components. +score+/+grade+ are nil only when every component is absent.
      Composite = Struct.new(:score, :grade, :components)

      # IO-free numeric inputs. The orchestrator extracts one of each from the
      # matching analysis report; the model never sees a report object. A nil slot
      # means the analysis was absent or errored — it is dropped from the composite.
      Inputs = Struct.new(:complexity, :dead_code, :duplication, :coverage, :boundaries)

      ComplexityInput = Struct.new(:file_count, :total_complexity, :total_score, :churn_present)
      DeadCodeInput = Struct.new(:symbol_count, :confidence_sum, :finding_count, :resolved)
      DuplicationInput = Struct.new(:file_count, :weighted_dup_mass, :set_count)
      CoverageInput = Struct.new(:hot, :cold)
      BoundariesInput = Struct.new(:file_count, :weighted_violations, :violation_count)

      module_function

      # Grade every present input and fold the sub-scores into one weighted
      # composite. Weights are renormalised over the components that are present,
      # so an absent analysis neither helps nor hurts.
      #
      # @param inputs [Inputs]
      # @return [Composite] score/grade are nil when no component is present
      def assess(inputs)
        components = [
          complexity_component(inputs.complexity),
          dead_code_component(inputs.dead_code),
          duplication_component(inputs.duplication),
          coverage_component(inputs.coverage),
          boundaries_component(inputs.boundaries)
        ].compact

        return Composite.new(score: nil, grade: nil, components: []) if components.empty?

        total_weight = components.sum { |c| WEIGHTS.fetch(c.name) }
        weighted = components.sum { |c| c.score * WEIGHTS.fetch(c.name) }
        overall = (weighted / total_weight).round(2)

        Composite.new(score: overall, grade: grade_for(overall), components: components)
      end

      # @param score [Float] composite in [0, 1]
      # @return [String] letter grade
      def grade_for(score)
        match = GRADE_THRESHOLDS.find { |(_, low)| score >= low }
        # A negative or NaN score satisfies no threshold; report the lowest grade
        # rather than raising NoMethodError on nil.
        (match || GRADE_THRESHOLDS.last).first
      end

      # The renormalised share a present component carried of the composite.
      # @param name [String] component name
      # @param present_names [Array<String>] names of the components that contributed
      # @return [Float] rounded to 4 decimals; 0.0 when nothing contributed
      def normalized_weight(name, present_names)
        total = present_names.sum { |n| WEIGHTS.fetch(n) }
        return 0.0 if total.zero?
        (WEIGHTS.fetch(name) / total).round(4)
      end

      # Convert a bounded badness ratio to a health sub-score, applying an optional
      # floor so a soft signal never reads as catastrophic 0.0.
      # @param badness [Float] in [0, 1] (clamped); higher = worse
      # @param floor [Float] lowest the sub-score may reach
      # @return [Float] rounded to 2 decimals
      def health_from_badness(badness, floor: 0.0)
        b = badness.clamp(0.0, 1.0)
        ((1.0 - b) * (1.0 - floor) + floor).clamp(0.0, 1.0).round(2)
      end

      # Mean per-file risk against the matching knee: complexity*churn when any
      # churn signal is present, summed ABC otherwise. Floored at COMPLEXITY_FLOOR.
      #
      # @param input [ComplexityInput, nil]
      # @return [Component, nil] nil when the analysis is absent
      def complexity_component(input)
        return nil unless input
        return healthy_by_absence("complexity", "no methods with complexity to score") if input.file_count.to_i.zero?

        files = input.file_count.to_f
        if input.churn_present
          mean_risk = input.total_score / files
          badness = mean_risk / COMPLEXITY_CHURN_KNEE
          rule = :complexity_churn_density
          detail = "mean complexity*churn per file #{mean_risk.round(1)} vs knee #{COMPLEXITY_CHURN_KNEE}"
        else
          mean_cx = input.total_complexity / files
          badness = mean_cx / COMPLEXITY_ONLY_KNEE
          rule = :complexity_only_density
          detail = "no churn signal; mean ABC per file #{mean_cx.round(1)} vs knee #{COMPLEXITY_ONLY_KNEE}"
        end

        score = health_from_badness(badness, floor: COMPLEXITY_FLOOR)
        Component.new(
          name: "complexity", category: "complexity", score: score,
          stats: {
            file_count: input.file_count,
            mean_complexity: (input.total_complexity / files).round(2),
            churn_present: input.churn_present
          },
          reasons: [Reason.new(rule: rule, value: score, detail: detail)]
        )
      end

      # Confidence-weighted dead density against its knee. An unresolved index
      # additionally caps the sub-score at DEADCODE_UNRESOLVED_CAP and records why.
      #
      # @param input [DeadCodeInput, nil]
      # @return [Component, nil] nil when the analysis is absent
      def dead_code_component(input)
        return nil unless input
        return healthy_by_absence("dead_code", "no symbols to score") if input.symbol_count.to_i.zero?

        density = input.confidence_sum / input.symbol_count.to_f
        score = health_from_badness(density / DEADCODE_DENSITY_KNEE)
        reasons = [Reason.new(rule: :dead_density, value: score,
          detail: "confidence-weighted dead density #{density.round(4)} vs knee #{DEADCODE_DENSITY_KNEE} " \
                  "(#{input.finding_count} candidates / #{input.symbol_count} symbols)")]

        unless input.resolved
          capped = [score, DEADCODE_UNRESOLVED_CAP].min
          # Only record the cap when it actually lowered the score.
          if capped < score
            score = capped
            reasons << Reason.new(rule: :index_unresolved, value: score,
              detail: "index did not fully resolve; capped at #{DEADCODE_UNRESOLVED_CAP}")
          end
        end

        Component.new(
          name: "dead_code", category: "dead_code", score: score,
          stats: {
            symbol_count: input.symbol_count,
            candidate_count: input.finding_count,
            confidence_sum: input.confidence_sum.round(2),
            resolved: input.resolved
          },
          reasons: reasons
        )
      end

      # Confidence-weighted duplicated mass per file against its knee.
      #
      # @param input [DuplicationInput, nil]
      # @return [Component, nil] nil when the analysis is absent
      def duplication_component(input)
        return nil unless input
        return healthy_by_absence("duplication", "no files to score") if input.file_count.to_i.zero?

        burden = input.weighted_dup_mass / input.file_count.to_f
        score = health_from_badness(burden / DUPLICATION_BURDEN_KNEE)
        Component.new(
          name: "duplication", category: "duplication", score: score,
          stats: {
            file_count: input.file_count,
            weighted_dup_mass: input.weighted_dup_mass.round(1),
            clone_sets: input.set_count
          },
          reasons: [Reason.new(rule: :duplication_burden, value: score,
            detail: "confidence-weighted duplicated mass per file #{burden.round(2)} vs knee #{DUPLICATION_BURDEN_KNEE} " \
                    "(#{input.set_count} clone sets)")]
        )
      end

      # Share of tracked symbols that are cold. A nil +hot+/+cold+ count is read
      # as 0 everywhere (denominator, ratio, stats, detail) so a summary missing
      # one key cannot raise.
      #
      # @param input [CoverageInput, nil]
      # @return [Component, nil] nil when the analysis is absent
      def coverage_component(input)
        return nil unless input
        hot = input.hot.to_i
        cold = input.cold.to_i
        tracked = hot + cold
        # untracked is deliberately NOT in the denominator: it is no signal, so it
        # must never count as either healthy or unhealthy.
        return healthy_by_absence("coverage", "no tracked symbols (untracked carries no signal)") if tracked.zero?

        cold_ratio = cold / tracked.to_f
        score = health_from_badness(cold_ratio)
        Component.new(
          name: "coverage", category: "coverage", score: score,
          stats: {hot: hot, cold: cold, tracked: tracked},
          reasons: [Reason.new(rule: :cold_ratio, value: score,
            detail: "#{cold} cold of #{tracked} tracked symbols (untracked excluded)")]
        )
      end

      # Severity-weighted boundary violations per file against its knee.
      #
      # @param input [BoundariesInput, nil]
      # @return [Component, nil] nil when the analysis is absent
      def boundaries_component(input)
        return nil unless input
        if input.file_count.to_i.zero?
          return healthy_by_absence("boundaries", "no files to score", category: BOUNDARIES_CATEGORY)
        end

        burden = input.weighted_violations / input.file_count.to_f
        score = health_from_badness(burden / BOUNDARY_BURDEN_KNEE)
        Component.new(
          name: "boundaries", category: BOUNDARIES_CATEGORY, score: score,
          stats: {
            file_count: input.file_count,
            weighted_violations: input.weighted_violations.round(2),
            violation_count: input.violation_count
          },
          reasons: [Reason.new(rule: :boundary_burden, value: score,
            detail: "severity-weighted boundary violations per file #{burden.round(3)} vs knee #{BOUNDARY_BURDEN_KNEE} " \
                    "(#{input.violation_count} violations)")]
        )
      end

      # A present component that is vacuously healthy because it had nothing to
      # score — distinct from an absent (nil) component, and it says why.
      #
      # @param name [String] component name
      # @param detail [String] why there was nothing to score
      # @param category [String] category to report; defaults to the component name
      # @return [Component] score 1.0 with a single :no_signal reason
      def healthy_by_absence(name, detail, category: name)
        Component.new(
          name: name, category: category, score: 1.0, stats: {},
          reasons: [Reason.new(rule: :no_signal, value: 1.0, detail: detail)]
        )
      end
    end
  end
end
|
data/lib/moult/health.rb
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
  # Orchestrates the health score: it runs each existing analysis, extracts the
  # numeric signals each one exposes, and composes them through the pure
  # {Health::Score} model into one auditable {HealthReport}. There is no external
  # tool here — the "adapter" is this composition of Moult's own reports.
  #
  # This is the only layer that does IO and knows how the signals are sourced;
  # {Score} stays a pure function of the extracted numbers so it can be pinned in
  # isolation. Every analysis is run inside its own rescue: a failure degrades that
  # one component to `present: false` with a diagnostic, never crashing the whole
  # health run.
  module Health
    # Fixed component order, so output is stable and every slot is accounted for
    # (present, skipped, or errored).
    KNOWN_COMPONENTS = %w[complexity dead_code duplication coverage boundaries].freeze

    # Cap on join keys serialized per file, so the roll-up cannot balloon on a
    # large file; the true total is recorded alongside.
    SYMBOLS_PER_FILE = 20

    # The outcome of one isolated analysis run.
    Run = Struct.new(:value, :error) do
      # @return [Boolean] true only when the run neither failed nor returned nil
      def ok?
        error.nil? && !value.nil?
      end
    end

    module_function

    # @param root [String] absolute analysis root
    # @param files [Array<String>] absolute Ruby file paths to analyse
    # @param index [Index] resolved definition/reference index (drives dead-code + coverage)
    # @param rails [RailsConventions] Rails entrypoint awareness for dead-code
    # @param coverage [Coverage::Dataset, nil] runtime coverage to merge (adds the coverage component)
    # @param since [String, nil] churn window start for the complexity component
    # @param git_ref [Object, nil] passed through to the report unchanged
    # @param generated_at [Object, nil] passed through to the report unchanged
    # @param churn_window [Object, nil] passed through to the report unchanged
    # @param churn_since [Object, nil] passed through to the report unchanged
    # @return [HealthReport]
    def build_report(root:, files:, index:, rails:, coverage: nil, since: nil,
      git_ref: nil, generated_at: nil, churn_window: nil, churn_since: nil)
      churn = Churn.collect(root: root, since: since || Churn::DEFAULT_SINCE)

      runs = {
        "complexity" => run { Scoring.build_report(root: root, files: files, churn: churn) },
        "dead_code" => run { DeadCode.build_report(root: root, files: files, index: index, rails: rails, coverage: coverage) },
        "duplication" => run { Duplication.build_report(root: root, files: files) },
        "coverage" => run { coverage ? CoverageReport.build(index: index, coverage: coverage, root: root) : nil },
        "boundaries" => run { Boundaries.build_report(root: root) }
      }

      # Derive churn presence from the JOINED hotspots, not the raw churn hash: a
      # repo-relative churn map run against a subdir root won't join to the scored
      # files, so the honest signal is "did any scored file actually carry churn".
      churn_present = runs["complexity"].ok? &&
        runs["complexity"].value.hotspots.any? { |h| h.churn.to_i.positive? }

      inputs = Score::Inputs.new(
        complexity: runs["complexity"].ok? ? complexity_input(runs["complexity"].value, churn_present) : nil,
        dead_code: runs["dead_code"].ok? ? dead_code_input(runs["dead_code"].value, index) : nil,
        duplication: runs["duplication"].ok? ? duplication_input(runs["duplication"].value, files.size) : nil,
        coverage: runs["coverage"].ok? ? coverage_input(runs["coverage"].value) : nil,
        # Absent (skipped) unless the project is actually packwerk-configured: an
        # unconfigured repo has no boundary signal and must not read as healthy 1.0.
        boundaries: boundaries_present?(runs["boundaries"]) ? boundaries_input(runs["boundaries"].value, files.size) : nil
      )

      composite = Score.assess(inputs)
      components = component_views(composite, runs, coverage_requested: !coverage.nil?)
      files_view = file_rollup(runs, index, churn_present)

      HealthReport.new(
        root: root,
        score: composite.score,
        grade: composite.grade,
        components: components,
        files: files_view,
        git_ref: git_ref,
        generated_at: generated_at,
        coverage_source: coverage&.source,
        churn_window: churn_window,
        churn_since: churn_since
      )
    end

    # Run one analysis in isolation: success carries the report, any failure
    # carries the message so the component degrades rather than the whole run.
    #
    # @yieldreturn [Object, nil] the analysis report (nil means "nothing produced")
    # @return [Run] only StandardError is captured; other exceptions propagate
    def run
      Run.new(value: yield, error: nil)
    rescue => e
      Run.new(value: nil, error: e.message)
    end

    # ---- signal extraction (heavy report -> pure numeric input) ---------------

    # @param report [#hotspots] complexity report
    # @param churn_present [Boolean] whether any scored file carried churn
    # @return [Score::ComplexityInput]
    def complexity_input(report, churn_present)
      hs = report.hotspots
      Score::ComplexityInput.new(
        file_count: hs.size,
        total_complexity: hs.sum(&:complexity),
        total_score: hs.sum(&:score),
        churn_present: churn_present
      )
    end

    # @param report [#findings, #resolved] dead-code report
    # @param index [#definitions] supplies the symbol-count denominator
    # @return [Score::DeadCodeInput]
    def dead_code_input(report, index)
      Score::DeadCodeInput.new(
        symbol_count: index.definitions.size,
        confidence_sum: report.findings.sum(&:confidence),
        finding_count: report.findings.size,
        resolved: report.resolved
      )
    end

    # @param report [#findings] duplication report
    # @param file_count [Integer] number of analysed files (the burden denominator)
    # @return [Score::DuplicationInput]
    def duplication_input(report, file_count)
      # Only the EXTRA copies are consolidatable mass; confidence-weight so a
      # low-confidence "similar" rhyme barely registers.
      weighted = report.findings.sum { |f| f.confidence * f.mass * (f.occurrences.size - 1) }
      Score::DuplicationInput.new(
        file_count: file_count,
        weighted_dup_mass: weighted,
        set_count: report.findings.size
      )
    end

    # @param report [#summary] coverage report whose summary is read at :hot and :cold
    # @return [Score::CoverageInput]
    def coverage_input(report)
      s = report.summary
      Score::CoverageInput.new(hot: s[:hot], cold: s[:cold])
    end

    # A boundaries run contributes only when it ran AND the project is packwerk-
    # configured; an unconfigured repo yields a successful-but-empty report that
    # must be SKIPPED, not scored as vacuously healthy.
    def boundaries_present?(run)
      run.ok? && run.value.configured
    end

    # @param report [#findings] boundaries report
    # @param file_count [Integer] number of analysed files (the burden denominator)
    # @return [Score::BoundariesInput]
    def boundaries_input(report, file_count)
      Score::BoundariesInput.new(
        file_count: file_count,
        weighted_violations: report.findings.sum { |f| boundary_weight(f) * f.occurrences.size },
        violation_count: report.findings.sum { |f| f.occurrences.size }
      )
    end

    # Severity weight of one boundary finding; an unrecognised severity is
    # weighted like "low".
    def boundary_weight(finding)
      Boundaries::Severity::SEVERITY_WEIGHT.fetch(finding.severity, Boundaries::Severity::SEVERITY_WEIGHT.fetch("low"))
    end

    # ---- component views (every slot, present or not) -------------------------

    # One view per KNOWN_COMPONENTS entry, in that order. Present components carry
    # their score, weights, stats and reasons; absent ones carry a diagnostic.
    #
    # @param composite [Score::Composite]
    # @param runs [Hash{String => Run}]
    # @param coverage_requested [Boolean] whether a coverage dataset was supplied
    # @return [Array<HealthReport::ComponentView>]
    def component_views(composite, runs, coverage_requested:)
      present = composite.components.to_h { |c| [c.name, c] }
      present_names = composite.components.map(&:name)

      KNOWN_COMPONENTS.map do |name|
        component = present[name]
        if component
          HealthReport::ComponentView.new(
            name: name,
            category: component.category,
            present: true,
            score: component.score,
            weight: Score::WEIGHTS.fetch(name),
            normalized_weight: Score.normalized_weight(name, present_names),
            summary: component.stats,
            reasons: component.reasons,
            diagnostic: nil
          )
        else
          HealthReport::ComponentView.new(
            name: name,
            category: nil,
            present: false,
            score: nil,
            weight: Score::WEIGHTS.fetch(name),
            normalized_weight: nil,
            summary: {},
            reasons: [],
            diagnostic: diagnostic_for(name, runs[name], coverage_requested)
          )
        end
      end
    end

    # Why a component is absent: the captured error message when the run failed,
    # otherwise a per-component explanation.
    #
    # @param name [String] component name
    # @param run [Run, nil]
    # @param coverage_requested [Boolean]
    # @return [String]
    def diagnostic_for(name, run, coverage_requested)
      return run.error if run&.error
      case name
      when "coverage"
        coverage_requested ? "coverage produced no usable signal" : "no --coverage supplied"
      when "boundaries"
        "not a packwerk project (no packwerk.yml)"
      else
        "analysis produced no result"
      end
    end

    # ---- per-file roll-up (the cross-analysis join surface) -------------------

    # Join every successful analysis by file path and score each file on its own.
    # Coverage alone only pulls a file into the roll-up when it has cold symbols.
    #
    # @param runs [Hash{String => Run}]
    # @param index [#definitions] supplies per-file symbol counts
    # @param churn_present [Boolean]
    # @return [Array<HealthReport::FileView>] least-healthy first
    def file_rollup(runs, index, churn_present)
      hotspots = runs["complexity"].ok? ? runs["complexity"].value.hotspots.to_h { |h| [h.path, h] } : {}
      dead = runs["dead_code"].ok? ? runs["dead_code"].value.findings.group_by(&:path) : {}
      dup = runs["duplication"].ok? ? clones_by_path(runs["duplication"].value) : {}
      cov = runs["coverage"].ok? ? coverage_by_path(runs["coverage"].value) : {}
      bnd = boundaries_present?(runs["boundaries"]) ? boundaries_by_path(runs["boundaries"].value) : {}
      symbols_per_file = index.definitions.group_by(&:path).transform_values(&:size)

      # Order-preserving union of every contributing path. A plain Array#uniq is
      # used so this file does not depend on `set` having been loaded elsewhere.
      paths = [
        hotspots.keys,
        dead.keys,
        dup.keys,
        cov.select { |_, c| c[:cold].positive? }.keys,
        bnd.keys
      ].flatten(1).uniq

      views = paths.map do |path|
        file_view(path, hotspots[path], dead[path], dup[path], cov[path], bnd[path],
          symbols_per_file[path], churn_present)
      end
      # Least-healthy first, path as a deterministic tie-break.
      views.sort_by { |v| [v.score, v.path] }
    end

    # Score one file from whichever per-file signals exist for it. A nil signal
    # leaves that component out of the file's composite.
    #
    # @return [HealthReport::FileView]
    def file_view(path, hotspot, dead_findings, clone, coverage, boundaries, symbol_count, churn_present)
      # Untracked-only coverage carries no signal: the slot stays nil (absent).
      coverage_slot =
        if coverage && tracked?(coverage)
          Score::CoverageInput.new(hot: coverage[:hot], cold: coverage[:cold])
        end

      inputs = Score::Inputs.new(
        complexity: hotspot && Score::ComplexityInput.new(
          file_count: 1, total_complexity: hotspot.complexity,
          total_score: hotspot.score, churn_present: churn_present
        ),
        dead_code: dead_findings && Score::DeadCodeInput.new(
          # Never let the denominator fall below the number of findings.
          symbol_count: [symbol_count.to_i, dead_findings.size].max,
          confidence_sum: dead_findings.sum(&:confidence),
          finding_count: dead_findings.size, resolved: true
        ),
        duplication: clone && Score::DuplicationInput.new(
          file_count: 1, weighted_dup_mass: clone[:weighted_mass], set_count: clone[:sets]
        ),
        coverage: coverage_slot,
        boundaries: boundaries && Score::BoundariesInput.new(
          file_count: 1, weighted_violations: boundaries[:weighted], violation_count: boundaries[:count]
        )
      )

      composite = Score.assess(inputs)
      ids = file_symbol_ids(hotspot, dead_findings, clone, coverage)
      HealthReport::FileView.new(
        path: path,
        score: composite.score,
        grade: composite.grade,
        components: composite.components.to_h { |c| [c.name, c.score] },
        symbol_ids: ids.first(SYMBOLS_PER_FILE),
        symbol_count: ids.size
      )
    end

    # Contributing join keys for a file, dead-finding / clone / coverage / hotspot
    # in that order, de-duplicated.
    def file_symbol_ids(hotspot, dead_findings, clone, coverage)
      ids = []
      ids.concat(dead_findings.map(&:symbol_id)) if dead_findings
      ids.concat(clone[:symbol_ids]) if clone
      ids.concat(coverage[:cold_ids]) if coverage
      ids.concat(hotspot.methods.map(&:symbol_id)) if hotspot
      ids.compact.uniq
    end

    # @param coverage [Hash] per-file bucket with :hot and :cold counts
    # @return [Boolean] whether the file has at least one hot or cold symbol
    def tracked?(coverage)
      (coverage[:hot] + coverage[:cold]).positive?
    end

    # path => {weighted_mass:, sets:, symbol_ids:} from clone occurrences in that file.
    # Mass accumulates per occurrence; +sets+ counts each clone set once per file,
    # even when that set has several occurrences in the same file.
    def clones_by_path(report)
      acc = Hash.new { |h, k| h[k] = {weighted_mass: 0.0, sets: 0, symbol_ids: []} }
      report.findings.each do |finding|
        counted = {} # paths this clone set has already been counted for
        finding.occurrences.each do |occ|
          bucket = acc[occ.path]
          bucket[:weighted_mass] += finding.confidence * finding.mass
          unless counted[occ.path]
            counted[occ.path] = true
            bucket[:sets] += 1
          end
          bucket[:symbol_ids] << occ.symbol_id if occ.symbol_id
        end
      end
      acc.default_proc = nil # so later missing-key reads return nil instead of mutating
      acc
    end

    # path => {weighted:, count:} from boundary-violation occurrences in that file.
    # Boundary occurrences carry a null symbol_id (file-keyed), so they contribute to
    # the per-file roll-up at PATH granularity only — never to file_symbol_ids.
    def boundaries_by_path(report)
      acc = Hash.new { |h, k| h[k] = {weighted: 0.0, count: 0} }
      report.findings.each do |finding|
        weight = boundary_weight(finding)
        finding.occurrences.each do |occ|
          acc[occ.path][:weighted] += weight
          acc[occ.path][:count] += 1
        end
      end
      acc.default_proc = nil # so later missing-key reads return nil instead of mutating
      acc
    end

    # path => {hot:, cold:, cold_ids:} from coverage entries (path parsed from symbol_id).
    # Entries whose runtime is neither :hot nor :cold create no bucket.
    def coverage_by_path(report)
      acc = Hash.new { |h, k| h[k] = {hot: 0, cold: 0, cold_ids: []} }
      report.entries.each do |entry|
        path = entry.symbol_id.split(":", 3).first
        case entry.runtime
        when :hot then acc[path][:hot] += 1
        when :cold
          acc[path][:cold] += 1
          acc[path][:cold_ids] << entry.symbol_id
        end
      end
      acc.default_proc = nil # so later missing-key reads return nil instead of mutating
      acc
    end
  end
end
|
|
319
|
+
|
|
320
|
+
require_relative "health/score"
|