@pseolint/core 0.6.3 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -0
- package/dist/ai/tools/fetch-sitemap.js +2 -1
- package/dist/ai/tools/fetch-sitemap.js.map +1 -1
- package/dist/auditor.d.ts +2 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +324 -79
- package/dist/auditor.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +27 -5
- package/dist/enrich-findings.js.map +1 -1
- package/dist/parser.d.ts.map +1 -1
- package/dist/parser.js +17 -1
- package/dist/parser.js.map +1 -1
- package/dist/rules/content/title-uniqueness.d.ts.map +1 -1
- package/dist/rules/content/title-uniqueness.js +13 -0
- package/dist/rules/content/title-uniqueness.js.map +1 -1
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +7 -1
- package/dist/site-classifier.js.map +1 -1
- package/dist/stratified-sample.js +2 -1
- package/dist/stratified-sample.js.map +1 -1
- package/dist/types.d.ts +47 -3
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +22 -2
- package/dist/types.js.map +1 -1
- package/package.json +5 -3
- package/schemas/audit-summary.schema.json +295 -0
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://pseolint.dev/schemas/2026-06-v0.6/audit-summary.schema.json",
|
|
4
|
+
"title": "pseolint AuditSummary (JSON output contract)",
|
|
5
|
+
"description": "The stable public shape that `formatJson` (pseolint --format json) serializes. Programmatic consumers (CI gates, pseolint-gate-style scripts) should validate against this and branch on `schemaVersion`. `schemaVersion` bumps on every breaking OR additive-public output change. The engine emits additional internal/legacy fields not described here; `additionalProperties` is therefore permissive (true) so this schema pins the CONTRACT consumers gate on without rejecting forward-compatible additions.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": true,
|
|
8
|
+
"required": [
|
|
9
|
+
"schemaVersion",
|
|
10
|
+
"verdict",
|
|
11
|
+
"risk",
|
|
12
|
+
"headline",
|
|
13
|
+
"categories",
|
|
14
|
+
"issues",
|
|
15
|
+
"diagnostics",
|
|
16
|
+
"pageCount"
|
|
17
|
+
],
|
|
18
|
+
"properties": {
|
|
19
|
+
"schemaVersion": {
|
|
20
|
+
"description": "Output-contract version tag. Bumps on every breaking OR additive-public output change. Consumers MUST branch on this. Equal to the `$id` version segment.",
|
|
21
|
+
"const": "2026-06-v0.6"
|
|
22
|
+
},
|
|
23
|
+
"verdict": {
|
|
24
|
+
"description": "User-facing verdict ladder, replacing the legacy numeric score as the human signal.",
|
|
25
|
+
"type": "string",
|
|
26
|
+
"enum": ["ready", "caution", "concerning", "critical"]
|
|
27
|
+
},
|
|
28
|
+
"risk": {
|
|
29
|
+
"description": "Internal numeric risk score, 0-100, lower is better. Retained for CI thresholding, trend deltas, and alert-gate diff logic. Never shown to humans. When `truncated` is true, treat as a LOWER bound.",
|
|
30
|
+
"type": "number",
|
|
31
|
+
"minimum": 0,
|
|
32
|
+
"maximum": 100
|
|
33
|
+
},
|
|
34
|
+
"headline": {
|
|
35
|
+
"description": "One-line count summary, e.g. \"3 ship-blockers, 16 should-fix\".",
|
|
36
|
+
"type": "string"
|
|
37
|
+
},
|
|
38
|
+
"categories": {
|
|
39
|
+
"description": "Per-category grade + raw issue count. Keyed by the 5 fixed category keys. NOTE: these objects carry grades/counts only, NOT the issue arrays (those live under `issues`). The `audit` category exists for completeness but is weight-0 and never affects the verdict.",
|
|
40
|
+
"type": "object",
|
|
41
|
+
"additionalProperties": false,
|
|
42
|
+
"required": ["integrity", "discoverability", "citation", "data", "audit"],
|
|
43
|
+
"properties": {
|
|
44
|
+
"integrity": { "$ref": "#/$defs/categoryGrade" },
|
|
45
|
+
"discoverability": { "$ref": "#/$defs/categoryGrade" },
|
|
46
|
+
"citation": { "$ref": "#/$defs/categoryGrade" },
|
|
47
|
+
"data": { "$ref": "#/$defs/categoryGrade" },
|
|
48
|
+
"audit": { "$ref": "#/$defs/categoryGrade" }
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"issues": {
|
|
52
|
+
"description": "Findings bucketed by SEVERITY. This is the v0.4+ replacement for the flat `findings` array and is NOT category-keyed. To get a flat list for a gate: [...issues.blockers, ...issues.shouldFix, ...issues.informational].",
|
|
53
|
+
"type": "object",
|
|
54
|
+
"additionalProperties": false,
|
|
55
|
+
"required": ["blockers", "shouldFix", "informational"],
|
|
56
|
+
"properties": {
|
|
57
|
+
"blockers": {
|
|
58
|
+
"description": "Severity = error or critical. Must be fixed before shipping.",
|
|
59
|
+
"type": "array",
|
|
60
|
+
"items": { "$ref": "#/$defs/ruleResult" }
|
|
61
|
+
},
|
|
62
|
+
"shouldFix": {
|
|
63
|
+
"description": "Severity = warning. Should be fixed before scaling.",
|
|
64
|
+
"type": "array",
|
|
65
|
+
"items": { "$ref": "#/$defs/ruleResult" }
|
|
66
|
+
},
|
|
67
|
+
"informational": {
|
|
68
|
+
"description": "Severity = info. Tracked for trend analysis.",
|
|
69
|
+
"type": "array",
|
|
70
|
+
"items": { "$ref": "#/$defs/ruleResult" }
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
"diagnostics": {
|
|
75
|
+
"description": "Engine-internal diagnostics (origin readiness, crawl stats, engine-emitted audit/* findings). Weight 0 — never affects the verdict. Shape is intentionally loose here; consumers should not gate on diagnostics internals.",
|
|
76
|
+
"type": "object"
|
|
77
|
+
},
|
|
78
|
+
"pageCount": {
|
|
79
|
+
"description": "Number of pages audited this run. When `truncated` is true, the crawl did not complete, so treat as a LOWER bound.",
|
|
80
|
+
"type": "integer",
|
|
81
|
+
"minimum": 0
|
|
82
|
+
},
|
|
83
|
+
"templates": {
|
|
84
|
+
"description": "v0.6 per-template breakdown. Present on sites where the classifier found >=2 template clusters; an empty array (or omitted, on older blobs / tiny sites) means the per-URL findings carry everything. Additive: consumers reading only `issues`/`findings` still work.",
|
|
85
|
+
"type": "array",
|
|
86
|
+
"items": { "$ref": "#/$defs/template" }
|
|
87
|
+
},
|
|
88
|
+
"templateDetected": {
|
|
89
|
+
"description": "True when the enrichment pipeline detected template-generated content.",
|
|
90
|
+
"type": "boolean"
|
|
91
|
+
},
|
|
92
|
+
"rawFindingCount": {
|
|
93
|
+
"description": "Pre-enrichment finding count, for backward compatibility with CI scripts that counted raw findings before clustering collapsed them.",
|
|
94
|
+
"type": "integer",
|
|
95
|
+
"minimum": 0
|
|
96
|
+
},
|
|
97
|
+
"auditedUrls": {
|
|
98
|
+
"description": "URLs actually audited this run (after sampling, dedup, and policy filtering), sorted for deterministic diffs. Absent when no pages were audited.",
|
|
99
|
+
"type": "array",
|
|
100
|
+
"items": { "type": "string" }
|
|
101
|
+
},
|
|
102
|
+
"skippedUrls": {
|
|
103
|
+
"description": "v0.5+ monitoring: URLs the change-driven matrix decided to skip pre-fetch (their findings were carried forward from prior state).",
|
|
104
|
+
"type": "array",
|
|
105
|
+
"items": { "type": "string" }
|
|
106
|
+
},
|
|
107
|
+
"truncated": {
|
|
108
|
+
"description": "True when the run did NOT complete the full crawl (e.g. the backpressure watchdog aborted on a degraded origin). The report still carries whatever findings were collected before the abort. CI gates MUST treat coverage as PARTIAL: counts, `risk`, and `verdict` are LOWER bounds. Absent or false on complete runs.",
|
|
109
|
+
"type": "boolean"
|
|
110
|
+
},
|
|
111
|
+
"truncatedReason": {
|
|
112
|
+
"description": "Human-readable reason the run was truncated. Present only when `truncated` is true.",
|
|
113
|
+
"type": "string"
|
|
114
|
+
}
|
|
115
|
+
},
|
|
116
|
+
"$defs": {
|
|
117
|
+
"categoryGrade": {
|
|
118
|
+
"description": "Per-category letter grade + raw issue count.",
|
|
119
|
+
"type": "object",
|
|
120
|
+
"additionalProperties": false,
|
|
121
|
+
"required": ["grade", "issues"],
|
|
122
|
+
"properties": {
|
|
123
|
+
"grade": {
|
|
124
|
+
"description": "Letter grade for the category.",
|
|
125
|
+
"type": "string",
|
|
126
|
+
"enum": ["A", "B", "C", "D", "F"]
|
|
127
|
+
},
|
|
128
|
+
"issues": {
|
|
129
|
+
"description": "Raw count of findings contributing to this category.",
|
|
130
|
+
"type": "integer",
|
|
131
|
+
"minimum": 0
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
},
|
|
135
|
+
"ruleResult": {
|
|
136
|
+
"description": "A single rule finding. Only `ruleId`, `severity`, and `message` are guaranteed; all other fields are populated contextually by the rule and enrichment pipeline.",
|
|
137
|
+
"type": "object",
|
|
138
|
+
"additionalProperties": true,
|
|
139
|
+
"required": ["ruleId", "severity", "message"],
|
|
140
|
+
"properties": {
|
|
141
|
+
"ruleId": {
|
|
142
|
+
"description": "Stable rule identifier, e.g. \"spam/thin-content\".",
|
|
143
|
+
"type": "string"
|
|
144
|
+
},
|
|
145
|
+
"severity": {
|
|
146
|
+
"description": "Finding severity. blockers bucket = error|critical, shouldFix = warning, informational = info.",
|
|
147
|
+
"type": "string",
|
|
148
|
+
"enum": ["info", "warning", "error", "critical"]
|
|
149
|
+
},
|
|
150
|
+
"message": {
|
|
151
|
+
"description": "Human-readable description of the finding.",
|
|
152
|
+
"type": "string"
|
|
153
|
+
},
|
|
154
|
+
"fix": {
|
|
155
|
+
"description": "What to do about this finding.",
|
|
156
|
+
"type": "string"
|
|
157
|
+
},
|
|
158
|
+
"ref": {
|
|
159
|
+
"description": "Google documentation URL backing this finding.",
|
|
160
|
+
"type": "string"
|
|
161
|
+
},
|
|
162
|
+
"docsUrl": {
|
|
163
|
+
"description": "Marketing-page deeplink for this rule (https://pseolint.dev/rules/{slug}).",
|
|
164
|
+
"type": "string"
|
|
165
|
+
},
|
|
166
|
+
"pageUrl": {
|
|
167
|
+
"description": "Primary page this finding refers to, when applicable.",
|
|
168
|
+
"type": "string"
|
|
169
|
+
},
|
|
170
|
+
"relatedUrls": {
|
|
171
|
+
"description": "Other URLs involved (cluster members, related pairs).",
|
|
172
|
+
"type": "array",
|
|
173
|
+
"items": { "type": "string" }
|
|
174
|
+
},
|
|
175
|
+
"effort": {
|
|
176
|
+
"description": "Fix effort level assigned by the enrichment pipeline.",
|
|
177
|
+
"type": "string",
|
|
178
|
+
"enum": ["quick", "moderate", "structural"]
|
|
179
|
+
},
|
|
180
|
+
"confidence": {
|
|
181
|
+
"description": "Confidence in this finding. Defaults to `high` when omitted.",
|
|
182
|
+
"type": "string",
|
|
183
|
+
"enum": ["high", "medium", "low", "speculative"]
|
|
184
|
+
},
|
|
185
|
+
"template": {
|
|
186
|
+
"description": "v0.6: which template the finding belongs to. Absent for site-level findings.",
|
|
187
|
+
"type": "string"
|
|
188
|
+
},
|
|
189
|
+
"carriedForward": {
|
|
190
|
+
"description": "v0.5+: true when carried forward from a prior audit without re-verification this run.",
|
|
191
|
+
"type": "boolean"
|
|
192
|
+
},
|
|
193
|
+
"lastVerifiedAt": {
|
|
194
|
+
"description": "v0.5+: ISO timestamp of the last audit that re-fetched the page and confirmed this finding.",
|
|
195
|
+
"type": "string"
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
},
|
|
199
|
+
"template": {
|
|
200
|
+
"description": "v0.6 per-template audit breakdown.",
|
|
201
|
+
"type": "object",
|
|
202
|
+
"additionalProperties": true,
|
|
203
|
+
"required": [
|
|
204
|
+
"signature",
|
|
205
|
+
"totalUrls",
|
|
206
|
+
"totalDiscoveredUrls",
|
|
207
|
+
"auditedUrls",
|
|
208
|
+
"verdict",
|
|
209
|
+
"risk",
|
|
210
|
+
"categories",
|
|
211
|
+
"variance",
|
|
212
|
+
"findingIds"
|
|
213
|
+
],
|
|
214
|
+
"properties": {
|
|
215
|
+
"signature": {
|
|
216
|
+
"description": "Stable identifier derived from the URL pattern, e.g. \"/listing/:slug\".",
|
|
217
|
+
"type": "string"
|
|
218
|
+
},
|
|
219
|
+
"totalUrls": {
|
|
220
|
+
"description": "Cluster size in the discovered URL set (NOT the audit sample).",
|
|
221
|
+
"type": "integer",
|
|
222
|
+
"minimum": 0
|
|
223
|
+
},
|
|
224
|
+
"totalDiscoveredUrls": {
|
|
225
|
+
"description": "Total discovered URLs (denominator for coverage % calculation).",
|
|
226
|
+
"type": "integer",
|
|
227
|
+
"minimum": 0
|
|
228
|
+
},
|
|
229
|
+
"auditedUrls": {
|
|
230
|
+
"description": "URLs actually audited for this template.",
|
|
231
|
+
"type": "array",
|
|
232
|
+
"items": { "type": "string" }
|
|
233
|
+
},
|
|
234
|
+
"verdict": {
|
|
235
|
+
"description": "Per-template verdict, independent of site-level verdict.",
|
|
236
|
+
"type": "string",
|
|
237
|
+
"enum": ["ready", "caution", "concerning", "critical"]
|
|
238
|
+
},
|
|
239
|
+
"risk": {
|
|
240
|
+
"description": "Per-template risk score (0-100), independent of site-level risk.",
|
|
241
|
+
"type": "number",
|
|
242
|
+
"minimum": 0,
|
|
243
|
+
"maximum": 100
|
|
244
|
+
},
|
|
245
|
+
"categories": {
|
|
246
|
+
"description": "Per-template category grades + counts.",
|
|
247
|
+
"type": "object",
|
|
248
|
+
"additionalProperties": false,
|
|
249
|
+
"required": ["integrity", "discoverability", "citation", "data", "audit"],
|
|
250
|
+
"properties": {
|
|
251
|
+
"integrity": { "$ref": "#/$defs/categoryGrade" },
|
|
252
|
+
"discoverability": { "$ref": "#/$defs/categoryGrade" },
|
|
253
|
+
"citation": { "$ref": "#/$defs/categoryGrade" },
|
|
254
|
+
"data": { "$ref": "#/$defs/categoryGrade" },
|
|
255
|
+
"audit": { "$ref": "#/$defs/categoryGrade" }
|
|
256
|
+
}
|
|
257
|
+
},
|
|
258
|
+
"variance": {
|
|
259
|
+
"description": "Per-template fire-rate variance across the sampled pages.",
|
|
260
|
+
"type": "object",
|
|
261
|
+
"additionalProperties": true,
|
|
262
|
+
"required": ["ruleFireRates", "uniformityScore", "topDriver"],
|
|
263
|
+
"properties": {
|
|
264
|
+
"ruleFireRates": {
|
|
265
|
+
"description": "Per-rule fire-rate within the template, e.g. {\"spam/thin-content\": 0.8}.",
|
|
266
|
+
"type": "object",
|
|
267
|
+
"additionalProperties": { "type": "number" }
|
|
268
|
+
},
|
|
269
|
+
"uniformityScore": {
|
|
270
|
+
"description": "0-1, higher = more uniform quality across the template's pages.",
|
|
271
|
+
"type": "number",
|
|
272
|
+
"minimum": 0,
|
|
273
|
+
"maximum": 1
|
|
274
|
+
},
|
|
275
|
+
"topDriver": {
|
|
276
|
+
"description": "Worst-firing rule + its rate, or null when no rule fired.",
|
|
277
|
+
"type": ["object", "null"],
|
|
278
|
+
"additionalProperties": false,
|
|
279
|
+
"required": ["ruleId", "fireRate"],
|
|
280
|
+
"properties": {
|
|
281
|
+
"ruleId": { "type": "string" },
|
|
282
|
+
"fireRate": { "type": "number", "minimum": 0, "maximum": 1 }
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
},
|
|
287
|
+
"findingIds": {
|
|
288
|
+
"description": "Finding IDs whose pageUrl is in auditedUrls. Reference, not duplication.",
|
|
289
|
+
"type": "array",
|
|
290
|
+
"items": { "type": "string" }
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|