@de-otio/bibcheck 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +147 -0
  3. package/dist/cache/fs-cache.d.ts +55 -0
  4. package/dist/cache/fs-cache.d.ts.map +1 -0
  5. package/dist/cache/fs-cache.js +264 -0
  6. package/dist/cache/fs-cache.js.map +1 -0
  7. package/dist/canonical.d.ts +29 -0
  8. package/dist/canonical.d.ts.map +1 -0
  9. package/dist/canonical.js +132 -0
  10. package/dist/canonical.js.map +1 -0
  11. package/dist/check.d.ts +140 -0
  12. package/dist/check.d.ts.map +1 -0
  13. package/dist/check.js +646 -0
  14. package/dist/check.js.map +1 -0
  15. package/dist/cli.d.ts +19 -0
  16. package/dist/cli.d.ts.map +1 -0
  17. package/dist/cli.js +357 -0
  18. package/dist/cli.js.map +1 -0
  19. package/dist/config.d.ts +175 -0
  20. package/dist/config.d.ts.map +1 -0
  21. package/dist/config.js +180 -0
  22. package/dist/config.js.map +1 -0
  23. package/dist/databases/crossref.d.ts +53 -0
  24. package/dist/databases/crossref.d.ts.map +1 -0
  25. package/dist/databases/crossref.js +138 -0
  26. package/dist/databases/crossref.js.map +1 -0
  27. package/dist/databases/index.d.ts +12 -0
  28. package/dist/databases/index.d.ts.map +1 -0
  29. package/dist/databases/index.js +9 -0
  30. package/dist/databases/index.js.map +1 -0
  31. package/dist/databases/openalex.d.ts +29 -0
  32. package/dist/databases/openalex.d.ts.map +1 -0
  33. package/dist/databases/openalex.js +117 -0
  34. package/dist/databases/openalex.js.map +1 -0
  35. package/dist/databases/openlibrary.d.ts +26 -0
  36. package/dist/databases/openlibrary.d.ts.map +1 -0
  37. package/dist/databases/openlibrary.js +79 -0
  38. package/dist/databases/openlibrary.js.map +1 -0
  39. package/dist/databases/worldcat.d.ts +33 -0
  40. package/dist/databases/worldcat.d.ts.map +1 -0
  41. package/dist/databases/worldcat.js +145 -0
  42. package/dist/databases/worldcat.js.map +1 -0
  43. package/dist/doctor.d.ts +44 -0
  44. package/dist/doctor.d.ts.map +1 -0
  45. package/dist/doctor.js +386 -0
  46. package/dist/doctor.js.map +1 -0
  47. package/dist/existence.d.ts +70 -0
  48. package/dist/existence.d.ts.map +1 -0
  49. package/dist/existence.js +308 -0
  50. package/dist/existence.js.map +1 -0
  51. package/dist/http.d.ts +97 -0
  52. package/dist/http.d.ts.map +1 -0
  53. package/dist/http.js +543 -0
  54. package/dist/http.js.map +1 -0
  55. package/dist/identifiers.d.ts +44 -0
  56. package/dist/identifiers.d.ts.map +1 -0
  57. package/dist/identifiers.js +111 -0
  58. package/dist/identifiers.js.map +1 -0
  59. package/dist/index.d.ts +9 -0
  60. package/dist/index.d.ts.map +1 -0
  61. package/dist/index.js +8 -0
  62. package/dist/index.js.map +1 -0
  63. package/dist/linkage.d.ts +29 -0
  64. package/dist/linkage.d.ts.map +1 -0
  65. package/dist/linkage.js +73 -0
  66. package/dist/linkage.js.map +1 -0
  67. package/dist/markdown/blocks.d.ts +19 -0
  68. package/dist/markdown/blocks.d.ts.map +1 -0
  69. package/dist/markdown/blocks.js +69 -0
  70. package/dist/markdown/blocks.js.map +1 -0
  71. package/dist/markdown/citekeys.d.ts +22 -0
  72. package/dist/markdown/citekeys.d.ts.map +1 -0
  73. package/dist/markdown/citekeys.js +100 -0
  74. package/dist/markdown/citekeys.js.map +1 -0
  75. package/dist/markdown/glob.d.ts +18 -0
  76. package/dist/markdown/glob.d.ts.map +1 -0
  77. package/dist/markdown/glob.js +26 -0
  78. package/dist/markdown/glob.js.map +1 -0
  79. package/dist/markdown/prose.d.ts +19 -0
  80. package/dist/markdown/prose.d.ts.map +1 -0
  81. package/dist/markdown/prose.js +81 -0
  82. package/dist/markdown/prose.js.map +1 -0
  83. package/dist/output/json.d.ts +21 -0
  84. package/dist/output/json.d.ts.map +1 -0
  85. package/dist/output/json.js +24 -0
  86. package/dist/output/json.js.map +1 -0
  87. package/dist/output/markdown.d.ts +21 -0
  88. package/dist/output/markdown.d.ts.map +1 -0
  89. package/dist/output/markdown.js +194 -0
  90. package/dist/output/markdown.js.map +1 -0
  91. package/dist/output/sarif.d.ts +31 -0
  92. package/dist/output/sarif.d.ts.map +1 -0
  93. package/dist/output/sarif.js +322 -0
  94. package/dist/output/sarif.js.map +1 -0
  95. package/dist/output/text.d.ts +27 -0
  96. package/dist/output/text.d.ts.map +1 -0
  97. package/dist/output/text.js +212 -0
  98. package/dist/output/text.js.map +1 -0
  99. package/dist/phrases/load.d.ts +34 -0
  100. package/dist/phrases/load.d.ts.map +1 -0
  101. package/dist/phrases/load.js +148 -0
  102. package/dist/phrases/load.js.map +1 -0
  103. package/dist/phrases.d.ts +27 -0
  104. package/dist/phrases.d.ts.map +1 -0
  105. package/dist/phrases.js +116 -0
  106. package/dist/phrases.js.map +1 -0
  107. package/dist/schema/csl.d.ts +429 -0
  108. package/dist/schema/csl.d.ts.map +1 -0
  109. package/dist/schema/csl.js +101 -0
  110. package/dist/schema/csl.js.map +1 -0
  111. package/dist/schema/output.d.ts +1116 -0
  112. package/dist/schema/output.d.ts.map +1 -0
  113. package/dist/schema/output.js +419 -0
  114. package/dist/schema/output.js.map +1 -0
  115. package/dist/suppression.d.ts +106 -0
  116. package/dist/suppression.d.ts.map +1 -0
  117. package/dist/suppression.js +134 -0
  118. package/dist/suppression.js.map +1 -0
  119. package/dist/version.d.ts +11 -0
  120. package/dist/version.d.ts.map +1 -0
  121. package/dist/version.js +14 -0
  122. package/dist/version.js.map +1 -0
  123. package/dist/worklist.d.ts +32 -0
  124. package/dist/worklist.d.ts.map +1 -0
  125. package/dist/worklist.js +211 -0
  126. package/dist/worklist.js.map +1 -0
  127. package/package.json +82 -0
package/dist/check.js ADDED
@@ -0,0 +1,646 @@
1
+ /**
2
+ * `bibcheck check` orchestrator.
3
+ *
4
+ * Composes all five subcommands (existence, canonical, linkage, phrases,
5
+ * worklist) and assembles the top-level Output.
6
+ *
7
+ * Design notes:
8
+ * - Subcommands run sequentially for deterministic log output.
9
+ * - Each subcommand runs with its own 5-minute deadline.
10
+ * - If a subcommand throws, the error is caught, logged, and a degraded
11
+ * (error-flavored) result is emitted for that layer; the run continues.
12
+ * - The final Output is validated against OutputSchema before return.
13
+ */
14
+ import { readFile as nodeReadFile } from 'node:fs/promises';
15
+ import { fileURLToPath } from 'node:url';
16
+ import path from 'node:path';
17
+ import { isGated, parseAllowsForBibliography, } from './suppression.js';
18
+ import { OutputSchema, SCHEMA_VERSION } from './schema/output.js';
19
+ import { loadBibliography, BibliographyParseError } from './schema/csl.js';
20
+ import { loadDenylist, PhraseLoaderError } from './phrases/load.js';
21
+ import { createFsCache } from './cache/fs-cache.js';
22
+ import { createHttpClient, isPrivateApiBase } from './http.js';
23
+ import { USER_AGENT_BASE } from './version.js';
24
+ import { createCrossRefClient, createOpenAlexClient, createOpenLibraryClient, } from './databases/index.js';
25
+ import { runExistence } from './existence.js';
26
+ import { runIdentifiers } from './identifiers.js';
27
+ import { runCanonical } from './canonical.js';
28
+ import { runLinkage } from './linkage.js';
29
+ import { runPhrases } from './phrases.js';
30
+ import { runWorklist } from './worklist.js';
31
/**
 * Logger that silently discards every message. Used as the fallback when the
 * caller does not supply a logger of its own.
 */
const noopLogger = {
    info() {
        return undefined;
    },
    warn() {
        return undefined;
    },
    error() {
        return undefined;
    },
};
36
+ // ---------------------------------------------------------------------------
37
+ // buildCheckDeps
38
+ // ---------------------------------------------------------------------------
39
/**
 * Assemble the dependency bundle: bibliography, phrase patterns, filesystem
 * cache and HTTP client, plus the pass-through config/cwd/logger/signal.
 *
 * @param opts - `{ config, cwd, signal, userAgent?, logger? }`. `logger`
 *   falls back to a no-op logger and `userAgent` to `USER_AGENT_BASE`.
 * @returns `{ config, cwd, bibliography, patterns, http, cache, logger, signal }`.
 * @throws BibliographyParseError when the bibliography cannot be loaded; any
 *   other load error is wrapped in one.
 */
export async function buildCheckDeps(opts) {
    const { config, cwd, signal, userAgent } = opts;
    const logger = opts.logger ?? noopLogger;
    const describe = (cause) => (cause instanceof Error ? cause.message : String(cause));
    // Bibliography: a load failure is fatal and always surfaces as a
    // BibliographyParseError.
    const readBibliography = async () => {
        try {
            return await loadBibliography({ path: config.bibliography.file, cwd });
        }
        catch (cause) {
            const text = describe(cause);
            logger.error('bibliography.load_failed', { error: text });
            if (cause instanceof BibliographyParseError) {
                throw cause;
            }
            throw new BibliographyParseError(text, cause);
        }
    };
    // Phrase denylist: failures are non-fatal and fall back to no patterns.
    const readPatterns = async () => {
        if (config.phrases.file === null) {
            return [];
        }
        try {
            return await loadDenylist({ path: config.phrases.file, cwd });
        }
        catch (cause) {
            const text = describe(cause);
            logger.warn('phrases.load_failed', { error: text });
            const expected = cause instanceof PhraseLoaderError;
            if (!expected) {
                logger.warn('phrases.unexpected_error', { error: text });
            }
            return [];
        }
    };
    const bibliography = await readBibliography();
    const patterns = await readPatterns();
    // Filesystem cache rooted at the configured directory (resolved against cwd).
    const cacheDir = path.resolve(cwd, config.cache.dir);
    const cache = createFsCache({
        dir: cacheDir,
        maxSizeMb: config.cache.max_size_mb ?? null,
    });
    // HTTP client. When the operator has explicitly pointed any DB API base at a
    // private/loopback host (e.g. a local stub or mirror), that deliberate
    // config is honored by allowing private hosts. The per-hop SSRF guard still
    // protects untrusted bibliography URLs in the default (public-API) case.
    const { crossref_base, openalex_base, openlibrary_base } = config.apis;
    const allowPrivateHosts = isPrivateApiBase(crossref_base) ||
        isPrivateApiBase(openalex_base) ||
        isPrivateApiBase(openlibrary_base);
    const http = createHttpClient({
        userAgent: userAgent ?? USER_AGENT_BASE,
        defaultTimeoutMs: 10_000,
        maxRetries: 2,
        perOriginConcurrency: 2,
        allowPrivateHosts,
    });
    return { config, cwd, bibliography, patterns, http, cache, logger, signal };
}
97
+ // ---------------------------------------------------------------------------
98
+ // Degraded result builders
99
+ // ---------------------------------------------------------------------------
100
/**
 * Build the existence-layer placeholder emitted for an entry when the
 * existence subcommand itself failed.
 *
 * @param message - Error text recorded both on the layer and on its single
 *   synthetic `crossref` check.
 * @returns A fresh `unverifiable` existence layer carrying the error.
 */
function degradedExistenceLayer(message) {
    const failedCheck = {
        source: 'crossref',
        result: 'error',
        evidence: { error: message },
    };
    const layer = {
        status: 'unverifiable',
        evidence: 'unverifiable',
        checkedFor: [],
        notCheckedFor: ['existence', 'metadata', 'canonical-url', 'claim-support'],
        checks: [failedCheck],
        error: message,
    };
    return layer;
}
110
/**
 * Build the canonical-layer placeholder emitted for an entry when the
 * canonical subcommand itself failed.
 *
 * @returns A fresh `{ status: 'not-applicable', url: null }` object.
 */
function degradedCanonicalLayer() {
    const layer = {
        status: 'not-applicable',
        url: null,
    };
    return layer;
}
113
+ // ---------------------------------------------------------------------------
114
+ // Tool version
115
+ // ---------------------------------------------------------------------------
116
/**
 * Resolve the tool version by looking for a `package.json` next to this module
 * and in up to three ancestor directories (four levels in total).
 *
 * The first manifest that parses as JSON and has a string `version` wins.
 * Unreadable or unparsable manifests are skipped.
 *
 * @returns The discovered version string, or `'0.0.0'` when none is found.
 */
async function readPackageVersion() {
    const FALLBACK_VERSION = '0.0.0';
    const MAX_LEVELS = 4;
    let searchDir;
    try {
        searchDir = path.dirname(fileURLToPath(import.meta.url));
    }
    catch {
        // Module location could not be resolved; nothing to search.
        return FALLBACK_VERSION;
    }
    for (let remaining = MAX_LEVELS; remaining > 0; remaining -= 1) {
        const manifestPath = path.join(searchDir, 'package.json');
        try {
            const manifest = JSON.parse(await nodeReadFile(manifestPath, 'utf-8'));
            if (typeof manifest.version === 'string') {
                return manifest.version;
            }
        }
        catch {
            // No usable manifest at this level; keep climbing.
        }
        searchDir = path.dirname(searchDir);
    }
    return FALLBACK_VERSION;
}
140
+ // ---------------------------------------------------------------------------
141
+ // runCheck
142
+ // ---------------------------------------------------------------------------
143
/**
 * Orchestrates all five subcommands and assembles the validated Output.
 *
 * Subcommands run sequentially. Each gets its own 5-minute deadline: the
 * caller's `deps.signal` (when one is supplied) combined with
 * `AbortSignal.timeout(300_000)`. When no caller signal is supplied the
 * timeout signal is used on its own.
 *
 * If a subcommand throws (including timeout), the error is caught, logged,
 * and a degraded result is emitted for that layer. The run continues with
 * remaining subcommands.
 *
 * @param deps - Bundle with `config`, `cwd`, `bibliography`, `patterns`,
 *   `http`, `cache`, `logger`, optional `signal`, optional `skip` (a set of
 *   subcommand names to skip), optional `readFile`, and optional
 *   `_run*` overrides for the individual subcommands.
 * @returns The Output document as returned by `OutputSchema.safeParse`.
 * @throws Error when the assembled Output fails schema validation.
 */
export async function runCheck(deps) {
    const {
        config,
        cwd,
        bibliography,
        patterns,
        http,
        cache,
        logger,
        signal,
        skip,
        readFile = (p) => nodeReadFile(p, 'utf-8'),
        _runExistence: doRunExistence = runExistence,
        _runCanonical: doRunCanonical = runCanonical,
        _runLinkage: doRunLinkage = runLinkage,
        _runPhrases: doRunPhrases = runPhrases,
        _runWorklist: doRunWorklist = runWorklist,
    } = deps;
    const SUBCOMMAND_TIMEOUT_MS = 300_000; // 5 minutes
    // AbortSignal.any() rejects non-AbortSignal members with a TypeError. Since
    // subSignal() is called inside each subcommand's try block, a missing
    // caller signal would otherwise degrade every subcommand instead of running
    // it. Fall back to the bare deadline when no caller signal is supplied.
    function subSignal() {
        const deadline = AbortSignal.timeout(SUBCOMMAND_TIMEOUT_MS);
        return signal == null ? deadline : AbortSignal.any([signal, deadline]);
    }
    // Per-entry maps: citekey → layer result
    const identifiersMap = new Map();
    const existenceMap = new Map();
    const canonicalMap = new Map();
    // Pre-populate maps with null (skipped) for all bibliography entries
    for (const entry of bibliography) {
        identifiersMap.set(entry.citekey, null);
        existenceMap.set(entry.citekey, null);
        canonicalMap.set(entry.citekey, null);
    }
    // --- identifiers (Layer 0: pure, local, pre-network well-formedness) ---
    // Always run when existence runs: a malformed/bad-checksum identifier is a
    // cheap fabrication signal that both gates (summary.malformedIdentifiers)
    // and short-circuits the network existence call. Skipped only when the
    // existence layer itself is skipped (no point validating ids we won't use).
    const identifierInvalid = new Set();
    if (!skip?.has('existence')) {
        const idResult = runIdentifiers({ bibliography });
        for (const e of idResult.entries) {
            identifiersMap.set(e.citekey, e.identifiers);
            const ids = e.identifiers;
            // A DOI/ISBN that is present but malformed/bad-checksum cannot be looked
            // up. (A bad URL does not block existence — existence keys off DOI/ISBN/
            // title — but still counts toward malformedIdentifiers in the summary.)
            if (ids.doi === 'malformed' ||
                ids.isbn === 'malformed' ||
                ids.isbn === 'bad-checksum') {
                identifierInvalid.add(e.citekey);
            }
        }
    }
    // --- existence ---
    if (!skip?.has('existence')) {
        try {
            const crossref = createCrossRefClient({
                http,
                cache,
                mailto: config.apis.crossref_mailto ?? undefined,
                baseUrl: config.apis.crossref_base,
            });
            const openalex = createOpenAlexClient({
                http,
                cache,
                mailto: config.apis.openalex_mailto ?? undefined,
                baseUrl: config.apis.openalex_base,
            });
            const openlibrary = createOpenLibraryClient({
                http,
                cache,
                baseUrl: config.apis.openlibrary_base,
            });
            const existenceDeps = {
                bibliography,
                clients: { crossref, openalex, openlibrary },
                identifierInvalid,
                signal: subSignal(),
            };
            const result = await doRunExistence(existenceDeps);
            for (const e of result.entries) {
                existenceMap.set(e.citekey, e.existence);
            }
            // Surface transport failures explicitly. An entry whose existence checks
            // are *all* transport errors (DNS/connect failure, 5xx after retries)
            // must not be silently treated as a clean "unverifiable" pass. We emit a
            // clear, actionable top-level message so the failure is not masked as
            // success. Entries deliberately skipped for a malformed identifier are
            // excluded — their all-error checks are an intentional short-circuit,
            // not a connectivity problem.
            const transportFailed = result.entries.filter((e) => !identifierInvalid.has(e.citekey) &&
                e.existence.checks.length > 0 &&
                e.existence.checks.every((c) => c.result === 'error'));
            if (transportFailed.length > 0) {
                logger.error('existence.transport_failure', {
                    message: 'Could not reach one or more bibliographic databases. ' +
                        'Existence could not be verified — this is a connectivity error, ' +
                        'not a confirmation that the works are absent. Check your network ' +
                        'connection and the [apis] base URLs in bibcheck.toml.',
                    affectedEntries: transportFailed.map((e) => e.citekey),
                });
            }
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            logger.error('existence.failed', { error: message });
            // Emit degraded existence for all entries
            for (const entry of bibliography) {
                existenceMap.set(entry.citekey, degradedExistenceLayer(message));
            }
        }
    }
    // --- canonical ---
    if (!skip?.has('canonical')) {
        try {
            const canonicalDeps = {
                config,
                bibliography,
                http,
                cache,
                signal: subSignal(),
            };
            const result = await doRunCanonical(canonicalDeps);
            for (const e of result.entries) {
                canonicalMap.set(e.citekey, e.canonical ?? null);
            }
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            logger.error('canonical.failed', { error: message });
            // Emit degraded canonical for all entries
            for (const entry of bibliography) {
                canonicalMap.set(entry.citekey, degradedCanonicalLayer());
            }
        }
    }
    // --- linkage ---
    let linkageResult = { linkage: [] };
    if (!skip?.has('linkage')) {
        try {
            const linkageDeps = {
                config,
                cwd,
                bibliography,
                readFile,
                signal: subSignal(),
            };
            linkageResult = await doRunLinkage(linkageDeps);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            logger.error('linkage.failed', { error: message });
        }
    }
    // --- phrases ---
    let phrasesResult = { phraseFlags: [] };
    if (!skip?.has('phrases')) {
        try {
            const phrasesDeps = {
                config,
                cwd,
                patterns,
                readFile,
                signal: subSignal(),
            };
            phrasesResult = await doRunPhrases(phrasesDeps);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            logger.error('phrases.failed', { error: message });
        }
    }
    // --- worklist ---
    let worklistResult = { worklist: [] };
    if (!skip?.has('worklist')) {
        try {
            const worklistDeps = {
                config,
                cwd,
                bibliography,
                readFile,
                signal: subSignal(),
            };
            worklistResult = await doRunWorklist(worklistDeps);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            logger.error('worklist.failed', { error: message });
        }
    }
    // --- Assemble entries ---
    const entries = bibliography.map((bib) => ({
        citekey: bib.citekey,
        identifiers: identifiersMap.get(bib.citekey) ?? null,
        existence: existenceMap.get(bib.citekey) ?? null,
        canonical: canonicalMap.get(bib.citekey) ?? null,
    }));
    // --- Compute summary ---
    const totalEntries = bibliography.length;
    // The four existence buckets PARTITION the entries: every entry lands in
    // exactly one, so they reconcile to totalEntries (T20 invariant, enforced by
    // OutputSchema). An entry whose existence layer was not run (null, e.g.
    // existence skipped) is treated as `unverifiable` for the partition — we
    // could not place it in any database, so it is not verified/mismatched/absent.
    let verified = 0;
    let metadataMismatches = 0;
    let notFoundInDatabases = 0;
    let unverifiable = 0;
    let malformedIdentifiers = 0;
    let canonicalIssues = 0;
    for (const entry of entries) {
        const status = entry.existence?.status ?? 'unverifiable';
        switch (status) {
            case 'verified':
                verified += 1;
                break;
            case 'metadata-mismatch':
                metadataMismatches += 1;
                break;
            case 'not-found-in-databases':
                notFoundInDatabases += 1;
                break;
            case 'unverifiable':
                unverifiable += 1;
                break;
        }
        // Malformed-identifier count (T21): any entry with a malformed/bad-checksum
        // DOI/ISBN/URL. Overlaps the existence buckets (those entries are
        // unverifiable) — it is a separate fabrication-signal counter, not a fifth
        // bucket. Gates by default. Uses the same predicate as the exit gate so
        // the summary count and the gate cannot drift apart.
        if (entryHasMalformedIdentifier(entry)) {
            malformedIdentifiers += 1;
        }
        // Same status set as the exit gate (CANONICAL_EXIT_STATUSES).
        if (entry.canonical !== null &&
            CANONICAL_EXIT_STATUSES.has(entry.canonical.status)) {
            canonicalIssues += 1;
        }
    }
    const linkageFailures = linkageResult.linkage.filter((l) => l.status === 'unresolved').length;
    // Reverse linkage (H2): bibliography citekeys never referenced in any doc.
    // Informational only — counted for visibility but NOT added to
    // checkExitReasons, so orphans never affect the exit code.
    const orphanedEntries = linkageResult.linkage.filter((l) => l.status === 'orphan').length;
    const phraseFlags = phrasesResult.phraseFlags.filter((f) => f.status === 'flagged').length;
    const worklistItems = worklistResult.worklist.length;
    const summary = {
        totalEntries,
        verified,
        metadataMismatches,
        notFoundInDatabases,
        malformedIdentifiers,
        unverifiable,
        canonicalIssues,
        linkageFailures,
        phraseFlags,
        worklistItems,
        orphanedEntries,
    };
    // --- Tool info ---
    const version = await readPackageVersion();
    const output = {
        schemaVersion: SCHEMA_VERSION,
        tool: { name: 'bibcheck', version },
        summary,
        entries,
        linkage: linkageResult.linkage,
        phraseFlags: phrasesResult.phraseFlags,
        worklist: worklistResult.worklist,
    };
    // --- T23 suppression: warn on reason-less allows + log acknowledged findings ---
    // Reason is MANDATORY: a `bibcheck-allow` with an empty/missing reason does
    // NOT suppress (isGated ignores it); warn so the omission is visible rather
    // than silently ineffective. An unknown finding-type token likewise warns.
    // Suppressed findings stay in the document (totals unchanged) and are logged
    // as informational acknowledgements — never silently dropped.
    {
        const ctx = buildSuppressionContext(config, bibliography);
        const { unknownTypes, reasonless } = parseAllowDiagnostics(bibliography);
        for (const u of reasonless) {
            logger.warn('suppression.allow_missing_reason', {
                citekey: u.citekey,
                findingType: u.findingType,
                message: `bibcheck-allow for '${u.findingType}' on @${u.citekey} has no (reason: ...); ` +
                    'reason is mandatory, so this allow does NOT suppress. Add a reason to silence the finding.',
            });
        }
        for (const u of unknownTypes) {
            logger.warn('suppression.allow_unknown_type', {
                citekey: u.citekey,
                token: u.token,
                message: `bibcheck-allow on @${u.citekey} names unknown finding-type '${u.token}'; ` +
                    'this directive suppresses nothing.',
            });
        }
        for (const ack of collectAcknowledgedFindings(output, ctx)) {
            logger.info('check.acknowledged_finding', {
                citekey: ack.citekey,
                findingType: ack.findingType,
                suppressedBy: ack.reason,
                message: `@${ack.citekey}: '${ack.findingType}' suppressed by ${ack.reason} ` +
                    '(reported as acknowledged, excluded from the build gate).',
            });
        }
    }
    // --- Validate ---
    const parsed = OutputSchema.safeParse(output);
    if (!parsed.success) {
        const firstIssue = parsed.error.issues[0];
        const msg = firstIssue
            ? `${firstIssue.path.join('.')}: ${firstIssue.message}`
            : parsed.error.message;
        logger.error('output.schema_invalid', { error: msg });
        throw new Error(`Output failed schema validation (bibcheck bug): ${msg}`);
    }
    return parsed.data;
}
466
+ // ---------------------------------------------------------------------------
467
+ // CHECK_NON_ZERO_REASON
468
+ // ---------------------------------------------------------------------------
469
/**
 * Finding kinds that can force a non-zero exit; each key maps to the identical
 * string value.
 *
 * The last two kinds (`not_found_in_databases`, `malformed_identifier`) are the
 * secure-default gates (T22 / Q1). T23 layers source-type exemptions and
 * per-finding suppression on top of them via the optional SuppressionContext
 * passed to `checkExitReasons`.
 */
export const CHECK_NON_ZERO_REASON = Object.fromEntries([
    'flagged_phrase',
    'unresolved_linkage',
    'canonical_issue',
    'metadata_mismatch',
    'not_found_in_databases',
    'malformed_identifier',
].map((kind) => [kind, kind]));
480
+ // ---------------------------------------------------------------------------
481
+ // checkExitReasons
482
+ // ---------------------------------------------------------------------------
483
/** Canonical-layer statuses that count as a problem for the exit gate. */
const CANONICAL_EXIT_STATUSES = new Set()
    .add('dead-url')
    .add('wrong-host')
    .add('no-url-on-pre-doi-entry')
    .add('live-url-not-archived-snapshot');
489
/**
 * Gating signal: does this entry carry a malformed identifier?
 *
 * @param e - Output entry; `e.identifiers` is either `null` (layer not run)
 *   or an object with `doi`, `isbn` and `url` verdicts.
 * @returns `true` when the DOI or URL is `'malformed'`, or the ISBN is
 *   `'malformed'` or `'bad-checksum'`; `false` otherwise, including when
 *   `identifiers` is `null`.
 */
function entryHasMalformedIdentifier(e) {
    const { identifiers } = e;
    if (identifiers === null) {
        return false;
    }
    const { doi, isbn, url } = identifiers;
    const badIsbn = ['malformed', 'bad-checksum'].includes(isbn);
    return doi === 'malformed' || badIsbn || url === 'malformed';
}
498
/**
 * Returns the list of finding kinds that should cause a non-zero exit.
 * Empty array → exit 0.
 *
 * Rules, evaluated and reported in this order:
 *  - 'flagged_phrase' if any phraseFlags[].status === 'flagged'
 *  - 'unresolved_linkage' if any linkage[].status === 'unresolved'
 *  - 'canonical_issue' if any entry's canonical.status is in the problem set
 *  - 'metadata_mismatch' if any entry's existence.status === 'metadata-mismatch'
 *  - 'not_found_in_databases' if any (non-suppressed) entry's existence.status
 *    === 'not-found-in-databases'
 *  - 'malformed_identifier' if any (non-suppressed) entry has a malformed
 *    DOI/ISBN/URL
 *
 * Acknowledged phrases, worklist items and unverifiable existence never
 * contribute a reason.
 *
 * T23: when `ctx` is supplied, each not-found / malformed finding is routed
 * through `isGated` and only counts when that resolves to `gated`. When `ctx`
 * is omitted, every such finding gates unconditionally. The Output document
 * itself is never modified here.
 *
 * @param output - The assembled Output document.
 * @param ctx - Optional suppression context (`config`, `cslTypeByCitekey`,
 *   `allows`).
 * @returns Array of `CHECK_NON_ZERO_REASON` values.
 */
export function checkExitReasons(output, ctx) {
    // Per-entry predicate for the two suppressible finding types.
    const stillGates = (entry, findingType) => ctx === undefined ||
        isGated({
            citekey: entry.citekey,
            findingType,
            cslType: ctx.cslTypeByCitekey.get(entry.citekey),
            config: ctx.config,
            allows: ctx.allows,
        }).gated;
    const isNotFound = (entry) => entry.existence !== null && entry.existence.status === 'not-found-in-databases';
    // Ordered rule table: [reason, lazily evaluated condition].
    const rules = [
        [
            CHECK_NON_ZERO_REASON.flagged_phrase,
            () => output.phraseFlags.some((flag) => flag.status === 'flagged'),
        ],
        [
            CHECK_NON_ZERO_REASON.unresolved_linkage,
            () => output.linkage.some((link) => link.status === 'unresolved'),
        ],
        [
            CHECK_NON_ZERO_REASON.canonical_issue,
            () => output.entries.some((entry) => entry.canonical !== null && CANONICAL_EXIT_STATUSES.has(entry.canonical.status)),
        ],
        [
            CHECK_NON_ZERO_REASON.metadata_mismatch,
            () => output.entries.some((entry) => entry.existence !== null && entry.existence.status === 'metadata-mismatch'),
        ],
        [
            CHECK_NON_ZERO_REASON.not_found_in_databases,
            () => output.entries.some((entry) => isNotFound(entry) && stillGates(entry, 'not-found')),
        ],
        [
            CHECK_NON_ZERO_REASON.malformed_identifier,
            () => output.entries.some((entry) => entryHasMalformedIdentifier(entry) && stillGates(entry, 'malformed-identifier')),
        ],
    ];
    const reasons = [];
    for (const [reason, applies] of rules) {
        if (applies()) {
            reasons.push(reason);
        }
    }
    return reasons;
}
578
/**
 * List the not-found / malformed findings that would have gated but were
 * suppressed (per `isGated`: not gated, with a reason other than `'default'`).
 * The Output document is not modified. The returned list feeds the
 * informational `check.acknowledged_finding` log entries.
 *
 * @param output - The assembled Output document.
 * @param ctx - Suppression context (`config`, `cslTypeByCitekey`, `allows`).
 * @returns Array of `{ citekey, findingType, reason }`, in entry order, with
 *   the not-found finding before the malformed one for a given entry.
 */
export function collectAcknowledgedFindings(output, ctx) {
    const acknowledged = [];
    // Ask isGated about one finding and record it when it was suppressed.
    const consider = (entry, findingType, cslType) => {
        const verdict = isGated({
            citekey: entry.citekey,
            findingType,
            cslType,
            config: ctx.config,
            allows: ctx.allows,
        });
        const suppressed = !verdict.gated && verdict.reason !== 'default';
        if (suppressed) {
            acknowledged.push({ citekey: entry.citekey, findingType, reason: verdict.reason });
        }
    };
    for (const entry of output.entries) {
        const cslType = ctx.cslTypeByCitekey.get(entry.citekey);
        const notFound = entry.existence !== null && entry.existence.status === 'not-found-in-databases';
        if (notFound) {
            consider(entry, 'not-found', cslType);
        }
        if (entryHasMalformedIdentifier(entry)) {
            consider(entry, 'malformed-identifier', cslType);
        }
    }
    return acknowledged;
}
616
/**
 * Build the T23 suppression context from the config and the loaded
 * bibliography.
 *
 * @param config - Passed through unchanged.
 * @param bibliography - Loaded entries; `citekey`, `type` and `note` are read.
 * @returns `{ config, cslTypeByCitekey, allows }`, where `cslTypeByCitekey`
 *   maps each citekey to its `type` and `allows` is the `allows` list returned
 *   by `parseAllowsForBibliography` for the entries' notes.
 */
export function buildSuppressionContext(config, bibliography) {
    const cslTypeByCitekey = new Map(bibliography.map((item) => [item.citekey, item.type]));
    const notes = bibliography.map((item) => ({ citekey: item.citekey, note: item.note }));
    const parsedAllows = parseAllowsForBibliography(notes);
    return { config, cslTypeByCitekey, allows: parsedAllows.allows };
}
631
/**
 * Diagnostics over the parsed allow directives; drives the warnings emitted by
 * `runCheck`.
 *
 * @param bibliography - Loaded entries; `citekey` and `note` are read.
 * @returns `{ unknownTypes, reasonless }`: `unknownTypes` holds
 *   `{ citekey, token }` for each unknown-type record reported by
 *   `parseAllowsForBibliography`; `reasonless` holds
 *   `{ citekey, findingType }` for each parsed allow whose `reason` is `null`.
 */
function parseAllowDiagnostics(bibliography) {
    const notes = bibliography.map((item) => ({ citekey: item.citekey, note: item.note }));
    const parsedAllows = parseAllowsForBibliography(notes);
    const reasonless = [];
    for (const allow of parsedAllows.allows) {
        if (allow.reason === null) {
            reasonless.push({ citekey: allow.citekey, findingType: allow.findingType });
        }
    }
    const unknownTypes = [];
    for (const unknown of parsedAllows.unknownTypes) {
        unknownTypes.push({ citekey: unknown.citekey, token: unknown.token });
    }
    return { unknownTypes, reasonless };
}
646
+ //# sourceMappingURL=check.js.map