@de-otio/bibcheck 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +147 -0
- package/dist/cache/fs-cache.d.ts +55 -0
- package/dist/cache/fs-cache.d.ts.map +1 -0
- package/dist/cache/fs-cache.js +264 -0
- package/dist/cache/fs-cache.js.map +1 -0
- package/dist/canonical.d.ts +29 -0
- package/dist/canonical.d.ts.map +1 -0
- package/dist/canonical.js +132 -0
- package/dist/canonical.js.map +1 -0
- package/dist/check.d.ts +140 -0
- package/dist/check.d.ts.map +1 -0
- package/dist/check.js +646 -0
- package/dist/check.js.map +1 -0
- package/dist/cli.d.ts +19 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +357 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +175 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +180 -0
- package/dist/config.js.map +1 -0
- package/dist/databases/crossref.d.ts +53 -0
- package/dist/databases/crossref.d.ts.map +1 -0
- package/dist/databases/crossref.js +138 -0
- package/dist/databases/crossref.js.map +1 -0
- package/dist/databases/index.d.ts +12 -0
- package/dist/databases/index.d.ts.map +1 -0
- package/dist/databases/index.js +9 -0
- package/dist/databases/index.js.map +1 -0
- package/dist/databases/openalex.d.ts +29 -0
- package/dist/databases/openalex.d.ts.map +1 -0
- package/dist/databases/openalex.js +117 -0
- package/dist/databases/openalex.js.map +1 -0
- package/dist/databases/openlibrary.d.ts +26 -0
- package/dist/databases/openlibrary.d.ts.map +1 -0
- package/dist/databases/openlibrary.js +79 -0
- package/dist/databases/openlibrary.js.map +1 -0
- package/dist/databases/worldcat.d.ts +33 -0
- package/dist/databases/worldcat.d.ts.map +1 -0
- package/dist/databases/worldcat.js +145 -0
- package/dist/databases/worldcat.js.map +1 -0
- package/dist/doctor.d.ts +44 -0
- package/dist/doctor.d.ts.map +1 -0
- package/dist/doctor.js +386 -0
- package/dist/doctor.js.map +1 -0
- package/dist/existence.d.ts +70 -0
- package/dist/existence.d.ts.map +1 -0
- package/dist/existence.js +308 -0
- package/dist/existence.js.map +1 -0
- package/dist/http.d.ts +97 -0
- package/dist/http.d.ts.map +1 -0
- package/dist/http.js +543 -0
- package/dist/http.js.map +1 -0
- package/dist/identifiers.d.ts +44 -0
- package/dist/identifiers.d.ts.map +1 -0
- package/dist/identifiers.js +111 -0
- package/dist/identifiers.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/linkage.d.ts +29 -0
- package/dist/linkage.d.ts.map +1 -0
- package/dist/linkage.js +73 -0
- package/dist/linkage.js.map +1 -0
- package/dist/markdown/blocks.d.ts +19 -0
- package/dist/markdown/blocks.d.ts.map +1 -0
- package/dist/markdown/blocks.js +69 -0
- package/dist/markdown/blocks.js.map +1 -0
- package/dist/markdown/citekeys.d.ts +22 -0
- package/dist/markdown/citekeys.d.ts.map +1 -0
- package/dist/markdown/citekeys.js +100 -0
- package/dist/markdown/citekeys.js.map +1 -0
- package/dist/markdown/glob.d.ts +18 -0
- package/dist/markdown/glob.d.ts.map +1 -0
- package/dist/markdown/glob.js +26 -0
- package/dist/markdown/glob.js.map +1 -0
- package/dist/markdown/prose.d.ts +19 -0
- package/dist/markdown/prose.d.ts.map +1 -0
- package/dist/markdown/prose.js +81 -0
- package/dist/markdown/prose.js.map +1 -0
- package/dist/output/json.d.ts +21 -0
- package/dist/output/json.d.ts.map +1 -0
- package/dist/output/json.js +24 -0
- package/dist/output/json.js.map +1 -0
- package/dist/output/markdown.d.ts +21 -0
- package/dist/output/markdown.d.ts.map +1 -0
- package/dist/output/markdown.js +194 -0
- package/dist/output/markdown.js.map +1 -0
- package/dist/output/sarif.d.ts +31 -0
- package/dist/output/sarif.d.ts.map +1 -0
- package/dist/output/sarif.js +322 -0
- package/dist/output/sarif.js.map +1 -0
- package/dist/output/text.d.ts +27 -0
- package/dist/output/text.d.ts.map +1 -0
- package/dist/output/text.js +212 -0
- package/dist/output/text.js.map +1 -0
- package/dist/phrases/load.d.ts +34 -0
- package/dist/phrases/load.d.ts.map +1 -0
- package/dist/phrases/load.js +148 -0
- package/dist/phrases/load.js.map +1 -0
- package/dist/phrases.d.ts +27 -0
- package/dist/phrases.d.ts.map +1 -0
- package/dist/phrases.js +116 -0
- package/dist/phrases.js.map +1 -0
- package/dist/schema/csl.d.ts +429 -0
- package/dist/schema/csl.d.ts.map +1 -0
- package/dist/schema/csl.js +101 -0
- package/dist/schema/csl.js.map +1 -0
- package/dist/schema/output.d.ts +1116 -0
- package/dist/schema/output.d.ts.map +1 -0
- package/dist/schema/output.js +419 -0
- package/dist/schema/output.js.map +1 -0
- package/dist/suppression.d.ts +106 -0
- package/dist/suppression.d.ts.map +1 -0
- package/dist/suppression.js +134 -0
- package/dist/suppression.js.map +1 -0
- package/dist/version.d.ts +11 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +14 -0
- package/dist/version.js.map +1 -0
- package/dist/worklist.d.ts +32 -0
- package/dist/worklist.d.ts.map +1 -0
- package/dist/worklist.js +211 -0
- package/dist/worklist.js.map +1 -0
- package/package.json +82 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phrase denylist loader.
|
|
3
|
+
*
|
|
4
|
+
* Reads a project-supplied TOML file, validates it with Zod, compiles each
|
|
5
|
+
* pattern with RE2JS (linear-time guarantees; ReDoS-safe), and returns the
|
|
6
|
+
* resulting CompiledPattern[].
|
|
7
|
+
*/
|
|
8
|
+
import { z } from 'zod';
|
|
9
|
+
import { parse as parseToml } from 'smol-toml';
|
|
10
|
+
import { readFile } from 'node:fs/promises';
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
import { RE2JS } from 're2js';
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Error class
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
/**
 * Error type thrown by the phrase denylist loader.
 *
 * The second constructor argument is stored verbatim on `cause`
 * (`undefined` when it is omitted).
 */
export class PhraseLoaderError extends Error {
    cause = undefined;
    name = 'PhraseLoaderError';

    /**
     * @param {string} message - Human-readable description of the failure.
     * @param {unknown} [cause] - Underlying error or value that triggered it.
     */
    constructor(message, cause) {
        super(message);
        this.cause = cause;
        // captureStackTrace is not available on every engine, so probe first.
        const captureStackTrace = Error.captureStackTrace;
        if (typeof captureStackTrace === 'function') {
            captureStackTrace.call(Error, this, PhraseLoaderError);
        }
    }
}
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Zod schema
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// A string that must contain at least one character.
const NonEmptyString = z.string().min(1);

// Shape of one `patterns` entry in the denylist file. `key` and `regex` are
// required; every other field may be left out, and `reference_url` may also
// be null.
const DenylistEntrySchema = z.object({
    key: NonEmptyString,
    regex: NonEmptyString,
    flags: z.string().optional(),
    reference_url: z.string().url().nullish(),
    description: z.string().optional(),
});

// Top-level shape of the denylist file: an optional `patterns` array.
const DenylistFileSchema = z.object({
    patterns: z.array(DenylistEntrySchema).optional(),
});
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Prototype-pollution guard
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Property names that are rejected wherever they appear as keys in parsed data.
const DANGEROUS_KEYS = new Set(['__proto__', 'constructor', 'prototype']);

/**
 * Recursively walk a parsed value and reject it if any object in the tree has
 * an own enumerable key listed in DANGEROUS_KEYS.
 *
 * @param {unknown} node - Value to inspect; non-objects and null are ignored.
 * @param {string} filePath - Path included in the error message.
 * @throws {PhraseLoaderError} When a dangerous key is found.
 */
function checkPollution(node, filePath) {
    const isContainer = typeof node === 'object' && node !== null;
    if (!isContainer) {
        return;
    }
    Object.keys(node).forEach((name) => {
        if (DANGEROUS_KEYS.has(name)) {
            throw new PhraseLoaderError(`Prototype pollution attempt: ${filePath}`);
        }
        checkPollution(node[name], filePath);
    });
}
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// Flag translation
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
/**
 * Convert a JavaScript-style flag string into an RE2JS flag bitmask.
 *
 * @param {string} flags - Flag characters; only 'i', 'm' and 's' are accepted.
 * @param {string} key - Pattern key, used in the error message.
 * @returns {number} Bitwise OR of the matching RE2JS flag constants (0 when
 *   `flags` is empty).
 * @throws {PhraseLoaderError} On any other flag character.
 */
function translateFlags(flags, key) {
    let mask = 0;
    for (const flag of flags) {
        switch (flag) {
            case 'i':
                mask |= RE2JS.CASE_INSENSITIVE;
                break;
            case 'm':
                mask |= RE2JS.MULTILINE;
                break;
            case 's':
                mask |= RE2JS.DOTALL;
                break;
            default:
                throw new PhraseLoaderError(`Pattern ${key} has unknown flag '${flag}': only 'i', 'm', 's' are supported`);
        }
    }
    return mask;
}
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
// Loader
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
/**
 * Load a phrase denylist from a TOML file, validate it, and compile every
 * pattern with RE2JS.
 *
 * Steps, in order: read the file, parse it as TOML, reject dangerous keys,
 * validate against DenylistFileSchema, reject duplicate `key` values, then
 * translate flags and compile each entry.
 *
 * @param {{ path: string, cwd?: string }} opts - `path` is joined onto `cwd`
 *   (default: `process.cwd()`).
 * @returns {Promise<Array<{ key: string, regex: string, flags: string,
 *   compiled: unknown, referenceUrl: string | null, description?: string }>>}
 *   One entry per pattern, in file order; empty when the file has no
 *   `patterns` array.
 * @throws {PhraseLoaderError} When the file is missing or unreadable, is not
 *   valid TOML, contains a dangerous key, fails schema validation, repeats a
 *   key, uses an unsupported flag, or contains a pattern RE2JS cannot compile.
 */
export async function loadDenylist(opts) {
    const cwd = opts.cwd ?? process.cwd();
    // NOTE(review): join() concatenates its segments, so an absolute
    // `opts.path` still ends up underneath `cwd` — confirm this is intended.
    const resolvedPath = join(cwd, opts.path);
    let contents;
    try {
        contents = await readFile(resolvedPath, 'utf-8');
    }
    catch (err) {
        // Only a missing file is "not found"; permission problems, directories
        // and other I/O failures are reported with their real reason.
        if (err?.code === 'ENOENT') {
            throw new PhraseLoaderError(`denylist file not found: ${resolvedPath}`, err);
        }
        const reason = err instanceof Error ? err.message : String(err);
        throw new PhraseLoaderError(`cannot read denylist file ${resolvedPath}: ${reason}`, err);
    }
    let raw;
    try {
        raw = parseToml(contents);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new PhraseLoaderError(`TOML parse error in ${resolvedPath}: ${reason}`, err);
    }
    // Prototype-pollution guard before Zod validation
    checkPollution(raw, resolvedPath);
    let parsed;
    try {
        parsed = DenylistFileSchema.parse(raw);
    }
    catch (err) {
        if (err instanceof z.ZodError) {
            // Report only the first issue, prefixed with its dotted field path.
            const first = err.issues[0];
            if (first !== undefined) {
                const fieldPath = first.path.join('.');
                throw new PhraseLoaderError(`Denylist validation error at ${fieldPath}: ${first.message}`, err);
            }
            /* c8 ignore next */
            // unreachable: ZodError always has at least one issue
            throw new PhraseLoaderError(`Denylist validation failed: ${err.message}`, err);
        }
        throw err;
    }
    const entries = parsed.patterns ?? [];
    // Duplicate-key detection runs over the whole list before any pattern is
    // compiled, so a duplicate is reported ahead of flag or regex errors.
    const seenKeys = new Set();
    for (const entry of entries) {
        if (seenKeys.has(entry.key)) {
            throw new PhraseLoaderError(`Duplicate key '${entry.key}' in denylist: ${resolvedPath}`);
        }
        seenKeys.add(entry.key);
    }
    // Compile each pattern
    const result = [];
    for (const entry of entries) {
        const flagsStr = entry.flags ?? '';
        const flagBits = translateFlags(flagsStr, entry.key);
        let compiled;
        try {
            compiled = RE2JS.compile(entry.regex, flagBits);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            throw new PhraseLoaderError(`Pattern ${entry.key} is not RE2-safe (no backreferences or lookahead): ${reason}`, err);
        }
        result.push({
            key: entry.key,
            regex: entry.regex,
            flags: flagsStr,
            compiled,
            // A missing or null reference_url is normalised to null.
            referenceUrl: entry.reference_url ?? null,
            description: entry.description,
        });
    }
    return result;
}
|
|
148
|
+
//# sourceMappingURL=load.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"load.js","sourceRoot":"","sources":["../../src/phrases/load.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AA4B9B,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IAGY;IAF7C,IAAI,GAAG,mBAA4B,CAAC;IAE7C,YAAY,OAAe,EAA2B,KAAe;QACnE,KAAK,CAAC,OAAO,CAAC,CAAC;QADqC,UAAK,GAAL,KAAK,CAAU;QAEnE,IAAI,OAAO,KAAK,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;YAClD,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;CACF;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IACnC,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACtB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACxB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC5B,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IACrD,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CACnC,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC,QAAQ,EAAE;CAClD,CAAC,CAAC;AAEH,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC,CAAC;AAE1E,SAAS,cAAc,CAAC,IAAa,EAAE,QAAgB;IACrD,IAAI,IAAI,KAAK,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO;IACtD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,IAA+B,CAAC,EAAE,CAAC;QAC/D,IAAI,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,iBAAiB,CAAC,gCAAgC,QAAQ,EAAE,CAAC,CAAC;QAC1E,CAAC;QACD,cAAc,CAAE,IAAgC,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,CAAC;IACnE,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E,SAAS,cAAc,CAAC,KAAa,EAAE,GAAW;IAChD,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,IAAI,IAAI,KAAK,CAAC,gBAAgB,CAAC;QACjC,CAAC;aAAM,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACtB,IAAI,IAAI,KAAK,CAAC,SAAS,CAAC;QAC1B,CAAC;aAAM,IAAI,EAAE,KAAK,GAAG,EAAE,
CAAC;YACtB,IAAI,IAAI,KAAK,CAAC,MAAM,CAAC;QACvB,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,iBAAiB,CACzB,WAAW,GAAG,sBAAsB,EAAE,qCAAqC,CAC5E,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,SAAS;AACT,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAyB;IAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACtC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IAE1C,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,MAAM,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;IACnD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,iBAAiB,CAAC,4BAA4B,YAAY,EAAE,EAAE,GAAG,CAAC,CAAC;IAC/E,CAAC;IAED,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,MAAM,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAChE,MAAM,IAAI,iBAAiB,CAAC,uBAAuB,YAAY,KAAK,MAAM,EAAE,EAAE,GAAG,CAAC,CAAC;IACrF,CAAC;IAED,kDAAkD;IAClD,cAAc,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAElC,IAAI,MAA0C,CAAC;IAC/C,IAAI,CAAC;QACH,MAAM,GAAG,kBAAkB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACzC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,CAAC,CAAC,QAAQ,EAAE,CAAC;YAC9B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC5B,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBACxB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACvC,MAAM,IAAI,iBAAiB,CACzB,gCAAgC,SAAS,KAAK,KAAK,CAAC,OAAO,EAAE,EAC7D,GAAG,CACJ,CAAC;YACJ,CAAC;YACD,oBAAoB;YACpB,sDAAsD;YACtD,MAAM,IAAI,iBAAiB,CAAC,+BAA+B,GAAG,CAAC,OAAO,EAAE,EAAE,GAAG,CAAC,CAAC;QACjF,CAAC;QACD,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC;IAEtC,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,iBAAiB,CAAC,kBAAkB,KAAK,CAAC,GAAG,kBAAkB,YAAY,EAAE,CAAC,CAAC;QAC3F,CAAC;QACD,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC1B,CAAC;IAED,uBAAuB;IACvB,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,IAAI,E
AAE,CAAC;QACnC,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QAErD,IAAI,QAA0C,CAAC;QAC/C,IAAI,CAAC;YACH,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QAClD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAChE,MAAM,IAAI,iBAAiB,CACzB,WAAW,KAAK,CAAC,GAAG,sDAAsD,MAAM,EAAE,EAClF,GAAG,CACJ,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,IAAI,CAAC;YACV,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,KAAK,EAAE,QAAQ;YACf,QAAQ;YACR,YAAY,EAAE,KAAK,CAAC,aAAa,IAAI,IAAI;YACzC,WAAW,EAAE,KAAK,CAAC,WAAW;SAC/B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `bibcheck phrases` subcommand.
|
|
3
|
+
*
|
|
4
|
+
* Performs a regex pass over markdown prose lines against a project-supplied
|
|
5
|
+
* phrase denylist, with `<!-- bibcheck-allow: <key> -->` acknowledgement-comment
|
|
6
|
+
* detection.
|
|
7
|
+
*
|
|
8
|
+
* Patterns are pre-compiled by the caller (buildCheckDeps / T13 / T15) and
|
|
9
|
+
* passed in via `RunPhrasesDeps.patterns`. When the array is empty (no
|
|
10
|
+
* denylist configured), the function short-circuits immediately without
|
|
11
|
+
* touching the filesystem.
|
|
12
|
+
*/
|
|
13
|
+
import type { Config } from './config.js';
|
|
14
|
+
import type { PhraseFlag } from './schema/output.js';
|
|
15
|
+
import type { CompiledPattern } from './phrases/load.js';
|
|
16
|
+
/** Everything `runPhrases` needs from its caller. */
export interface RunPhrasesDeps {
    /** Project configuration; supplies the document include/exclude globs. */
    config: Config;
    /** Directory used as the root for document discovery. */
    cwd: string;
    /** Pre-compiled denylist patterns; an empty array disables the pass. */
    patterns: Array<CompiledPattern>;
    /** Reads one discovered document and resolves with its text. */
    readFile: (path: string) => Promise<string>;
    /** Abort signal, checked before discovery and between documents. */
    signal: AbortSignal;
}
|
|
23
|
+
/** Outcome of a `runPhrases` pass. */
export interface RunPhrasesResult {
    /** One entry per pattern match found on a prose line. */
    phraseFlags: Array<PhraseFlag>;
}
|
|
26
|
+
/**
 * Run the phrase-denylist pass over the project's markdown documents.
 *
 * Resolves with an empty `phraseFlags` array when `deps.patterns` is empty.
 * Rejects with `deps.signal.reason` when the signal is already aborted or
 * becomes aborted between documents.
 */
export declare function runPhrases(deps: RunPhrasesDeps): Promise<RunPhrasesResult>;
|
|
27
|
+
//# sourceMappingURL=phrases.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phrases.d.ts","sourceRoot":"","sources":["../src/phrases.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAQzD,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC5B,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,EAAE,WAAW,CAAC;CACrB;AAED,MAAM,WAAW,gBAAgB;IAC/B,WAAW,EAAE,UAAU,EAAE,CAAC;CAC3B;AAgCD,wBAAsB,UAAU,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC,CA0FhF"}
|
package/dist/phrases.js
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `bibcheck phrases` subcommand.
|
|
3
|
+
*
|
|
4
|
+
* Performs a regex pass over markdown prose lines against a project-supplied
|
|
5
|
+
* phrase denylist, with `<!-- bibcheck-allow: <key> -->` acknowledgement-comment
|
|
6
|
+
* detection.
|
|
7
|
+
*
|
|
8
|
+
* Patterns are pre-compiled by the caller (buildCheckDeps / T13 / T15) and
|
|
9
|
+
* passed in via `RunPhrasesDeps.patterns`. When the array is empty (no
|
|
10
|
+
* denylist configured), the function short-circuits immediately without
|
|
11
|
+
* touching the filesystem.
|
|
12
|
+
*/
|
|
13
|
+
import { discoverDocs } from './markdown/glob.js';
|
|
14
|
+
import { extractProseLines } from './markdown/prose.js';
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Acknowledgement comment regex
|
|
17
|
+
//
|
|
18
|
+
// Matches: <!-- bibcheck-allow: <key> -->
|
|
19
|
+
// The key must consist of word characters and hyphens.
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Single shared pattern for `<!-- bibcheck-allow: <key> -->` comments.
// It carries the `g` flag because String.prototype.matchAll requires it;
// matchAll iterates over a clone of the regex, so sharing it between calls
// does not leak `lastIndex` state.
const ALLOW_COMMENT_RE = /<!--\s*bibcheck-allow:\s*([\w-]+)\s*-->/gi;

/**
 * Return the set of pattern keys acknowledged in a given text string.
 *
 * A single line may contain several acknowledgement comments; every one of
 * them contributes its key. The marker itself is matched case-insensitively,
 * while keys are returned exactly as written.
 *
 * @param {string} text - Text to scan.
 * @returns {Set<string>} Acknowledged keys (empty when there are none).
 */
function acknowledgedKeys(text) {
    // Capture group 1 always participates in a successful match.
    return new Set(Array.from(text.matchAll(ALLOW_COMMENT_RE), (match) => match[1]));
}
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// runPhrases
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
/**
 * Scan the prose lines of every discovered document against the compiled
 * denylist patterns and report each match.
 *
 * A match is reported as 'acknowledged' when a `bibcheck-allow` comment for
 * the pattern's key is found on the prose line's text or on the raw source
 * line immediately before it; otherwise it is reported as 'flagged'.
 *
 * @param {object} deps
 * @param {object} deps.config - Supplies `docs.include` / `docs.exclude`.
 * @param {string} deps.cwd - Root directory for document discovery.
 * @param {Array} deps.patterns - Compiled patterns; empty means nothing to do.
 * @param {(path: string) => Promise<string>} deps.readFile - Document reader.
 * @param {AbortSignal} deps.signal - Checked before discovery and between
 *   documents.
 * @returns {Promise<{ phraseFlags: Array }>} All matches, in document, line,
 *   pattern and match order.
 * @throws The signal's `reason` when the signal is aborted.
 */
export async function runPhrases(deps) {
    const { config, cwd, patterns, readFile, signal } = deps;
    // Fast-path: no patterns configured → nothing to do.
    if (patterns.length === 0) {
        return { phraseFlags: [] };
    }
    // Throw immediately if already aborted.
    if (signal.aborted) {
        // eslint-disable-next-line @typescript-eslint/no-throw-literal
        throw signal.reason;
    }
    const docs = await discoverDocs({
        cwd,
        include: config.docs.include,
        exclude: config.docs.exclude,
    });
    const phraseFlags = [];
    for (const doc of docs) {
        // Check abort between documents.
        if (signal.aborted) {
            // eslint-disable-next-line @typescript-eslint/no-throw-literal
            throw signal.reason;
        }
        const content = await readFile(doc.path);
        const proseLines = extractProseLines(content);
        // Split the raw source once per document (and only when there is prose
        // to scan). The preceding source line may not itself be a prose line,
        // so acknowledgement comments on it must be read from the raw text.
        const rawLines = proseLines.length > 0 ? content.split('\n') : [];
        for (const { line, text } of proseLines) {
            // `line` is 1-based, so the preceding raw line sits at index line - 2.
            const prevRawText = line > 1 ? (rawLines[line - 2] ?? '') : '';
            // NOTE(review): if extractProseLines strips HTML comments from
            // `text`, a same-line acknowledgement can never be seen here —
            // confirm, and consider scanning rawLines[line - 1] instead.
            const currentKeys = acknowledgedKeys(text);
            const prevKeys = acknowledgedKeys(prevRawText);
            for (const pattern of patterns) {
                // Use a RE2JS Matcher to find ALL matches on this prose line.
                const matcher = pattern.compiled.matcher(text);
                while (matcher.find()) {
                    const matchedText = matcher.group();
                    if (matchedText === null) {
                        // Should not happen for a successful find(), but guard defensively.
                        continue;
                    }
                    // Determine acknowledgement status.
                    const isAcknowledged = currentKeys.has(pattern.key) || prevKeys.has(pattern.key);
                    phraseFlags.push({
                        status: isAcknowledged ? 'acknowledged' : 'flagged',
                        patternKey: pattern.key,
                        referenceUrl: pattern.referenceUrl,
                        file: doc.relativePath,
                        line,
                        matchedText,
                    });
                }
            }
        }
    }
    return { phraseFlags };
}
|
|
116
|
+
//# sourceMappingURL=phrases.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phrases.js","sourceRoot":"","sources":["../src/phrases.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAkBxD,8EAA8E;AAC9E,gCAAgC;AAChC,EAAE;AACF,0CAA0C;AAC1C,uDAAuD;AACvD,8EAA8E;AAE9E,MAAM,gBAAgB,GAAG,0CAA0C,CAAC;AAEpE;;;;GAIG;AACH,SAAS,gBAAgB,CAAC,IAAY;IACpC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,QAAQ,GAAG,2CAA2C,CAAC;IAC7D,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACxC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACjB,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAChB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,IAAoB;IACnD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IAEzD,qDAAqD;IACrD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;IAC7B,CAAC;IAED,wCAAwC;IACxC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,+DAA+D;QAC/D,MAAM,MAAM,CAAC,MAAiB,CAAC;IACjC,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC;QAC9B,GAAG;QACH,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,OAAO;QAC5B,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,OAAO;KAC7B,CAAC,CAAC;IAEH,MAAM,WAAW,GAAiB,EAAE,CAAC;IAErC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,iCAAiC;QACjC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,+DAA+D;YAC/D,MAAM,MAAM,CAAC,MAAiB,CAAC;QACjC,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAE9C,oEAAoE;QACpE,qEAAqE;QACrE,oEAAoE;QACpE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC9C,KAAK,MAAM,EAAE,IAAI,UAAU,EAAE,CAAC;YAC5B,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;QACpC,CAAC;QAED,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,UAAU,EAAE,CAAC;YACxC,sEAAsE;YACtE,uEAAuE;YACvE,kEAAkE;YAClE,sEAAsE;YACtE,6DAA6D;YAC7D,yBAAyB;YACzB,EAAE;YACF,qEAAqE;YACrE,gEAAgE;YAChE,EAAE;YACF,oEAAoE;YACpE,uEAAuE;YACvE,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACrC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,C
AAC,QAAQ,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAE/D,MAAM,WAAW,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;YAC3C,MAAM,QAAQ,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;YAE/C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC/B,8DAA8D;gBAC9D,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;gBAE/C,OAAO,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;oBACtB,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;oBACpC,IAAI,WAAW,KAAK,IAAI,EAAE,CAAC;wBACzB,oEAAoE;wBACpE,SAAS;oBACX,CAAC;oBAED,oCAAoC;oBACpC,MAAM,cAAc,GAClB,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;oBAE5D,MAAM,IAAI,GAAe;wBACvB,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS;wBACnD,UAAU,EAAE,OAAO,CAAC,GAAG;wBACvB,YAAY,EAAE,OAAO,CAAC,YAAY;wBAClC,IAAI,EAAE,GAAG,CAAC,YAAY;wBACtB,IAAI;wBACJ,WAAW;qBACZ,CAAC;oBAEF,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACzB,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,WAAW,EAAE,CAAC;AACzB,CAAC"}
|