@de-otio/bibcheck 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +147 -0
  3. package/dist/cache/fs-cache.d.ts +55 -0
  4. package/dist/cache/fs-cache.d.ts.map +1 -0
  5. package/dist/cache/fs-cache.js +264 -0
  6. package/dist/cache/fs-cache.js.map +1 -0
  7. package/dist/canonical.d.ts +29 -0
  8. package/dist/canonical.d.ts.map +1 -0
  9. package/dist/canonical.js +132 -0
  10. package/dist/canonical.js.map +1 -0
  11. package/dist/check.d.ts +140 -0
  12. package/dist/check.d.ts.map +1 -0
  13. package/dist/check.js +646 -0
  14. package/dist/check.js.map +1 -0
  15. package/dist/cli.d.ts +19 -0
  16. package/dist/cli.d.ts.map +1 -0
  17. package/dist/cli.js +357 -0
  18. package/dist/cli.js.map +1 -0
  19. package/dist/config.d.ts +175 -0
  20. package/dist/config.d.ts.map +1 -0
  21. package/dist/config.js +180 -0
  22. package/dist/config.js.map +1 -0
  23. package/dist/databases/crossref.d.ts +53 -0
  24. package/dist/databases/crossref.d.ts.map +1 -0
  25. package/dist/databases/crossref.js +138 -0
  26. package/dist/databases/crossref.js.map +1 -0
  27. package/dist/databases/index.d.ts +12 -0
  28. package/dist/databases/index.d.ts.map +1 -0
  29. package/dist/databases/index.js +9 -0
  30. package/dist/databases/index.js.map +1 -0
  31. package/dist/databases/openalex.d.ts +29 -0
  32. package/dist/databases/openalex.d.ts.map +1 -0
  33. package/dist/databases/openalex.js +117 -0
  34. package/dist/databases/openalex.js.map +1 -0
  35. package/dist/databases/openlibrary.d.ts +26 -0
  36. package/dist/databases/openlibrary.d.ts.map +1 -0
  37. package/dist/databases/openlibrary.js +79 -0
  38. package/dist/databases/openlibrary.js.map +1 -0
  39. package/dist/databases/worldcat.d.ts +33 -0
  40. package/dist/databases/worldcat.d.ts.map +1 -0
  41. package/dist/databases/worldcat.js +145 -0
  42. package/dist/databases/worldcat.js.map +1 -0
  43. package/dist/doctor.d.ts +44 -0
  44. package/dist/doctor.d.ts.map +1 -0
  45. package/dist/doctor.js +386 -0
  46. package/dist/doctor.js.map +1 -0
  47. package/dist/existence.d.ts +70 -0
  48. package/dist/existence.d.ts.map +1 -0
  49. package/dist/existence.js +308 -0
  50. package/dist/existence.js.map +1 -0
  51. package/dist/http.d.ts +97 -0
  52. package/dist/http.d.ts.map +1 -0
  53. package/dist/http.js +543 -0
  54. package/dist/http.js.map +1 -0
  55. package/dist/identifiers.d.ts +44 -0
  56. package/dist/identifiers.d.ts.map +1 -0
  57. package/dist/identifiers.js +111 -0
  58. package/dist/identifiers.js.map +1 -0
  59. package/dist/index.d.ts +9 -0
  60. package/dist/index.d.ts.map +1 -0
  61. package/dist/index.js +8 -0
  62. package/dist/index.js.map +1 -0
  63. package/dist/linkage.d.ts +29 -0
  64. package/dist/linkage.d.ts.map +1 -0
  65. package/dist/linkage.js +73 -0
  66. package/dist/linkage.js.map +1 -0
  67. package/dist/markdown/blocks.d.ts +19 -0
  68. package/dist/markdown/blocks.d.ts.map +1 -0
  69. package/dist/markdown/blocks.js +69 -0
  70. package/dist/markdown/blocks.js.map +1 -0
  71. package/dist/markdown/citekeys.d.ts +22 -0
  72. package/dist/markdown/citekeys.d.ts.map +1 -0
  73. package/dist/markdown/citekeys.js +100 -0
  74. package/dist/markdown/citekeys.js.map +1 -0
  75. package/dist/markdown/glob.d.ts +18 -0
  76. package/dist/markdown/glob.d.ts.map +1 -0
  77. package/dist/markdown/glob.js +26 -0
  78. package/dist/markdown/glob.js.map +1 -0
  79. package/dist/markdown/prose.d.ts +19 -0
  80. package/dist/markdown/prose.d.ts.map +1 -0
  81. package/dist/markdown/prose.js +81 -0
  82. package/dist/markdown/prose.js.map +1 -0
  83. package/dist/output/json.d.ts +21 -0
  84. package/dist/output/json.d.ts.map +1 -0
  85. package/dist/output/json.js +24 -0
  86. package/dist/output/json.js.map +1 -0
  87. package/dist/output/markdown.d.ts +21 -0
  88. package/dist/output/markdown.d.ts.map +1 -0
  89. package/dist/output/markdown.js +194 -0
  90. package/dist/output/markdown.js.map +1 -0
  91. package/dist/output/sarif.d.ts +31 -0
  92. package/dist/output/sarif.d.ts.map +1 -0
  93. package/dist/output/sarif.js +322 -0
  94. package/dist/output/sarif.js.map +1 -0
  95. package/dist/output/text.d.ts +27 -0
  96. package/dist/output/text.d.ts.map +1 -0
  97. package/dist/output/text.js +212 -0
  98. package/dist/output/text.js.map +1 -0
  99. package/dist/phrases/load.d.ts +34 -0
  100. package/dist/phrases/load.d.ts.map +1 -0
  101. package/dist/phrases/load.js +148 -0
  102. package/dist/phrases/load.js.map +1 -0
  103. package/dist/phrases.d.ts +27 -0
  104. package/dist/phrases.d.ts.map +1 -0
  105. package/dist/phrases.js +116 -0
  106. package/dist/phrases.js.map +1 -0
  107. package/dist/schema/csl.d.ts +429 -0
  108. package/dist/schema/csl.d.ts.map +1 -0
  109. package/dist/schema/csl.js +101 -0
  110. package/dist/schema/csl.js.map +1 -0
  111. package/dist/schema/output.d.ts +1116 -0
  112. package/dist/schema/output.d.ts.map +1 -0
  113. package/dist/schema/output.js +419 -0
  114. package/dist/schema/output.js.map +1 -0
  115. package/dist/suppression.d.ts +106 -0
  116. package/dist/suppression.d.ts.map +1 -0
  117. package/dist/suppression.js +134 -0
  118. package/dist/suppression.js.map +1 -0
  119. package/dist/version.d.ts +11 -0
  120. package/dist/version.d.ts.map +1 -0
  121. package/dist/version.js +14 -0
  122. package/dist/version.js.map +1 -0
  123. package/dist/worklist.d.ts +32 -0
  124. package/dist/worklist.d.ts.map +1 -0
  125. package/dist/worklist.js +211 -0
  126. package/dist/worklist.js.map +1 -0
  127. package/package.json +82 -0
@@ -0,0 +1,111 @@
1
+ /**
2
+ * bibcheck identifiers — local, offline DOI / ISBN / URL well-formedness checks.
3
+ *
4
+ * A pure functional-core module: no I/O, all input passed as arguments. It runs
5
+ * BEFORE any network call and catches the large class of AI-fabricated citations
6
+ * that carry a malformed identifier (a transposed ISBN digit, a DOI with stray
7
+ * punctuation, a non-URL in `url:`) — the cheapest, highest-yield hallucination
8
+ * signal. Emits the `IdentifiersLayer` from the output schema.
9
+ *
10
+ * No runtime dependencies: ISBN check-digit validation and normalization are
11
+ * hand-rolled (both are small, well-specified algorithms).
12
+ */
13
+ // ---------------------------------------------------------------------------
14
+ // DOI
15
+ // ---------------------------------------------------------------------------
16
/** Shape of a bare DOI: `10.`, a registrant code of four or more digits, `/`, then a whitespace-free suffix. */
const DOI_RE = /^10\.\d{4,}\/\S+$/i;
/**
 * Remove surrounding whitespace and a leading resolver prefix from a DOI.
 *
 * The `http(s)://doi.org/` / `http(s)://dx.doi.org/` form is removed first, then a
 * leading `doi:` scheme; both are matched case-insensitively.
 */
function stripDoiPrefix(doi) {
    const withoutResolver = doi.trim().replace(/^https?:\/\/(dx\.)?doi\.org\//i, '');
    return withoutResolver.replace(/^doi:/i, '');
}
/**
 * Classify a DOI string.
 *
 * @returns `'ok'` when the prefix-stripped value matches the DOI shape, otherwise `'malformed'`.
 */
export function validateDoi(doi) {
    const bare = stripDoiPrefix(doi);
    if (DOI_RE.test(bare)) {
        return 'ok';
    }
    return 'malformed';
}
29
+ // ---------------------------------------------------------------------------
30
+ // ISBN
31
+ // ---------------------------------------------------------------------------
32
/**
 * Reduce an ISBN to its canonical compact form: whitespace and hyphens dropped,
 * letters upper-cased (so a trailing check character `x` becomes `X`).
 *
 * Callers can use the result as a stable key instead of the raw,
 * variably-hyphenated input.
 *
 * @returns The compact string when it is nine digits plus a digit-or-`X`
 *   (ISBN-10 shape) or thirteen digits (ISBN-13 shape); otherwise `null`.
 *   Check digits are not verified here.
 */
export function normalizeIsbn(raw) {
    const compact = raw.replace(/[\s-]/g, '').toUpperCase();
    const isIsbn10Shape = /^\d{9}[\dX]$/.test(compact);
    const isIsbn13Shape = /^\d{13}$/.test(compact);
    return isIsbn10Shape || isIsbn13Shape ? compact : null;
}
46
/**
 * Check-digit test for a compact 10-character ISBN-10.
 *
 * The first nine digits are weighted 10 down to 2 and the final character
 * (`X` counts as 10) is added with weight 1; the total must be divisible by 11.
 * A missing or non-numeric character makes the total NaN, so the result is false.
 */
function isbn10CheckOk(s) {
    const weightedSum = Array.from({ length: 9 }, (_, idx) => (10 - idx) * Number(s[idx]))
        .reduce((acc, term) => acc + term, 0);
    const last = s[9];
    const checkValue = last === 'X' ? 10 : Number(last);
    return (weightedSum + checkValue) % 11 === 0;
}
55
/**
 * Check-digit test for a compact 13-digit ISBN-13.
 *
 * Digits at even indices are weighted 1 and digits at odd indices 3; the total
 * over all thirteen positions must be divisible by 10. A missing or non-numeric
 * character makes the total NaN, so the result is false.
 */
function isbn13CheckOk(s) {
    const total = Array.from({ length: 13 }, (_, idx) => {
        const weight = idx % 2 === 0 ? 1 : 3;
        return weight * Number(s[idx]);
    }).reduce((acc, term) => acc + term, 0);
    return total % 10 === 0;
}
63
/**
 * Classify an ISBN-10 or ISBN-13 string.
 *
 * @returns `'malformed'` when the input does not normalize to an ISBN shape,
 *   `'bad-checksum'` when the shape is right but the check digit is wrong,
 *   otherwise `'ok'`.
 */
export function validateIsbn(isbn) {
    const compact = normalizeIsbn(isbn);
    if (compact === null) {
        return 'malformed';
    }
    // normalizeIsbn only ever returns 10- or 13-character strings.
    const checkDigitOk = compact.length === 10 ? isbn10CheckOk : isbn13CheckOk;
    return checkDigitOk(compact) ? 'ok' : 'bad-checksum';
}
71
+ // ---------------------------------------------------------------------------
72
+ // URL
73
+ // ---------------------------------------------------------------------------
74
/**
 * Classify a URL string.
 *
 * @returns `'ok'` when the string parses with the WHATWG `URL` constructor and
 *   its scheme is `http:` or `https:`; `'malformed'` for anything else,
 *   including strings that fail to parse.
 */
export function validateUrl(url) {
    try {
        const { protocol } = new URL(url);
        return ['http:', 'https:'].includes(protocol) ? 'ok' : 'malformed';
    }
    catch {
        // The URL constructor throws on input it cannot parse.
        return 'malformed';
    }
}
85
+ // ---------------------------------------------------------------------------
86
+ // Per-entry layer + runner
87
+ // ---------------------------------------------------------------------------
88
/** True only for a string that still has content after trimming whitespace. */
function present(value) {
    if (typeof value !== 'string') {
        return false;
    }
    return value.trim() !== '';
}
92
/**
 * Build the identifiers layer for a single bibliography entry.
 *
 * Each of `doi`, `isbn` and `url` is validated only when the entry carries a
 * non-blank string for it; an absent or blank field is reported as
 * `'not-applicable'`.
 */
export function identifiersFor(entry) {
    const classify = (value, validate) => (present(value) ? validate(value) : 'not-applicable');
    return {
        doi: classify(entry.doi, validateDoi),
        isbn: classify(entry.isbn, validateIsbn),
        url: classify(entry.url, validateUrl),
    };
}
99
/**
 * Run identifier validation over `deps.bibliography`.
 *
 * Performs no I/O. The returned `entries` array has one
 * `{ citekey, identifiers }` record per bibliography entry, in input order.
 */
export function runIdentifiers(deps) {
    const toRecord = (entry) => {
        const { citekey } = entry;
        return { citekey, identifiers: identifiersFor(entry) };
    };
    const entries = deps.bibliography.map(toRecord);
    return { entries };
}
111
+ //# sourceMappingURL=identifiers.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"identifiers.js","sourceRoot":"","sources":["../src/identifiers.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAiBH,8EAA8E;AAC9E,MAAM;AACN,8EAA8E;AAE9E,qFAAqF;AACrF,MAAM,MAAM,GAAG,oBAAoB,CAAC;AAEpC,0FAA0F;AAC1F,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,GAAG;SACP,IAAI,EAAE;SACN,OAAO,CAAC,gCAAgC,EAAE,EAAE,CAAC;SAC7C,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED,6BAA6B;AAC7B,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,OAAO,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC;AAC/D,CAAC;AAED,8EAA8E;AAC9E,OAAO;AACP,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,MAAM,QAAQ,GAAG,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IACzD,IAAI,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC,CAAC,UAAU;IAC9D,IAAI,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC,CAAC,UAAU;IAC1D,OAAO,IAAI,CAAC;AACd,CAAC;AAED,gEAAgE;AAChE,SAAS,aAAa,CAAC,CAAS;IAC9B,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,GAAG,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC;IACD,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/C,OAAO,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;AAClC,CAAC;AAED,gEAAgE;AAChE,SAAS,aAAa,CAAC,CAAS;IAC9B,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;AACxB,CAAC;AAED,0CAA0C;AAC1C,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,UAAU,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,UAAU,KAAK,IAAI;QAAE,OAAO,WAAW,CAAC;IAC5C,MAAM,EAAE,GAAG,UAAU,CAAC,MAAM,KAAK,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;IAC5F,OAAO,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,cAAc,CAAC;AACpC,CAAC;AAED,8EAA8E;AAC9E,MAAM;AACN,
8EAA8E;AAE9E,8DAA8D;AAC9D,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,WAAW,CAAC;IACrB,CAAC;IACD,OAAO,MAAM,CAAC,QAAQ,KAAK,OAAO,IAAI,MAAM,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC;AAC1F,CAAC;AAED,8EAA8E;AAC9E,2BAA2B;AAC3B,8EAA8E;AAE9E,uDAAuD;AACvD,SAAS,OAAO,CAAC,KAAyB;IACxC,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;AAC1D,CAAC;AAED,mDAAmD;AACnD,MAAM,UAAU,cAAc,CAAC,KAAe;IAC5C,MAAM,GAAG,GAAqB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC;IAC7F,MAAM,IAAI,GAAqB,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC;IACjG,MAAM,GAAG,GAAqB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC;IAC7F,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC;AAC5B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAwB;IACrD,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACzC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,WAAW,EAAE,cAAc,CAAC,KAAK,CAAC;SACnC,CAAC,CAAC;KACJ,CAAC;AACJ,CAAC"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * bibcheck — humanities-aware citation verification for CSL-JSON bibliographies.
3
+ *
4
+ * Library entry point. Re-exports the output schema for programmatic consumers.
5
+ * For CLI use, see `cli.ts` and the `bibcheck` binary.
6
+ */
7
+ export { SCHEMA_VERSION, ToolInfoSchema, SummarySchema, ExistenceCheckSourceSchema, ExistenceCheckResultSchema, ExistenceCheckSchema, ExistenceStatusSchema, ExistenceLayerSchema, CanonicalStatusSchema, CanonicalLayerSchema, EntrySchema, LinkageStatusSchema, LinkageReferenceSchema, LinkageEntrySchema, PhraseFlagStatusSchema, PhraseFlagSchema, WorklistItemTypeSchema, WorklistItemSchema, OutputSchema, } from './schema/output.js';
8
+ export type { ToolInfo, Summary, ExistenceCheckSource, ExistenceCheckResult, ExistenceCheck, ExistenceStatus, ExistenceLayer, CanonicalStatus, CanonicalLayer, Entry, LinkageStatus, LinkageReference, LinkageEntry, PhraseFlagStatus, PhraseFlag, WorklistItemType, WorklistItem, Output, } from './schema/output.js';
9
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,0BAA0B,EAC1B,0BAA0B,EAC1B,oBAAoB,EACpB,qBAAqB,EACrB,oBAAoB,EACpB,qBAAqB,EACrB,oBAAoB,EACpB,WAAW,EACX,mBAAmB,EACnB,sBAAsB,EACtB,kBAAkB,EAClB,sBAAsB,EACtB,gBAAgB,EAChB,sBAAsB,EACtB,kBAAkB,EAClB,YAAY,GACb,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EACV,QAAQ,EACR,OAAO,EACP,oBAAoB,EACpB,oBAAoB,EACpB,cAAc,EACd,eAAe,EACf,cAAc,EACd,eAAe,EACf,cAAc,EACd,KAAK,EACL,aAAa,EACb,gBAAgB,EAChB,YAAY,EACZ,gBAAgB,EAChB,UAAU,EACV,gBAAgB,EAChB,YAAY,EACZ,MAAM,GACP,MAAM,oBAAoB,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,8 @@
1
+ /**
2
+ * bibcheck — humanities-aware citation verification for CSL-JSON bibliographies.
3
+ *
4
+ * Library entry point. Re-exports the output schema for programmatic consumers.
5
+ * For CLI use, see `cli.ts` and the `bibcheck` binary.
6
+ */
7
+ export { SCHEMA_VERSION, ToolInfoSchema, SummarySchema, ExistenceCheckSourceSchema, ExistenceCheckResultSchema, ExistenceCheckSchema, ExistenceStatusSchema, ExistenceLayerSchema, CanonicalStatusSchema, CanonicalLayerSchema, EntrySchema, LinkageStatusSchema, LinkageReferenceSchema, LinkageEntrySchema, PhraseFlagStatusSchema, PhraseFlagSchema, WorklistItemTypeSchema, WorklistItemSchema, OutputSchema, } from './schema/output.js';
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,0BAA0B,EAC1B,0BAA0B,EAC1B,oBAAoB,EACpB,qBAAqB,EACrB,oBAAoB,EACpB,qBAAqB,EACrB,oBAAoB,EACpB,WAAW,EACX,mBAAmB,EACnB,sBAAsB,EACtB,kBAAkB,EAClB,sBAAsB,EACtB,gBAAgB,EAChB,sBAAsB,EACtB,kBAAkB,EAClB,YAAY,GACb,MAAM,oBAAoB,CAAC"}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Linkage subcommand: verify that every @citekey reference in the markdown
3
+ * documents resolves to an entry in the bibliography.
4
+ *
5
+ * For each citekey found in prose, emits a LinkageEntry with status
6
+ * 'resolved' or 'unresolved' and the full list of file:line references.
7
+ *
8
+ * Reverse linkage (H2): each bibliography citekey that is NEVER referenced in
9
+ * any doc is also emitted, with status 'orphan' and an empty `references`
10
+ * array. Orphans are INFORMATIONAL — an uncited bibliography entry is a smell
11
+ * (e.g. an LLM-padded reference list), not proof of fabrication — so they do
12
+ * NOT gate `bibcheck check`. They are excluded from `summary.linkageFailures`
13
+ * (which counts only 'unresolved'); see `summary.orphanedEntries`.
14
+ */
15
+ import type { CslEntry } from './schema/csl.js';
16
+ import type { LinkageEntry } from './schema/output.js';
17
+ import type { Config } from './config.js';
18
/** Dependencies injected into {@link runLinkage}; doc contents are loaded through `readFile`. */
export interface RunLinkageDeps {
    /** Configuration; `config.docs.include` / `config.docs.exclude` drive doc discovery. */
    config: Config;
    /** Directory handed to doc discovery as its working directory. */
    cwd: string;
    /** Bibliography entries; their citekeys are what a prose reference can resolve to. */
    bibliography: CslEntry[];
    /** Loads a discovered doc's content given its path. */
    readFile: (path: string) => Promise<string>;
    /** Checked before each doc is read; once aborted, the run rejects with an error named `AbortError`. */
    signal: AbortSignal;
}
/** Result of {@link runLinkage}. */
export interface RunLinkageResult {
    /** One entry per citekey ('resolved', 'unresolved' or 'orphan'), sorted by citekey. */
    linkage: LinkageEntry[];
}
/**
 * Match every citekey referenced in the discovered docs against the
 * bibliography, and report bibliography citekeys that no doc references.
 */
export declare function runLinkage(deps: RunLinkageDeps): Promise<RunLinkageResult>;
29
+ //# sourceMappingURL=linkage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"linkage.d.ts","sourceRoot":"","sources":["../src/linkage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAIH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,KAAK,EAAE,YAAY,EAAoB,MAAM,oBAAoB,CAAC;AACzE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,YAAY,EAAE,QAAQ,EAAE,CAAC;IACzB,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,EAAE,WAAW,CAAC;CACrB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,YAAY,EAAE,CAAC;CACzB;AAED,wBAAsB,UAAU,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAgEhF"}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Linkage subcommand: verify that every @citekey reference in the markdown
3
+ * documents resolves to an entry in the bibliography.
4
+ *
5
+ * For each citekey found in prose, emits a LinkageEntry with status
6
+ * 'resolved' or 'unresolved' and the full list of file:line references.
7
+ *
8
+ * Reverse linkage (H2): each bibliography citekey that is NEVER referenced in
9
+ * any doc is also emitted, with status 'orphan' and an empty `references`
10
+ * array. Orphans are INFORMATIONAL — an uncited bibliography entry is a smell
11
+ * (e.g. an LLM-padded reference list), not proof of fabrication — so they do
12
+ * NOT gate `bibcheck check`. They are excluded from `summary.linkageFailures`
13
+ * (which counts only 'unresolved'); see `summary.orphanedEntries`.
14
+ */
15
+ import { discoverDocs } from './markdown/glob.js';
16
+ import { extractCitekeys } from './markdown/citekeys.js';
17
+ export async function runLinkage(deps) {
18
+ const { config, cwd, bibliography, readFile, signal } = deps;
19
+ // Step 1: Discover docs
20
+ const docs = await discoverDocs({
21
+ cwd,
22
+ include: config.docs.include,
23
+ exclude: config.docs.exclude,
24
+ });
25
+ // Step 2: Build bibliography citekey set
26
+ const bibKeys = new Set(bibliography.map((e) => e.citekey));
27
+ // Step 3: Process each doc, aggregate references by citekey
28
+ const referenceMap = new Map();
29
+ for (const doc of docs) {
30
+ if (signal.aborted) {
31
+ const err = new Error('runLinkage aborted');
32
+ err.name = 'AbortError';
33
+ throw err;
34
+ }
35
+ const content = await readFile(doc.path);
36
+ const citekeyRefs = extractCitekeys(content, doc.relativePath);
37
+ for (const ref of citekeyRefs) {
38
+ let refs = referenceMap.get(ref.citekey);
39
+ if (refs === undefined) {
40
+ refs = [];
41
+ referenceMap.set(ref.citekey, refs);
42
+ }
43
+ const linkRef = { file: ref.file, line: ref.line };
44
+ if (ref.locator !== null)
45
+ linkRef.locator = ref.locator;
46
+ if (ref.authorSuppressed)
47
+ linkRef.authorSuppressed = true;
48
+ refs.push(linkRef);
49
+ }
50
+ }
51
+ // Step 4: Build LinkageEntry array for every citekey referenced in prose.
52
+ const linkage = [];
53
+ for (const [citekey, references] of referenceMap) {
54
+ // Stable secondary sort: deterministic regardless of doc-discovery order.
55
+ references.sort((a, b) => a.file.localeCompare(b.file) || a.line - b.line);
56
+ linkage.push({
57
+ citekey,
58
+ status: bibKeys.has(citekey) ? 'resolved' : 'unresolved',
59
+ references,
60
+ });
61
+ }
62
+ // Step 5: Reverse linkage — emit an 'orphan' entry for every bibliography
63
+ // citekey that no doc referenced. Informational only; never gates.
64
+ for (const citekey of bibKeys) {
65
+ if (!referenceMap.has(citekey)) {
66
+ linkage.push({ citekey, status: 'orphan', references: [] });
67
+ }
68
+ }
69
+ // Step 6: Sort by citekey for deterministic output
70
+ linkage.sort((a, b) => a.citekey.localeCompare(b.citekey));
71
+ return { linkage };
72
+ }
73
+ //# sourceMappingURL=linkage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"linkage.js","sourceRoot":"","sources":["../src/linkage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAiBzD,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,IAAoB;IACnD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IAE7D,wBAAwB;IACxB,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC;QAC9B,GAAG;QACH,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,OAAO;QAC5B,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,OAAO;KAC7B,CAAC,CAAC;IAEH,yCAAyC;IACzC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAE5D,4DAA4D;IAC5D,MAAM,YAAY,GAAG,IAAI,GAAG,EAA8B,CAAC;IAE3D,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;YAC5C,GAAG,CAAC,IAAI,GAAG,YAAY,CAAC;YACxB,MAAM,GAAG,CAAC;QACZ,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,WAAW,GAAG,eAAe,CAAC,OAAO,EAAE,GAAG,CAAC,YAAY,CAAC,CAAC;QAE/D,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,IAAI,IAAI,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACzC,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;gBACvB,IAAI,GAAG,EAAE,CAAC;gBACV,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YACtC,CAAC;YACD,MAAM,OAAO,GAAqB,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;YACrE,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI;gBAAE,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;YACxD,IAAI,GAAG,CAAC,gBAAgB;gBAAE,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;YAC1D,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,0EAA0E;IAC1E,MAAM,OAAO,GAAmB,EAAE,CAAC;IAEnC,KAAK,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,YAAY,EAAE,CAAC;QACjD,0EAA0E;QAC1E,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QAC3E,OAAO,CAAC,IAAI,CAAC;YACX,OAAO;YACP,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,YAAY;YACxD,UAAU;SACX,CAAC,CAAC;IACL,CAAC;IAED,0EAA0E;IAC1E,mEAAmE;I
ACnE,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAE3D,OAAO,EAAE,OAAO,EAAE,CAAC;AACrB,CAAC"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Block extraction from markdown: blockquotes and direct (typographic) quotes.
3
+ *
4
+ * extractBlockquotes walks the mdast for blockquote nodes.
5
+ * extractDirectQuotes uses extractProseLines and searches each prose line
6
+ * for curly/typographic quote pairs or straight-quote pairs of length >= 4.
7
+ */
8
/** A markdown blockquote found by {@link extractBlockquotes}. */
export type Blockquote = {
    /** Text content of the blockquote node. */
    text: string;
    /** Line on which the blockquote node starts, taken from the parser's position info. */
    startLine: number;
    /** Line on which the blockquote node ends, taken from the parser's position info. */
    endLine: number;
};
/** A quoted span found inside a prose line by {@link extractDirectQuotes}. */
export type DirectQuote = {
    /** The text between the quote marks (the marks themselves are not included). */
    text: string;
    /** Line number of the prose line containing the quote. */
    line: number;
};
/**
 * Collect the blockquote nodes that are direct children of the document root.
 * Nodes without position info are omitted.
 */
export declare function extractBlockquotes(content: string): Blockquote[];
/**
 * Collect typographic-quote and straight-double-quote spans of at least four
 * characters from each prose line of the document.
 */
export declare function extractDirectQuotes(content: string): DirectQuote[];
19
+ //# sourceMappingURL=blocks.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blocks.d.ts","sourceRoot":"","sources":["../../src/markdown/blocks.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAQH,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,UAAU,EAAE,CAmBhE;AAwBD,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,WAAW,EAAE,CAwBlE"}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Block extraction from markdown: blockquotes and direct (typographic) quotes.
3
+ *
4
+ * extractBlockquotes walks the mdast for blockquote nodes.
5
+ * extractDirectQuotes uses extractProseLines and searches each prose line
6
+ * for curly/typographic quote pairs or straight-quote pairs of length >= 4.
7
+ */
8
+ import { unified } from 'unified';
9
+ import remarkParse from 'remark-parse';
10
+ import { toString as mdastToString } from 'mdast-util-to-string';
11
+ import { extractProseLines } from './prose.js';
12
/**
 * Return every blockquote that is a direct child of the document root.
 *
 * Only the root's immediate children are inspected, so a blockquote nested
 * inside another container (for example a list item) is not visited. Nodes
 * that carry no position info are skipped.
 */
export function extractBlockquotes(content) {
    const root = unified().use(remarkParse).parse(content);
    return root.children
        .filter((child) => child.type === 'blockquote' && child.position != null)
        .map((quote) => ({
            text: mdastToString(quote),
            startLine: quote.position.start.line,
            endLine: quote.position.end.line,
        }));
}
30
+ // Opening typographic quote characters (Unicode):
31
+ // U+201C LEFT DOUBLE QUOTATION MARK "
32
+ // U+201E DOUBLE LOW-9 QUOTATION MARK „
33
+ // U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK <<
34
+ // Closing typographic quote characters (Unicode):
35
+ // U+201D RIGHT DOUBLE QUOTATION MARK "
36
+ // U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK >>
37
+ //
38
+ // Negated inner class excludes both opening and closing curly quotes so the
39
+ // match stops at the first closing typographic quote.
40
+ //
41
+ // Using \u escapes to avoid any editor/file-write smart-quote substitution.
42
+ // eslint-disable-next-line no-misleading-character-class
43
+ const TYPOGRAPHIC_RE = new RegExp('[“„«]([^“”„«»]{4,})[”»]', 'g');
44
+ // Straight double-quote pairs: "..." where inner content is >= 4 chars.
45
+ // Uses ASCII 0x22 for the quote characters — no smart-quote ambiguity.
46
+ const STRAIGHT_RE = /"([^"]{4,})"/g;
47
+ export function extractDirectQuotes(content) {
48
+ const proseLines = extractProseLines(content);
49
+ const results = [];
50
+ for (const { line, text } of proseLines) {
51
+ TYPOGRAPHIC_RE.lastIndex = 0;
52
+ let match;
53
+ while ((match = TYPOGRAPHIC_RE.exec(text)) !== null) {
54
+ const quoted = match[1];
55
+ if (quoted !== undefined) {
56
+ results.push({ text: quoted, line });
57
+ }
58
+ }
59
+ STRAIGHT_RE.lastIndex = 0;
60
+ while ((match = STRAIGHT_RE.exec(text)) !== null) {
61
+ const quoted = match[1];
62
+ if (quoted !== undefined) {
63
+ results.push({ text: quoted, line });
64
+ }
65
+ }
66
+ }
67
+ return results;
68
+ }
69
+ //# sourceMappingURL=blocks.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blocks.js","sourceRoot":"","sources":["../../src/markdown/blocks.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,WAAW,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,QAAQ,IAAI,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAEjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAa/C,MAAM,UAAU,kBAAkB,CAAC,OAAe;IAChD,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,KAAK,CAAC,OAAO,CAAS,CAAC;IAC/D,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QACjC,IAAI,IAAI,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YAC/B,MAAM,EAAE,GAAG,IAAuB,CAAC;YACnC,MAAM,GAAG,GAAG,EAAE,CAAC,QAAQ,CAAC;YACxB,IAAI,GAAG,KAAK,SAAS,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;gBACtC,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,aAAa,CAAC,EAAE,CAAC;oBACvB,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,IAAI;oBACzB,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI;iBACtB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,kDAAkD;AAClD,0CAA0C;AAC1C,2CAA2C;AAC3C,0DAA0D;AAC1D,kDAAkD;AAClD,2CAA2C;AAC3C,2DAA2D;AAC3D,EAAE;AACF,4EAA4E;AAC5E,sDAAsD;AACtD,EAAE;AACF,4EAA4E;AAC5E,yDAAyD;AACzD,MAAM,cAAc,GAAG,IAAI,MAAM,CAC/B,yBAAyB,EACzB,GAAG,CACJ,CAAC;AAEF,wEAAwE;AACxE,uEAAuE;AACvE,MAAM,WAAW,GAAG,eAAe,CAAC;AAEpC,MAAM,UAAU,mBAAmB,CAAC,OAAe;IACjD,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC9C,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,UAAU,EAAE,CAAC;QACxC,cAAc,CAAC,SAAS,GAAG,CAAC,CAAC;QAC7B,IAAI,KAA6B,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACpD,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;QAED,WAAW,CAAC,SAAS,GAAG,CAAC,CAAC;QAC1B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CA
AC"}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Citekey extraction from markdown prose.
3
+ *
4
+ * Uses extractProseLines to skip code blocks, inline code, and HTML, then
5
+ * parses Pandoc-style citations on each prose line. Handles:
6
+ * - bracketed groups with multiple ;-separated items: `[@a; @b, pp. 33-35]`
7
+ * - author-suppression: `-@key` / `[-@key]`
8
+ * - locators (the suffix after a key within a bracket): `[@key, p. 42]`
9
+ * - bare in-text citations: `@key`
10
+ *
11
+ * Hand-rolled (no citation-parser dependency). Locators are surfaced as the
12
+ * raw suffix string; page-range *validation* is out of scope.
13
+ */
14
/** One occurrence of a citation key in a markdown document. */
export type CitekeyReference = {
    /** The key following `@`. */
    citekey: string;
    /** File the reference was found in (presumably the `file` argument given to {@link extractCitekeys}). */
    file: string;
    /** Line number of the prose line containing the reference. */
    line: number;
    /** Raw suffix text after the key within a bracketed citation (e.g. `p. 42`), or null when there is none. */
    locator: string | null;
    /** True for the author-suppressed form, `-@key`. */
    authorSuppressed: boolean;
};
/**
 * Extract Pandoc-style citation references (bracketed groups and bare `@key`)
 * from the prose lines of a markdown document.
 */
export declare function extractCitekeys(content: string, file: string): CitekeyReference[];
22
+ //# sourceMappingURL=citekeys.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"citekeys.d.ts","sourceRoot":"","sources":["../../src/markdown/citekeys.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAIH,MAAM,MAAM,gBAAgB,GAAG;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,gBAAgB,EAAE,OAAO,CAAC;CAC3B,CAAC;AAkDF,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,gBAAgB,EAAE,CAkDjF"}
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Citekey extraction from markdown prose.
3
+ *
4
+ * Uses extractProseLines to skip code blocks, inline code, and HTML, then
5
+ * parses Pandoc-style citations on each prose line. Handles:
6
+ * - bracketed groups with multiple ;-separated items: `[@a; @b, pp. 33-35]`
7
+ * - author-suppression: `-@key` / `[-@key]`
8
+ * - locators (the suffix after a key within a bracket): `[@key, p. 42]`
9
+ * - bare in-text citations: `@key`
10
+ *
11
+ * Hand-rolled (no citation-parser dependency). Locators are surfaced as the
12
+ * raw suffix string; page-range *validation* is out of scope.
13
+ */
14
+ import { extractProseLines } from './prose.js';
15
// Grammar of a citekey. The first character is a letter, digit, underscore, or
// colon. Dots and hyphens may appear only in the middle, so the last character
// is again a letter, digit, underscore, or colon: punctuation trailing a key
// belongs to the surrounding sentence. The second alternative covers keys that
// are a single character long. Matches the long-standing bibcheck/Pandoc
// behaviour.
const KEY = '[a-zA-Z0-9_:][a-zA-Z0-9_:.-]*[a-zA-Z0-9_:]|[a-zA-Z0-9_:]';
/** One citation token. Group 1 is the optional author-suppression `-`,
 *  group 2 is the key that follows the `@`. */
const CITE_TOKEN_RE = new RegExp('(-?)@(' + KEY + ')', 'g');
/** One bracketed group `[ ... ]`. Group 1 is the text between the brackets,
 *  which callers inspect for citations. */
const BRACKET_RE = /\[([^\]]*)\]/g;
24
/**
 * Replace inline code spans with equal-length runs of spaces so their contents
 * are never parsed as citations, while every other character keeps its column.
 *
 * A span opens with a run of one or more backticks and closes at the next run
 * of exactly the same length, so both `` `code` `` and double-backtick spans
 * that contain a literal backtick are masked as a whole. A backtick run with
 * no matching closer is left untouched.
 *
 * @param text A single line of prose.
 * @returns The line with every inline code span blanked out.
 */
function maskInlineCode(text) {
    // (?<!`) and (?!`) pin each delimiter to a complete backtick run, and \1
    // forces the closing run to have the same length as the opening one.
    return text.replace(/(?<!`)(`+)(?!`)[^\n]*?(?<!`)\1(?!`)/g, (span) => ' '.repeat(span.length));
}
30
/**
 * Turn the text that follows a citekey inside a bracket item into a locator.
 *
 * Leading commas and whitespace are dropped, then the remainder is trimmed.
 *
 * @param suffix Raw text after the citation token.
 * @returns The cleaned locator, or null when nothing is left.
 */
function extractLocator(suffix) {
    const locator = suffix.replace(/^[\s,]+/, '').trim();
    if (locator.length === 0) {
        return null;
    }
    return locator;
}
35
/**
 * Parse one `;`-separated item of a bracketed citation group
 * (e.g. `see @smith2020, pp. 33-35`).
 *
 * Only the first citation token in the item is used; everything after it is
 * handed to extractLocator.
 *
 * @param item Text of the bracket item.
 * @param file Copied into the result's `file` field.
 * @param line Copied into the result's `line` field.
 * @param col Copied into the result's `col` field.
 * @returns The parsed reference, or null when the item holds no citation token.
 */
function parseBracketItem(item, file, line, col) {
    // The regex is shared and global, so rewind it before every use.
    CITE_TOKEN_RE.lastIndex = 0;
    const match = CITE_TOKEN_RE.exec(item);
    if (match === null) {
        return null;
    }
    const [token, dash, citekey] = match;
    if (citekey === undefined) {
        return null;
    }
    const afterToken = item.slice(match.index + token.length);
    return {
        citekey,
        file,
        line,
        locator: extractLocator(afterToken),
        authorSuppressed: dash === '-',
        col,
    };
}
51
/**
 * Extract every Pandoc-style citation found in the prose of a markdown document.
 *
 * Each prose line yielded by extractProseLines is scanned in two passes:
 *  1. bracketed groups (`[@a; -@b, p. 3]`), split on `;`, where the text after
 *     a key becomes that reference's locator;
 *  2. bare `@key` / `-@key` tokens outside those groups, which never carry a
 *     locator.
 *
 * An `@` that directly follows an ASCII letter or digit (as in
 * `user@example.com`) is not treated as the start of a citation.
 *
 * @param content Markdown source text.
 * @param file Value copied into the `file` field of every returned reference.
 * @returns References in the order lines are yielded, and by column within a
 *   line. All items of one bracket group share the group's starting column.
 */
export function extractCitekeys(content, file) {
    const proseLines = extractProseLines(content);
    const results = [];
    for (const { line, text: rawText } of proseLines) {
        // Blank out inline code, then any `@` glued to a preceding letter or
        // digit, so e-mail addresses are not reported as citations. Both
        // replacements keep the line length, so columns stay valid.
        const text = maskInlineCode(rawText).replace(/(?<=[A-Za-z0-9])@/g, ' ');
        const lineRefs = [];
        // 1. Bracketed citations. Record their spans so the bare-key pass below
        //    does not double-count keys that live inside a bracket.
        const spans = [];
        BRACKET_RE.lastIndex = 0;
        let bm;
        while ((bm = BRACKET_RE.exec(text)) !== null) {
            const inner = bm[1];
            if (inner === undefined || !inner.includes('@'))
                continue;
            spans.push([bm.index, bm.index + bm[0].length]);
            for (const rawItem of inner.split(';')) {
                const ref = parseBracketItem(rawItem, file, line, bm.index);
                if (ref !== null)
                    lineRefs.push(ref);
            }
        }
        // 2. Bare in-text citations, outside any bracket span.
        let masked = text;
        for (const [start, end] of spans) {
            masked = masked.slice(0, start) + ' '.repeat(end - start) + masked.slice(end);
        }
        CITE_TOKEN_RE.lastIndex = 0;
        let tm;
        while ((tm = CITE_TOKEN_RE.exec(masked)) !== null) {
            if (tm[2] === undefined)
                continue;
            lineRefs.push({
                citekey: tm[2],
                file,
                line,
                locator: null,
                authorSuppressed: tm[1] === '-',
                col: tm.index,
            });
        }
        // Document order within the line; `col` is only a sort key and is
        // dropped from the emitted references.
        lineRefs.sort((a, b) => a.col - b.col);
        for (const { col: _col, ...ref } of lineRefs) {
            results.push(ref);
        }
    }
    return results;
}
100
+ //# sourceMappingURL=citekeys.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"citekeys.js","sourceRoot":"","sources":["../../src/markdown/citekeys.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAU/C,8EAA8E;AAC9E,6EAA6E;AAC7E,gFAAgF;AAChF,2CAA2C;AAC3C,MAAM,GAAG,GAAG,0DAA0D,CAAC;AAEvE,+EAA+E;AAC/E,MAAM,aAAa,GAAG,IAAI,MAAM,CAAC,SAAS,GAAG,GAAG,EAAE,GAAG,CAAC,CAAC;AAEvD,8EAA8E;AAC9E,MAAM,UAAU,GAAG,eAAe,CAAC;AAEnC;;2EAE2E;AAC3E,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;AACjE,CAAC;AAED,iFAAiF;AACjF,SAAS,cAAc,CAAC,MAAc;IACpC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACrD,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7C,CAAC;AAID,sEAAsE;AACtE,SAAS,gBAAgB,CACvB,IAAY,EACZ,IAAY,EACZ,IAAY,EACZ,GAAW;IAEX,aAAa,CAAC,SAAS,GAAG,CAAC,CAAC;IAC5B,MAAM,CAAC,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IAClD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACjD,OAAO;QACL,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;QACb,IAAI;QACJ,IAAI;QACJ,OAAO,EAAE,cAAc,CAAC,MAAM,CAAC;QAC/B,gBAAgB,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG;QAC9B,GAAG;KACJ,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,OAAe,EAAE,IAAY;IAC3D,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC9C,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,UAAU,EAAE,CAAC;QACjD,MAAM,IAAI,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,QAAQ,GAAiB,EAAE,CAAC;QAElC,wEAAwE;QACxE,4DAA4D;QAC5D,MAAM,KAAK,GAA4B,EAAE,CAAC;QAC1C,UAAU,CAAC,SAAS,GAAG,CAAC,CAAC;QACzB,IAAI,EAA0B,CAAC;QAC/B,OAAO,CAAC,EAAE,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC7C,MAAM,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;YACpB,IAAI,KAAK,KAAK,SAAS,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;gBAAE,SAAS;YAC1D,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CA
AC;YAChD,KAAK,MAAM,OAAO,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;gBACvC,MAAM,GAAG,GAAG,gBAAgB,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;gBAC5D,IAAI,GAAG,KAAK,IAAI;oBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,IAAI,MAAM,GAAG,IAAI,CAAC;QAClB,KAAK,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,IAAI,KAAK,EAAE,CAAC;YACjC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAChF,CAAC;QACD,aAAa,CAAC,SAAS,GAAG,CAAC,CAAC;QAC5B,IAAI,EAA0B,CAAC;QAC/B,OAAO,CAAC,EAAE,GAAG,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAClD,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,SAAS;gBAAE,SAAS;YAClC,QAAQ,CAAC,IAAI,CAAC;gBACZ,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;gBACd,IAAI;gBACJ,IAAI;gBACJ,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG;gBAC/B,GAAG,EAAE,EAAE,CAAC,KAAK;aACd,CAAC,CAAC;QACL,CAAC;QAED,kCAAkC;QAClC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;QACvC,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,GAAG,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * File discovery using tinyglobby include/exclude patterns.
3
+ *
4
+ * Returns discovered markdown files as absolute paths with a relative path
5
+ * computed against the cwd. Results are sorted alphabetically by relativePath
6
+ * for deterministic ordering.
7
+ */
8
+ export type DiscoverDocsOptions = {
9
+ cwd: string;
10
+ include: string[];
11
+ exclude?: string[];
12
+ };
13
+ export type DiscoveredDoc = {
14
+ path: string;
15
+ relativePath: string;
16
+ };
17
+ export declare function discoverDocs(opts: DiscoverDocsOptions): Promise<DiscoveredDoc[]>;
18
+ //# sourceMappingURL=glob.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"glob.d.ts","sourceRoot":"","sources":["../../src/markdown/glob.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAKH,MAAM,MAAM,mBAAmB,GAAG;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;CACtB,CAAC;AAEF,wBAAsB,YAAY,CAAC,IAAI,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC,CAkBtF"}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * File discovery using tinyglobby include/exclude patterns.
3
+ *
4
+ * Returns discovered markdown files as absolute paths with a relative path
5
+ * computed against the cwd. Results are sorted alphabetically by relativePath
6
+ * for deterministic ordering.
7
+ */
8
+ import { glob } from 'tinyglobby';
9
+ import { relative } from 'node:path';
10
/**
 * Discover files matching the include/exclude glob patterns.
 *
 * @param opts `cwd` is the directory the patterns are resolved against and
 *   relative paths are computed from; `include` lists the glob patterns to
 *   match; `exclude` (optional) lists the patterns passed as `ignore`.
 * @returns One entry per matched file, holding the path returned by the glob
 *   call (requested as absolute) and the path relative to `opts.cwd`, sorted
 *   by `relativePath`. An empty `include` list yields an empty result without
 *   running the glob.
 */
export async function discoverDocs(opts) {
    if (opts.include.length === 0) {
        return [];
    }
    const absolutePaths = await glob(opts.include, {
        cwd: opts.cwd,
        ignore: opts.exclude ?? [],
        absolute: true,
    });
    const docs = absolutePaths.map((p) => ({
        path: p,
        relativePath: relative(opts.cwd, p),
    }));
    // Pin the collation locale: without it localeCompare follows the host's
    // default locale, so the order could differ from one machine to another.
    docs.sort((a, b) => a.relativePath.localeCompare(b.relativePath, 'en'));
    return docs;
}
26
+ //# sourceMappingURL=glob.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"glob.js","sourceRoot":"","sources":["../../src/markdown/glob.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAarC,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAyB;IAC1D,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE;QAC7C,GAAG,EAAE,IAAI,CAAC,GAAG;QACb,MAAM,EAAE,IAAI,CAAC,OAAO,IAAI,EAAE;QAC1B,QAAQ,EAAE,IAAI;KACf,CAAC,CAAC;IAEH,MAAM,IAAI,GAAoB,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtD,IAAI,EAAE,CAAC;QACP,YAAY,EAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;KACpC,CAAC,CAAC,CAAC;IAEJ,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC;IAClE,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Shared helper: extract prose lines from markdown content.
3
+ *
4
+ * Parses the content via remark/unified to an mdast AST and walks the tree,
5
+ * skipping nodes whose content should not be treated as prose (code blocks,
6
+ * inline code, raw HTML, YAML/TOML front-matter). Returns one ProseLine per
7
+ * source line that contains prose, sorted and deduplicated by line number.
8
+ *
9
+ * YAML/TOML front-matter is stripped before AST parsing because remark-parse
10
+ * alone does not emit yaml/toml AST nodes — the remark-frontmatter plugin is
11
+ * required for that. Instead, we detect the standard --- / +++ delimiters
12
+ * manually and record which lines to exclude from the result.
13
+ */
14
+ export type ProseLine = {
15
+ line: number;
16
+ text: string;
17
+ };
18
+ export declare function extractProseLines(content: string): ProseLine[];
19
+ //# sourceMappingURL=prose.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prose.d.ts","sourceRoot":"","sources":["../../src/markdown/prose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAMH,MAAM,MAAM,SAAS,GAAG;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AA6DF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,EAAE,CAwB9D"}