@de-otio/bibcheck 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +147 -0
- package/dist/cache/fs-cache.d.ts +55 -0
- package/dist/cache/fs-cache.d.ts.map +1 -0
- package/dist/cache/fs-cache.js +264 -0
- package/dist/cache/fs-cache.js.map +1 -0
- package/dist/canonical.d.ts +29 -0
- package/dist/canonical.d.ts.map +1 -0
- package/dist/canonical.js +132 -0
- package/dist/canonical.js.map +1 -0
- package/dist/check.d.ts +140 -0
- package/dist/check.d.ts.map +1 -0
- package/dist/check.js +646 -0
- package/dist/check.js.map +1 -0
- package/dist/cli.d.ts +19 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +357 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +175 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +180 -0
- package/dist/config.js.map +1 -0
- package/dist/databases/crossref.d.ts +53 -0
- package/dist/databases/crossref.d.ts.map +1 -0
- package/dist/databases/crossref.js +138 -0
- package/dist/databases/crossref.js.map +1 -0
- package/dist/databases/index.d.ts +12 -0
- package/dist/databases/index.d.ts.map +1 -0
- package/dist/databases/index.js +9 -0
- package/dist/databases/index.js.map +1 -0
- package/dist/databases/openalex.d.ts +29 -0
- package/dist/databases/openalex.d.ts.map +1 -0
- package/dist/databases/openalex.js +117 -0
- package/dist/databases/openalex.js.map +1 -0
- package/dist/databases/openlibrary.d.ts +26 -0
- package/dist/databases/openlibrary.d.ts.map +1 -0
- package/dist/databases/openlibrary.js +79 -0
- package/dist/databases/openlibrary.js.map +1 -0
- package/dist/databases/worldcat.d.ts +33 -0
- package/dist/databases/worldcat.d.ts.map +1 -0
- package/dist/databases/worldcat.js +145 -0
- package/dist/databases/worldcat.js.map +1 -0
- package/dist/doctor.d.ts +44 -0
- package/dist/doctor.d.ts.map +1 -0
- package/dist/doctor.js +386 -0
- package/dist/doctor.js.map +1 -0
- package/dist/existence.d.ts +70 -0
- package/dist/existence.d.ts.map +1 -0
- package/dist/existence.js +308 -0
- package/dist/existence.js.map +1 -0
- package/dist/http.d.ts +97 -0
- package/dist/http.d.ts.map +1 -0
- package/dist/http.js +543 -0
- package/dist/http.js.map +1 -0
- package/dist/identifiers.d.ts +44 -0
- package/dist/identifiers.d.ts.map +1 -0
- package/dist/identifiers.js +111 -0
- package/dist/identifiers.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/linkage.d.ts +29 -0
- package/dist/linkage.d.ts.map +1 -0
- package/dist/linkage.js +73 -0
- package/dist/linkage.js.map +1 -0
- package/dist/markdown/blocks.d.ts +19 -0
- package/dist/markdown/blocks.d.ts.map +1 -0
- package/dist/markdown/blocks.js +69 -0
- package/dist/markdown/blocks.js.map +1 -0
- package/dist/markdown/citekeys.d.ts +22 -0
- package/dist/markdown/citekeys.d.ts.map +1 -0
- package/dist/markdown/citekeys.js +100 -0
- package/dist/markdown/citekeys.js.map +1 -0
- package/dist/markdown/glob.d.ts +18 -0
- package/dist/markdown/glob.d.ts.map +1 -0
- package/dist/markdown/glob.js +26 -0
- package/dist/markdown/glob.js.map +1 -0
- package/dist/markdown/prose.d.ts +19 -0
- package/dist/markdown/prose.d.ts.map +1 -0
- package/dist/markdown/prose.js +81 -0
- package/dist/markdown/prose.js.map +1 -0
- package/dist/output/json.d.ts +21 -0
- package/dist/output/json.d.ts.map +1 -0
- package/dist/output/json.js +24 -0
- package/dist/output/json.js.map +1 -0
- package/dist/output/markdown.d.ts +21 -0
- package/dist/output/markdown.d.ts.map +1 -0
- package/dist/output/markdown.js +194 -0
- package/dist/output/markdown.js.map +1 -0
- package/dist/output/sarif.d.ts +31 -0
- package/dist/output/sarif.d.ts.map +1 -0
- package/dist/output/sarif.js +322 -0
- package/dist/output/sarif.js.map +1 -0
- package/dist/output/text.d.ts +27 -0
- package/dist/output/text.d.ts.map +1 -0
- package/dist/output/text.js +212 -0
- package/dist/output/text.js.map +1 -0
- package/dist/phrases/load.d.ts +34 -0
- package/dist/phrases/load.d.ts.map +1 -0
- package/dist/phrases/load.js +148 -0
- package/dist/phrases/load.js.map +1 -0
- package/dist/phrases.d.ts +27 -0
- package/dist/phrases.d.ts.map +1 -0
- package/dist/phrases.js +116 -0
- package/dist/phrases.js.map +1 -0
- package/dist/schema/csl.d.ts +429 -0
- package/dist/schema/csl.d.ts.map +1 -0
- package/dist/schema/csl.js +101 -0
- package/dist/schema/csl.js.map +1 -0
- package/dist/schema/output.d.ts +1116 -0
- package/dist/schema/output.d.ts.map +1 -0
- package/dist/schema/output.js +419 -0
- package/dist/schema/output.js.map +1 -0
- package/dist/suppression.d.ts +106 -0
- package/dist/suppression.d.ts.map +1 -0
- package/dist/suppression.js +134 -0
- package/dist/suppression.js.map +1 -0
- package/dist/version.d.ts +11 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +14 -0
- package/dist/version.js.map +1 -0
- package/dist/worklist.d.ts +32 -0
- package/dist/worklist.d.ts.map +1 -0
- package/dist/worklist.js +211 -0
- package/dist/worklist.js.map +1 -0
- package/package.json +82 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Richard Myers
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# bibcheck
|
|
2
|
+
|
|
3
|
+
Catch citations an LLM invented — fabricated DOIs, non-existent ISBNs,
|
|
4
|
+
plausible-but-wrong identifiers — before they reach your bibliography or your
|
|
5
|
+
readers. bibcheck verifies that the works your sources describe actually exist
|
|
6
|
+
in scholarly databases, that their identifiers are well-formed, and that URLs
|
|
7
|
+
for pre-DOI primary sources point to trusted canonical editions.
|
|
8
|
+
|
|
9
|
+
> **Status: v0.1 — initial release.** CLI surface, JSON output schema, and
|
|
10
|
+
> configuration grammar are stable within the v0.x major; minor bumps are
|
|
11
|
+
> additive.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
Requires Node.js >= 20.
|
|
16
|
+
|
|
17
|
+
```sh
|
|
18
|
+
# one-off, no install
|
|
19
|
+
npx @de-otio/bibcheck check
|
|
20
|
+
|
|
21
|
+
# or install globally
|
|
22
|
+
npm install -g @de-otio/bibcheck
|
|
23
|
+
bibcheck check
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
bibcheck looks for a `bibcheck.toml` in the working directory (it also runs with
|
|
27
|
+
sensible defaults when no config file is present). See [docs/usage.md](docs/usage.md) for a
|
|
28
|
+
quick start and [docs/configuration.md](docs/configuration.md) for the config
|
|
29
|
+
reference.
|
|
30
|
+
|
|
31
|
+
## Why bibcheck
|
|
32
|
+
|
|
33
|
+
AI-assisted research workflows regularly produce citations that look real but
|
|
34
|
+
aren't: a DOI where the last four digits are transposed, an ISBN whose check
|
|
35
|
+
digit is wrong, a journal article that was never published. These hallucinations
|
|
36
|
+
pass a spell-check and a format linter. They do not pass bibcheck.
|
|
37
|
+
|
|
38
|
+
bibcheck's existence check is default-gating: if a DOI or ISBN that your
|
|
39
|
+
bibliography records is absent from CrossRef, OpenAlex, and OpenLibrary, the
|
|
40
|
+
build fails. A malformed identifier — one that fails the structural rules
|
|
41
|
+
before any network call — fails even faster. The aim is to make fabricated
|
|
42
|
+
citations fail CI before they reach a reader.
|
|
43
|
+
|
|
44
|
+
## Verification boundary
|
|
45
|
+
|
|
46
|
+
**"Verified" means**: the work exists in CrossRef/OpenAlex/OpenLibrary and its
|
|
47
|
+
recorded metadata (title, first author) agrees with what your bibliography
|
|
48
|
+
says. That is a necessary check, not a sufficient one.
|
|
49
|
+
|
|
50
|
+
**bibcheck does not check** whether the cited source supports the claim your
|
|
51
|
+
prose is making. That is a human judgment, and bibcheck surfaces it as a manual
|
|
52
|
+
worklist item (`notCheckedFor: ["claim-support"]` in the JSON output). The
|
|
53
|
+
worklist is the bridge between automated and manual verification.
|
|
54
|
+
|
|
55
|
+
There are no numeric confidence scores anywhere in bibcheck's output. The
|
|
56
|
+
output carries a defined evidence vocabulary (`exists-metadata-match`,
|
|
57
|
+
`exists-metadata-mismatch`, `absent`, `unverifiable`) so downstream consumers
|
|
58
|
+
— including LLM agents — cannot read `"verified"` as "the citation's claim is
|
|
59
|
+
sound."
|
|
60
|
+
|
|
61
|
+
## What it does
|
|
62
|
+
|
|
63
|
+
- **Existence verification (default-gating).** For each bibliography entry,
|
|
64
|
+
checks DOI/ISBN/title against CrossRef, OpenAlex, and OpenLibrary. Absence
|
|
65
|
+
from all applicable databases is treated as a fabrication signal and fails
|
|
66
|
+
`bibcheck check` by default.
|
|
67
|
+
- **Identifier well-formedness (default-gating, pre-network).** Validates DOI
|
|
68
|
+
structure, ISBN check digits, and URL scheme locally, before any network
|
|
69
|
+
call. A malformed identifier short-circuits the existence lookup and is a
|
|
70
|
+
strong, cheap fabrication signal.
|
|
71
|
+
- **Canonical-edition URL verification.** For pre-DOI primary sources, checks
|
|
72
|
+
that each entry carries a `url:` pointing to a trusted canonical-edition host
|
|
73
|
+
(HathiTrust, Internet Archive, Liberty Fund OLL, Stanford Encyclopedia of
|
|
74
|
+
Philosophy archives, PhilPapers, national-library catalogues) and that the
|
|
75
|
+
URL is live.
|
|
76
|
+
- **Pandoc-citeproc-style linkage check.** Every `@citekey` reference in your
|
|
77
|
+
markdown documents resolves to an entry in the bibliography (handling the
|
|
78
|
+
full citation grammar — `[@a; @b, p. 5]`, author-suppression `-@key`,
|
|
79
|
+
locators). Deterministic CI-safe alternative to `pandoc --citeproc`'s
|
|
80
|
+
render-time warning. Also flags **orphaned entries** — bibliography entries
|
|
81
|
+
never cited in any document — as an informational (non-gating) signal that an
|
|
82
|
+
LLM may have padded the reference list.
|
|
83
|
+
- **Structured human-triage worklist.** Emits manual-verification items —
|
|
84
|
+
direct quotations, page-cited paraphrases, citations to contested-coverage
|
|
85
|
+
source types, non-canonical editions — with pre-filled verification URLs and
|
|
86
|
+
explicit `notCheckedFor: ["claim-support"]` annotations.
|
|
87
|
+
- **Versioned structured output.** JSON / Markdown / SARIF, schema versioned at
|
|
88
|
+
`0.3.0`. Designed for LLM agents, CI pipelines, and editor extensions.
|
|
89
|
+
|
|
90
|
+
bibcheck also exposes an **opt-in phrase denylist** (`bibcheck phrases`): a
|
|
91
|
+
regex pass over prose against patterns the project supplies via
|
|
92
|
+
`[phrases] file = "..."` in `bibcheck.toml`. Useful for style-guide
|
|
93
|
+
deprecations, retracted-source wording, or in-house terminology drift.
|
|
94
|
+
bibcheck does not ship a curated baseline — the feature is a configurable
|
|
95
|
+
lint, not curated guidance. Acknowledge an intentional match with
|
|
96
|
+
`<!-- bibcheck-allow: <key> -->` in the prose.
|
|
97
|
+
|
|
98
|
+
## What it does not do
|
|
99
|
+
|
|
100
|
+
- Render bibliography output. Use `pandoc --citeproc` or `citation-js` directly.
|
|
101
|
+
- Take PDF input. Use FiCi / ValiRef / cite_verify_cli for that.
|
|
102
|
+
- Verify quotation wording or whether the cited source supports the prose's
|
|
103
|
+
claim. This is surfaced as a manual worklist item, not automated.
|
|
104
|
+
- Run without a network connection. bibcheck requires internet access to
|
|
105
|
+
perform existence checks. See [docs/usage.md](docs/usage.md) for the failure
|
|
106
|
+
mode.
|
|
107
|
+
- Edit bibliography or docs. Reports findings; does not modify files.
|
|
108
|
+
|
|
109
|
+
## Internet required
|
|
110
|
+
|
|
111
|
+
bibcheck requires a live internet connection to verify existence. Running
|
|
112
|
+
without network access produces a clear error (transport failure logged
|
|
113
|
+
against affected entries); it does not silently degrade to "unverifiable."
|
|
114
|
+
|
|
115
|
+
## Status
|
|
116
|
+
|
|
117
|
+
v0.1. All seven subcommands are implemented. The output schema is at `0.3.0`.
|
|
118
|
+
See [docs/usage.md](docs/usage.md) for usage and
|
|
119
|
+
[docs/output-schema.md](docs/output-schema.md) for the JSON contract.
|
|
120
|
+
|
|
121
|
+
## Documentation
|
|
122
|
+
|
|
123
|
+
- [docs/usage.md](docs/usage.md) — installation, quick start, subcommands, CI
|
|
124
|
+
integration, exit codes, suppression workflow.
|
|
125
|
+
- [docs/configuration.md](docs/configuration.md) — full `bibcheck.toml`
|
|
126
|
+
reference including gating rules and per-entry suppression.
|
|
127
|
+
- [docs/output-schema.md](docs/output-schema.md) — JSON output schema contract
|
|
128
|
+
for downstream consumers (schema `0.3.0`).
|
|
129
|
+
- [docs/extending.md](docs/extending.md) — adding database clients, output
|
|
130
|
+
formats, and subcommands.
|
|
131
|
+
- [SECURITY.md](SECURITY.md) — security policy, data handling, and
|
|
132
|
+
vulnerability reporting.
|
|
133
|
+
- [RELEASING.md](RELEASING.md) — maintainer release checklist including
|
|
134
|
+
OIDC/npm Trusted Publishing notes.
|
|
135
|
+
|
|
136
|
+
## Development
|
|
137
|
+
|
|
138
|
+
```sh
|
|
139
|
+
npm install
|
|
140
|
+
npm run build # compile TypeScript to dist/
|
|
141
|
+
npm test # run vitest
|
|
142
|
+
npm run typecheck # tsc --noEmit
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
[MIT](LICENSE).
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Filesystem-backed TTL cache for external API responses.
|
|
3
|
+
*
|
|
4
|
+
* Uses keyv + keyv-file for persistence. Every stored value is wrapped in an
|
|
5
|
+
* envelope that carries a schema version and an explicit expiresAt timestamp so
|
|
6
|
+
* that version mismatches and TTL expiry are enforced independently of keyv's
|
|
7
|
+
* own TTL mechanism. This lets us invalidate the entire cache by bumping the
|
|
8
|
+
* version constant without touching the backing file.
|
|
9
|
+
*
|
|
10
|
+
* Known limitation: two concurrent bibcheck runs writing to the same cache
|
|
11
|
+
* directory race on the underlying JSON file. keyv-file uses a write-delay
|
|
12
|
+
* debounce rather than an atomic rename, so the last writer wins. This is
|
|
13
|
+
* acceptable for v0.1; a future task may add file locking.
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Minimal logging sink accepted by the cache factories.
 * Only a `warn` channel is required.
 */
export interface Logger {
|
|
16
|
+
/**
 * Report a non-fatal cache event. `event` is a short machine-readable name
 * (the filesystem cache emits `cache.get.error`, `cache.corrupt` and
 * `cache.version_mismatch`); `ctx` carries optional structured details.
 */
warn(event: string, ctx?: Record<string, unknown>): void;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Asynchronous key/value cache with per-entry time-to-live.
 * Implemented by `createFsCache` (disk) and `createMemoryCache` (in-process).
 */
export interface Cache {
|
|
19
|
+
/**
 * Look up `key`. Resolves to the stored value, or `null` when there is no
 * usable entry (missing or expired). If `signal` is already aborted when the
 * call starts, the promise rejects with `signal.reason`.
 */
get<T>(key: string, signal?: AbortSignal): Promise<T | null>;
|
|
20
|
+
/**
 * Store `value` under `key`. `opts.ttlMs` overrides the cache's default TTL
 * for this entry only.
 */
set<T>(key: string, value: T, opts?: {
|
|
21
|
+
ttlMs?: number;
|
|
22
|
+
}): Promise<void>;
|
|
23
|
+
/** Remove the entry stored under `key`, if any. */
invalidate(key: string): Promise<void>;
|
|
24
|
+
/** Remove every entry from the cache. */
clear(): Promise<void>;
|
|
25
|
+
}
|
|
26
|
+
/**
 * Options for `createFsCache`. Only `dir` is required; every other field has
 * a default applied by the factory.
 */
export interface FsCacheOptions {
|
|
27
|
+
/** Base directory for the cache file (`cache.json` inside it). Created on first write. */
|
|
28
|
+
dir: string;
|
|
29
|
+
/** Default TTL in milliseconds. Defaults to 30 days. */
|
|
30
|
+
defaultTtlMs?: number;
|
|
31
|
+
/**
 * Maximum total size of the cache file in megabytes. null = unlimited.
 * The size sweep only runs for values greater than zero, so zero or a
 * negative number also behaves as unlimited.
 */
|
|
32
|
+
maxSizeMb?: number | null;
|
|
33
|
+
/** Injectable clock for deterministic tests. Defaults to `Date.now`. */
|
|
34
|
+
clock?: {
|
|
35
|
+
now(): number;
|
|
36
|
+
};
|
|
37
|
+
/** Cache schema version. Bumping this invalidates all existing entries. */
|
|
38
|
+
version?: string;
|
|
39
|
+
/** Optional logger for operational warnings. */
|
|
40
|
+
logger?: Logger;
|
|
41
|
+
}
|
|
42
|
+
/**
 * Error raised by the filesystem cache when a write, invalidation or clear
 * fails. The underlying failure is forwarded through `options.cause`.
 */
export declare class CacheError extends Error {
|
|
43
|
+
/** Always the literal string "CacheError". */
name: "CacheError";
|
|
44
|
+
/** `message` and `options` are passed straight to the `Error` constructor. */
constructor(message: string, options?: ErrorOptions);
|
|
45
|
+
}
|
|
46
|
+
/**
 * Create a disk-backed cache persisted to `cache.json` inside `opts.dir`.
 * Reads never throw on store errors (they log and resolve to `null`);
 * `set`, `invalidate` and `clear` reject with `CacheError` on failure.
 */
export declare function createFsCache(opts: FsCacheOptions): Cache;
|
|
47
|
+
/**
 * Create an in-process cache backed by a `Map`. Nothing is persisted.
 * `defaultTtlMs` falls back to the same 30-day default as the filesystem
 * cache, and `clock` defaults to `Date.now`.
 */
export declare function createMemoryCache(opts?: {
|
|
48
|
+
defaultTtlMs?: number;
|
|
49
|
+
clock?: {
|
|
50
|
+
now(): number;
|
|
51
|
+
};
|
|
52
|
+
}): Cache;
|
|
53
|
+
/**
 * Remove all files inside a cache directory (not the directory itself).
 * Sub-directories are removed recursively. If the directory cannot be listed
 * (for example because it does not exist) the call resolves without doing
 * anything.
 */
|
|
54
|
+
export declare function clearCacheDir(dir: string): Promise<void>;
|
|
55
|
+
//# sourceMappingURL=fs-cache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fs-cache.d.ts","sourceRoot":"","sources":["../../src/cache/fs-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAaH,MAAM,WAAW,MAAM;IACrB,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAC1D;AAMD,MAAM,WAAW,KAAK;IACpB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;IAC7D,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxE,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACvC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAMD,MAAM,WAAW,cAAc;IAC7B,iEAAiE;IACjE,GAAG,EAAE,MAAM,CAAC;IACZ,wDAAwD;IACxD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,gDAAgD;IAChD,KAAK,CAAC,EAAE;QAAE,GAAG,IAAI,MAAM,CAAA;KAAE,CAAC;IAC1B,2EAA2E;IAC3E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,gDAAgD;IAChD,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAMD,qBAAa,UAAW,SAAQ,KAAK;IAC1B,IAAI,EAAG,YAAY,CAAU;gBAC1B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY;CAGpD;AA6BD,wBAAgB,aAAa,CAAC,IAAI,EAAE,cAAc,GAAG,KAAK,CAuGzD;AA2GD,wBAAgB,iBAAiB,CAAC,IAAI,CAAC,EAAE;IACvC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE;QAAE,GAAG,IAAI,MAAM,CAAA;KAAE,CAAC;CAC3B,GAAG,KAAK,CAiCR;AAMD,4EAA4E;AAC5E,wBAAsB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAU9D"}
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Filesystem-backed TTL cache for external API responses.
|
|
3
|
+
*
|
|
4
|
+
* Uses keyv + keyv-file for persistence. Every stored value is wrapped in an
|
|
5
|
+
* envelope that carries a schema version and an explicit expiresAt timestamp so
|
|
6
|
+
* that version mismatches and TTL expiry are enforced independently of keyv's
|
|
7
|
+
* own TTL mechanism. This lets us invalidate the entire cache by bumping the
|
|
8
|
+
* version constant without touching the backing file.
|
|
9
|
+
*
|
|
10
|
+
* Known limitation: two concurrent bibcheck runs writing to the same cache
|
|
11
|
+
* directory race on the underlying JSON file. keyv-file uses a write-delay
|
|
12
|
+
* debounce rather than an atomic rename, so the last writer wins. This is
|
|
13
|
+
* acceptable for v0.1; a future task may add file locking.
|
|
14
|
+
*/
|
|
15
|
+
import { createHash } from 'node:crypto';
|
|
16
|
+
import { stat, readFile, readdir, rm } from 'node:fs/promises';
|
|
17
|
+
import path from 'node:path';
|
|
18
|
+
import { Keyv } from 'keyv';
|
|
19
|
+
import { KeyvFile } from 'keyv-file';
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Error
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
/**
 * Error type raised when a cache write, invalidation or clear fails.
 *
 * @param {string} message - Human-readable description of the failure.
 * @param {ErrorOptions} [options] - Standard `Error` options; `cause` carries
 *   the underlying failure.
 */
export class CacheError extends Error {
    constructor(message, options) {
        super(message, options);
        // Own, enumerable `name` so the type survives logging and serialisation.
        this.name = 'CacheError';
    }
}
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Internal types
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
const DEFAULT_TTL_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
|
|
33
|
+
const DEFAULT_VERSION = '1';
|
|
34
|
+
/**
 * Derive the storage identifier for a cache key.
 *
 * @param {string} key - Caller-supplied cache key.
 * @returns {string} Hex-encoded SHA-256 digest of `key`.
 */
function hashKey(key) {
    const hasher = createHash('sha256');
    hasher.update(key);
    return hasher.digest('hex');
}
|
|
37
|
+
/**
 * Throw the signal's abort reason if the signal has already been aborted.
 *
 * @param {AbortSignal | undefined} signal - Optional cancellation signal.
 * @throws Whatever `signal.reason` holds when `signal.aborted` is exactly `true`.
 */
function checkAbort(signal) {
    const alreadyAborted = signal?.aborted === true;
    if (!alreadyAborted) {
        return;
    }
    throw signal.reason;
}
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// createFsCache
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
/**
 * Create a filesystem-backed cache persisted at `<dir>/cache.json`.
 *
 * Each value is stored inside an envelope `{ version, value, expiresAt }`.
 * A read treats anything that is not a current-version, unexpired envelope as
 * a miss and removes it from the store on a best-effort basis.
 *
 * @param {object} opts
 * @param {string} opts.dir - Directory holding `cache.json`.
 * @param {number} [opts.defaultTtlMs] - TTL used when `set` receives no
 *   `ttlMs`; defaults to `DEFAULT_TTL_MS`.
 * @param {number|null} [opts.maxSizeMb=null] - Size cap for the backing file.
 *   The sweep runs only when this is a number greater than zero.
 * @param {{ now(): number }} [opts.clock] - Time source; defaults to `Date.now`.
 * @param {string} [opts.version] - Envelope version; defaults to `DEFAULT_VERSION`.
 * @param {{ warn(event: string, ctx?: object): void }} [opts.logger] - Receives
 *   `cache.get.error`, `cache.corrupt` and `cache.version_mismatch` warnings.
 * @returns {{ get: Function, set: Function, invalidate: Function, clear: Function }}
 *   `get` resolves to `null` on any miss; `set`, `invalidate` and `clear`
 *   reject with `CacheError` when the store fails.
 */
export function createFsCache(opts) {
    const {
        dir,
        defaultTtlMs = DEFAULT_TTL_MS,
        maxSizeMb = null,
        clock = { now: () => Date.now() },
        version = DEFAULT_VERSION,
        logger,
    } = opts;

    const filename = path.join(dir, 'cache.json');
    const keyv = new Keyv({
        store: new KeyvFile({ filename, writeDelay: 0 }),
        useKeyPrefix: false,
    });

    // Best-effort removal of an unusable entry: a failed delete must never
    // turn a cache miss into an error.
    function discard(hash) {
        return keyv.delete(hash).catch(() => undefined);
    }

    async function get(key, signal) {
        checkAbort(signal);
        const hash = hashKey(key);

        let stored;
        try {
            stored = await keyv.get(hash);
        }
        catch (err) {
            // A broken store degrades to a miss rather than failing the caller.
            logger?.warn('cache.get.error', { key, error: String(err) });
            return null;
        }

        if (stored == null) {
            return null;
        }

        // Anything that is not one of our envelopes is corrupt data.
        if (!isEnvelope(stored)) {
            logger?.warn('cache.corrupt', { key });
            await discard(hash);
            return null;
        }

        // Entries written under a different schema version are stale.
        if (stored.version !== version) {
            logger?.warn('cache.version_mismatch', {
                key,
                stored: stored.version,
                expected: version,
            });
            await discard(hash);
            return null;
        }

        // The envelope's own timestamp is the authoritative expiry check.
        const expired = clock.now() > stored.expiresAt;
        if (expired) {
            await discard(hash);
            return null;
        }

        return stored.value;
    }

    async function set(key, value, setOpts) {
        checkAbort(undefined);
        const ttlMs = setOpts?.ttlMs ?? defaultTtlMs;
        const hash = hashKey(key);
        const expiresAt = clock.now() + ttlMs;
        const envelope = { version, value, expiresAt };

        try {
            // keyv also receives the TTL so it can expire the entry natively;
            // reads still rely on the envelope's expiresAt.
            await keyv.set(hash, envelope, ttlMs);
        }
        catch (err) {
            throw new CacheError(`Failed to write cache entry for key "${key}"`, { cause: err });
        }

        // Size cap: when configured, let the sweep drop the entries that
        // expire soonest once the backing file grows past the limit.
        const sweepEnabled = maxSizeMb !== null && maxSizeMb > 0;
        if (sweepEnabled) {
            await maybeSweep(filename, maxSizeMb, keyv, clock);
        }
    }

    async function invalidate(key) {
        checkAbort(undefined);
        const hash = hashKey(key);
        try {
            await keyv.delete(hash);
        }
        catch (err) {
            throw new CacheError(`Failed to invalidate cache entry for key "${key}"`, { cause: err });
        }
    }

    async function clear() {
        try {
            await keyv.clear();
        }
        catch (err) {
            throw new CacheError('Failed to clear cache', { cause: err });
        }
    }

    return { get, set, invalidate, clear };
}
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
// Size eviction helper
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
/**
 * Shrink the cache when its backing file has grown past the size cap.
 *
 * If the file is larger than `maxSizeMb`, every entry is read straight from
 * the JSON file, ordered by `expiresAt` ascending, and the first half (at
 * least one entry) is deleted through `keyv`. Entries whose stored value is
 * not a recognisable envelope are ranked as already expired, so they go first.
 *
 * The file is read directly rather than through the store's iterator; the
 * original author notes that the iterator filters by namespace, which does
 * not work with `useKeyPrefix: false`.
 *
 * Never throws for I/O or parse problems: a missing file, unreadable file or
 * invalid JSON simply skips the sweep, and individual delete failures are
 * ignored.
 *
 * @param {string} filename - Path of the backing JSON file.
 * @param {number} maxSizeMb - Size cap in megabytes.
 * @param {{ delete(key: string): Promise<unknown> }} keyv - Store used for deletions.
 * @param {{ now(): number }} clock - Time source used to rank non-envelope entries.
 * @returns {Promise<void>}
 */
async function maybeSweep(filename, maxSizeMb, keyv, clock) {
    const BYTES_PER_MB = 1024 * 1024;

    let sizeBytes;
    try {
        ({ size: sizeBytes } = await stat(filename));
    }
    catch {
        // Nothing has been written yet, so there is nothing to sweep.
        return;
    }
    if (sizeBytes / BYTES_PER_MB <= maxSizeMb) {
        return;
    }

    // Peel the persistence wrappers off one stored record. Expected layout,
    // per the original notes: the store wraps values as
    // { value: <serialised string>, expire? } and the serialised string
    // decodes to { value: <actual data>, expires? }.
    const unwrap = (wrapped) => {
        const hasValue = typeof wrapped === 'object' && wrapped !== null && 'value' in wrapped;
        if (!hasValue) {
            return null;
        }
        const payload = wrapped['value'];
        if (typeof payload !== 'string') {
            return payload;
        }
        try {
            const decoded = JSON.parse(payload);
            return decoded['value'] ?? decoded;
        }
        catch {
            // Not JSON (or decoded to null): fall back to the raw string.
            return payload;
        }
    };

    const candidates = [];
    try {
        // Expected file layout: { cache: [[key, wrappedValue], ...], lastExpire }.
        const parsed = JSON.parse(await readFile(filename, 'utf8'));
        const rows = typeof parsed === 'object' && parsed !== null ? parsed['cache'] : undefined;
        if (Array.isArray(rows)) {
            for (const row of rows) {
                if (!Array.isArray(row) || row.length < 2) {
                    continue;
                }
                const [key, wrapped] = row;
                if (typeof key !== 'string') {
                    continue;
                }
                const inner = unwrap(wrapped);
                const expiresAt = isEnvelope(inner) ? inner.expiresAt : clock.now() - 1;
                candidates.push({ key, expiresAt });
            }
        }
    }
    catch {
        // Could not read or parse the file; leave the cache untouched.
        return;
    }

    if (candidates.length === 0) {
        return;
    }

    // Soonest-to-expire first; drop the first half, but always at least one.
    candidates.sort((a, b) => a.expiresAt - b.expiresAt);
    const evictCount = Math.max(1, Math.floor(candidates.length / 2));
    for (let i = 0; i < evictCount; i += 1) {
        await keyv.delete(candidates[i].key).catch(() => undefined);
    }
}
|
|
207
|
+
// ---------------------------------------------------------------------------
|
|
208
|
+
// Type guard for stored envelopes
|
|
209
|
+
// ---------------------------------------------------------------------------
|
|
210
|
+
/**
 * Decide whether a value read back from the store has the envelope shape
 * `{ version: string, expiresAt: number, value: any }`.
 *
 * Only the shape is checked: `value` merely has to be present (it may be
 * `null` or `undefined`), and `expiresAt` may be any number.
 *
 * @param {unknown} val - Candidate value.
 * @returns {boolean} `true` when `val` looks like a stored envelope.
 */
function isEnvelope(val) {
    if (val === null || typeof val !== 'object') {
        return false;
    }
    if (typeof val['version'] !== 'string') {
        return false;
    }
    if (typeof val['expiresAt'] !== 'number') {
        return false;
    }
    return 'value' in val;
}
|
|
218
|
+
// ---------------------------------------------------------------------------
|
|
219
|
+
// createMemoryCache
|
|
220
|
+
// ---------------------------------------------------------------------------
|
|
221
|
+
/**
 * Create an in-process cache with the same interface as the filesystem cache.
 *
 * Entries live in a `Map` keyed by the caller's raw key and are dropped lazily:
 * an expired entry is removed the next time it is read.
 *
 * @param {object} [opts]
 * @param {number} [opts.defaultTtlMs] - TTL used when `set` receives no
 *   `ttlMs`; defaults to `DEFAULT_TTL_MS`.
 * @param {{ now(): number }} [opts.clock] - Time source; defaults to `Date.now`.
 * @returns {{ get: Function, set: Function, invalidate: Function, clear: Function }}
 */
export function createMemoryCache(opts) {
    const defaultTtlMs = opts?.defaultTtlMs ?? DEFAULT_TTL_MS;
    const clock = opts?.clock ?? { now: () => Date.now() };
    const store = new Map();

    return {
        /** Resolve to the live value for `key`, or `null` when absent or expired. */
        async get(key, signal) {
            checkAbort(signal);
            const hit = store.get(key);
            if (hit === undefined) {
                return null;
            }
            const expired = clock.now() > hit.expiresAt;
            if (expired) {
                store.delete(key);
                return null;
            }
            return hit.value;
        },

        /** Store `value` with an expiry of now + (`setOpts.ttlMs` or the default TTL). */
        async set(key, value, setOpts) {
            checkAbort(undefined);
            const ttlMs = setOpts?.ttlMs ?? defaultTtlMs;
            const expiresAt = clock.now() + ttlMs;
            store.set(key, { value, expiresAt });
        },

        /** Drop the entry for `key`, if present. */
        async invalidate(key) {
            checkAbort(undefined);
            store.delete(key);
        },

        /** Drop every entry. */
        async clear() {
            store.clear();
        },
    };
}
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
// Re-export dir cleanup helper for tests
|
|
252
|
+
// ---------------------------------------------------------------------------
|
|
253
|
+
/**
 * Empty a cache directory while leaving the directory itself in place.
 *
 * Every entry returned by `readdir` is removed recursively and forcibly, with
 * all removals running in parallel. If the directory cannot be listed (for
 * example because it does not exist) the function resolves without doing
 * anything.
 *
 * @param {string} dir - Directory to empty.
 * @returns {Promise<void>}
 */
export async function clearCacheDir(dir) {
    const names = await readdir(dir).catch(() => null);
    if (names === null) {
        return;
    }
    const removals = names.map((name) => {
        const target = path.join(dir, name);
        return rm(target, { recursive: true, force: true });
    });
    await Promise.all(removals);
}
|
|
264
|
+
//# sourceMappingURL=fs-cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fs-cache.js","sourceRoot":"","sources":["../../src/cache/fs-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AAC/D,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAyCrC,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,MAAM,OAAO,UAAW,SAAQ,KAAK;IAC1B,IAAI,GAAG,YAAqB,CAAC;IACtC,YAAY,OAAe,EAAE,OAAsB;QACjD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC1B,CAAC;CACF;AAED,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E,MAAM,cAAc,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,UAAU;AAC3D,MAAM,eAAe,GAAG,GAAG,CAAC;AAQ5B,SAAS,OAAO,CAAC,GAAW;IAC1B,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,UAAU,CAAC,MAA+B;IACjD,IAAI,MAAM,EAAE,OAAO,KAAK,IAAI,EAAE,CAAC;QAC7B,MAAM,MAAM,CAAC,MAAe,CAAC;IAC/B,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,gBAAgB;AAChB,8EAA8E;AAE9E,MAAM,UAAU,aAAa,CAAC,IAAoB;IAChD,MAAM,EACJ,GAAG,EACH,YAAY,GAAG,cAAc,EAC7B,SAAS,GAAG,IAAI,EAChB,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,EACjC,OAAO,GAAG,eAAe,EACzB,MAAM,GACP,GAAG,IAAI,CAAC;IAET,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAE9C,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC,CAAC;IACxD,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,CAAC;IAEtD,KAAK,UAAU,GAAG,CAAI,GAAW,EAAE,MAAoB;QACrD,UAAU,CAAC,MAAM,CAAC,CAAC;QAEnB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,GAAY,CAAC;QACjB,IAAI,CAAC;YACH,GAAG,GAAG,MAAM,IAAI,CAAC,GAAG,CAAU,IAAI,CAAC,CAAC;QACtC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,EAAE,IAAI,CAAC,iBAAiB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC7D,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,GAAG,KAAK,SAAS,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACtC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,0EAA0E;QAC1E,mCAAmC;QACnC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,EAAE,IAAI,CAAC,eAAe,EAAE,EAAE,GAAG,E
AAE,CAAC,CAAC;YACvC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YAC/C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,QAAQ,GAAG,GAAG,CAAC;QAErB,IAAI,QAAQ,CAAC,OAAO,KAAK,OAAO,EAAE,CAAC;YACjC,MAAM,EAAE,IAAI,CAAC,wBAAwB,EAAE;gBACrC,GAAG;gBACH,MAAM,EAAE,QAAQ,CAAC,OAAO;gBACxB,QAAQ,EAAE,OAAO;aAClB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YAC/C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,KAAK,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAC,SAAS,EAAE,CAAC;YACrC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YAC/C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,QAAQ,CAAC,KAAU,CAAC;IAC7B,CAAC;IAED,KAAK,UAAU,GAAG,CAAI,GAAW,EAAE,KAAQ,EAAE,OAA4B;QACvE,UAAU,CAAC,SAAS,CAAC,CAAC;QAEtB,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,YAAY,CAAC;QAC7C,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;QAC1B,MAAM,SAAS,GAAG,KAAK,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAEtC,MAAM,QAAQ,GAAa,EAAE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QAEzD,IAAI,CAAC;YACH,uEAAuE;YACvE,+DAA+D;YAC/D,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;QACxC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,UAAU,CAAC,wCAAwC,GAAG,GAAG,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QACvF,CAAC;QAED,mEAAmE;QACnE,oEAAoE;QACpE,4DAA4D;QAC5D,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YACxC,MAAM,UAAU,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,KAAK,UAAU,UAAU,CAAC,GAAW;QACnC,UAAU,CAAC,SAAS,CAAC,CAAC;QACtB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,UAAU,CAAC,6CAA6C,GAAG,GAAG,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QAC5F,CAAC;IACH,CAAC;IAED,KAAK,UAAU,KAAK;QAClB,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACrB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,UAAU,CAAC,uBAAuB,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QAChE,CAAC;IACH,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;AACzC,CAAC;AAED,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E,KAAK,
UAAU,UAAU,CACvB,QAAgB,EAChB,SAAiB,EACjB,IAAU,EACV,KAAwB;IAExB,IAAI,QAA0C,CAAC;IAC/C,IAAI,CAAC;QACH,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACP,0CAA0C;QAC1C,OAAO;IACT,CAAC;IAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;IAC7C,IAAI,MAAM,IAAI,SAAS,EAAE,CAAC;QACxB,OAAO;IACT,CAAC;IAED,2EAA2E;IAC3E,sEAAsE;IACtE,EAAE;IACF,sEAAsE;IACtE,0EAA0E;IAC1E,wEAAwE;IACxE,MAAM,OAAO,GAA8C,EAAE,CAAC;IAC9D,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC7C,+FAA+F;QAC/F,MAAM,MAAM,GAAY,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACxC,IACE,OAAO,MAAM,KAAK,QAAQ;YAC1B,MAAM,KAAK,IAAI;YACf,KAAK,CAAC,OAAO,CAAE,MAAkC,CAAC,OAAO,CAAC,CAAC,EAC3D,CAAC;YACD,KAAK,MAAM,IAAI,IAAK,MAA+B,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC7D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;oBAAE,SAAS;gBACtD,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,GAAG,IAA0B,CAAC;gBAClD,IAAI,OAAO,GAAG,KAAK,QAAQ;oBAAE,SAAS;gBACtC,0EAA0E;gBAC1E,kEAAkE;gBAClE,IAAI,UAAU,GAAY,IAAI,CAAC;gBAC/B,IACE,OAAO,OAAO,KAAK,QAAQ;oBAC3B,OAAO,KAAK,IAAI;oBAChB,OAAO,IAAK,OAAmC,EAC/C,CAAC;oBACD,MAAM,EAAE,GAAI,OAAmC,CAAC,OAAO,CAAC,CAAC;oBACzD,IAAI,OAAO,EAAE,KAAK,QAAQ,EAAE,CAAC;wBAC3B,IAAI,CAAC;4BACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAA4B,CAAC;4BAC1D,mEAAmE;4BACnE,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC;wBAC3C,CAAC;wBAAC,MAAM,CAAC;4BACP,UAAU,GAAG,EAAE,CAAC;wBAClB,CAAC;oBACH,CAAC;yBAAM,CAAC;wBACN,UAAU,GAAG,EAAE,CAAC;oBAClB,CAAC;gBACH,CAAC;gBACD,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;oBAC3B,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,UAAU,CAAC,SAAS,EAAE,CAAC,CAAC;gBACzD,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,KAAK,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,qCAAqC;QACrC,OAAO;IACT,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IAEjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IAClD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,OA
AO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IAE7C,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,OAAO,EAAE,CAAC;QAC9B,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;IAChD,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E,SAAS,UAAU,CAAC,GAAY;IAC9B,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,IAAI;QAAE,OAAO,KAAK,CAAC;IAC1D,MAAM,CAAC,GAAG,GAA8B,CAAC;IACzC,OAAO,CACL,OAAO,CAAC,CAAC,SAAS,CAAC,KAAK,QAAQ;QAChC,OAAO,CAAC,CAAC,WAAW,CAAC,KAAK,QAAQ;QAClC,OAAO,IAAI,CAAC,CACb,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,oBAAoB;AACpB,8EAA8E;AAE9E,MAAM,UAAU,iBAAiB,CAAC,IAGjC;IACC,MAAM,YAAY,GAAG,IAAI,EAAE,YAAY,IAAI,cAAc,CAAC;IAC1D,MAAM,KAAK,GAAG,IAAI,EAAE,KAAK,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;IAEvD,MAAM,KAAK,GAAG,IAAI,GAAG,EAAiD,CAAC;IAEvE,KAAK,UAAU,GAAG,CAAI,GAAW,EAAE,MAAoB;QACrD,UAAU,CAAC,MAAM,CAAC,CAAC;QACnB,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,KAAK,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC;QACrC,IAAI,KAAK,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;YAClC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAClB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,KAAK,CAAC,KAAU,CAAC;IAC1B,CAAC;IAED,KAAK,UAAU,GAAG,CAAI,GAAW,EAAE,KAAQ,EAAE,OAA4B;QACvE,UAAU,CAAC,SAAS,CAAC,CAAC;QACtB,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,YAAY,CAAC;QAC7C,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,UAAU,UAAU,CAAC,GAAW;QACnC,UAAU,CAAC,SAAS,CAAC,CAAC;QACtB,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IACpB,CAAC;IAED,KAAK,UAAU,KAAK;QAClB,KAAK,CAAC,KAAK,EAAE,CAAC;IAChB,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;AACzC,CAAC;AAED,8EAA8E;AAC9E,yCAAyC;AACzC,8EAA8E;AAE9E,4EAA4E;AAC5E,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,GAAW;IAC7C,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACH,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;IACT,CAAC;IACD,MAAM,OAAO,CAAC,GAAG,CACf,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,GA
AG,EAAE,KAAK,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CACpF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical-edition URL verification subcommand.
|
|
3
|
+
*
|
|
4
|
+
* For each bibliography entry that lacks a DOI or ISBN (pre-DOI primary
|
|
5
|
+
* sources), verifies that the entry's `url` field points to a trusted
|
|
6
|
+
* canonical-edition host and that the URL is live.
|
|
7
|
+
*
|
|
8
|
+
* Injects `headCheck` for testability; defaults to the real implementation.
|
|
9
|
+
*/
|
|
10
|
+
import { headCheck as defaultHeadCheck } from './http.js';
|
|
11
|
+
import type { HttpClient } from './http.js';
|
|
12
|
+
import type { Cache } from './cache/fs-cache.js';
|
|
13
|
+
import type { CslEntry } from './schema/csl.js';
|
|
14
|
+
import type { Entry } from './schema/output.js';
|
|
15
|
+
import type { Config } from './config.js';
|
|
16
|
+
/**
 * Dependencies handed to `runCanonical`.
 *
 * - `config`: configuration object; the implementation reads
 *   `config.trusted_hosts.hosts` and forwards it to the head check.
 * - `bibliography`: entries to examine; one result is produced per entry.
 * - `http`: HTTP client forwarded to the head check.
 * - `cache`: optional cache forwarded to the head check.
 * - `signal`: abort signal, consulted before the run and before each entry.
 * - `headCheck`: optional replacement for the default head check.
 */
export interface RunCanonicalDeps {
|
|
17
|
+
config: Config;
|
|
18
|
+
bibliography: CslEntry[];
|
|
19
|
+
http: HttpClient;
|
|
20
|
+
cache?: Cache;
|
|
21
|
+
signal: AbortSignal;
|
|
22
|
+
/** Injectable for tests; defaults to the real headCheck from ./http.js */
|
|
23
|
+
headCheck?: typeof defaultHeadCheck;
|
|
24
|
+
}
|
|
25
|
+
/**
 * Value resolved by `runCanonical`.
 *
 * `entries` carries, for each processed bibliography entry, only the
 * `citekey` and `canonical` fields of the full output `Entry` type.
 */
export interface RunCanonicalResult {
|
|
26
|
+
entries: Array<Pick<Entry, 'citekey' | 'canonical'>>;
|
|
27
|
+
}
|
|
28
|
+
/**
 * Run canonical-edition URL verification over `deps.bibliography`.
 *
 * @param deps - Configuration, bibliography, HTTP client, optional cache,
 *   abort signal and optional `headCheck` override.
 * @returns A promise for the per-entry `citekey` / `canonical` results.
 */
export declare function runCanonical(deps: RunCanonicalDeps): Promise<RunCanonicalResult>;
|
|
29
|
+
//# sourceMappingURL=canonical.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"canonical.d.ts","sourceRoot":"","sources":["../src/canonical.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,SAAS,IAAI,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAE1D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAC;AACjD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,KAAK,EAAE,KAAK,EAAkB,MAAM,oBAAoB,CAAC;AAChE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAoD1C,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,QAAQ,EAAE,CAAC;IACzB,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,MAAM,EAAE,WAAW,CAAC;IACpB,0EAA0E;IAC1E,SAAS,CAAC,EAAE,OAAO,gBAAgB,CAAC;CACrC;AAED,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC;CACtD;AAMD,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAoBtF"}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical-edition URL verification subcommand.
|
|
3
|
+
*
|
|
4
|
+
* For each bibliography entry that lacks a DOI or ISBN (pre-DOI primary
|
|
5
|
+
* sources), verifies that the entry's `url` field points to a trusted
|
|
6
|
+
* canonical-edition host and that the URL is live.
|
|
7
|
+
*
|
|
8
|
+
* Injects `headCheck` for testability; defaults to the real implementation.
|
|
9
|
+
*/
|
|
10
|
+
import { headCheck as defaultHeadCheck } from './http.js';
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Edition-discipline host mapping (v0.1 compile-time inline)
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
/**
 * Hosts accepted for each recognised scholarly edition, keyed by the edition
 * identifier returned by `detectEdition`.
 *
 * The table and every host list are frozen: this is a shared module-level
 * constant that is only ever read, so accidental mutation (for example a
 * caller pushing onto one of the lists) should fail loudly instead of
 * silently widening the allow-list for the rest of the process.
 */
const CANONICAL_EDITION_HOSTS = Object.freeze({
    'akademie-ausgabe': Object.freeze(['korpora.zim.uni-duisburg-essen.de', 'archive.org']),
    'glasgow': Object.freeze(['oll.libertyfund.org']),
    'clarendon': Object.freeze(['oll.libertyfund.org', 'global.oup.com']),
    'toronto-cw': Object.freeze(['oll.libertyfund.org']),
});
|
|
20
|
+
/**
 * Detect a canonical-edition signal in a note string.
 *
 * The patterns are tried in a fixed order and the first one that matches
 * decides the result. All patterns are case-insensitive.
 *
 * @param {string} note - Free-text note of a bibliography entry.
 * @returns {string|null} One of `'akademie-ausgabe'`, `'glasgow'`,
 *   `'clarendon'`, `'toronto-cw'`, or `null` if no known signal is found.
 */
function detectEdition(note) {
    // The leading `\b` pins the abbreviation "Ak." to the start of a word.
    // Without it, and because the pattern is case-insensitive, ordinary prose
    // such as "speak. Lectures" or "weak. Modern" matched ("ak. L", "ak. M")
    // and the entry was wrongly held to the Akademie-Ausgabe host list.
    if (/\bAk\.\s*[IVXLCDM]+/i.test(note) || /Akademie-Ausgabe/i.test(note)) {
        return 'akademie-ausgabe';
    }
    if (/Glasgow\s+(WN|TMS|LJ)/i.test(note) || /Glasgow Edition/i.test(note)) {
        return 'glasgow';
    }
    if (/Clarendon Edition/i.test(note)) {
        return 'clarendon';
    }
    if (/Toronto\s+CW/i.test(note) || /Collected\s+Works\s+of\s+John\s+Stuart\s+Mill/i.test(note)) {
        return 'toronto-cw';
    }
    return null;
}
|
|
39
|
+
/**
 * Check a host name against an edition's allow-list.
 *
 * The comparison is case-insensitive. A host passes when it equals an allowed
 * host or is a subdomain of one, i.e. ends with "." followed by the allowed
 * host (`web.archive.org` passes for `archive.org`; `notarchive.org` does not).
 *
 * @param {string} host - Host name to test.
 * @param {string[]} allowedHosts - Hosts accepted for the edition.
 * @returns {boolean} `true` if the host is accepted, `false` otherwise.
 */
function isEditionHostAllowed(host, allowedHosts) {
    const candidate = host.toLowerCase();
    return allowedHosts.some((entry) => {
        const permitted = entry.toLowerCase();
        return candidate === permitted || candidate.endsWith('.' + permitted);
    });
}
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// runCanonical
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
/**
 * Run the canonical-edition check over every bibliography entry.
 *
 * Entries are processed one at a time, in input order. The abort signal is
 * consulted once up front and again before each entry; when it is aborted the
 * returned promise rejects with `signal.reason`.
 *
 * @param {object} deps - `config`, `bibliography`, `http`, optional `cache`,
 *   `signal`, and optional `headCheck` (falls back to the imported default).
 * @returns {Promise<{entries: Array<{citekey: *, canonical: *}>}>} One result
 *   per bibliography entry.
 */
export async function runCanonical(deps) {
    const { config, bibliography, http, cache, signal } = deps;
    const headCheckImpl = deps.headCheck ?? defaultHeadCheck;

    // Single place for the "stop if the caller cancelled" rule.
    const bailIfAborted = () => {
        if (signal.aborted) {
            throw signal.reason;
        }
    };

    bailIfAborted();
    const entries = [];
    for (const item of bibliography) {
        bailIfAborted();
        const canonical = await processEntry(item, config, http, cache, signal, headCheckImpl);
        entries.push({ citekey: item.citekey, canonical });
    }
    return { entries };
}
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// Per-entry processing
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
/**
 * Classify one bibliography entry for the canonical-edition report.
 *
 * Checks run in order and the first one that applies decides the result:
 *  1. an entry with a `doi` or `isbn` is `not-applicable`;
 *  2. an entry without a `url` is `no-url-on-pre-doi-entry`;
 *  3. the URL is probed with `doHeadCheck`; a failed probe yields
 *     `wrong-host` when that is the reported reason and `dead-url` for every
 *     other reason, as does a probe that throws;
 *  4. a `plato.stanford.edu` URL whose final address lacks `/archives/` is
 *     `live-url-not-archived-snapshot`;
 *  5. when `entry.note` names a known edition, the final host must be on that
 *     edition's allow-list, otherwise the result is `wrong-host`;
 *  6. anything else is `verified-canonical`.
 *
 * @param {object} entry - Bibliography entry; `doi`, `isbn`, `url` and `note`
 *   are read.
 * @param {object} config - Configuration; `config.trusted_hosts.hosts` is
 *   forwarded to the head check as `trustedHosts`.
 * @param {object} http - HTTP client, forwarded to the head check.
 * @param {object|undefined} cache - Optional cache, forwarded to the head check.
 * @param {AbortSignal} signal - Abort signal, forwarded to the head check.
 * @param {Function} doHeadCheck - Called as `doHeadCheck(url, opts, signal)`.
 * @returns {Promise<object>} `{ status, url }`, plus `redirectChain` once a
 *   head check has been attempted.
 * @throws Rethrows the head check's error when `signal.aborted` is set at the
 *   time the error is caught, so the whole run stops.
 */
async function processEntry(entry, config, http, cache, signal, doHeadCheck) {
    // Step 1: applicability check
    if (entry.doi !== undefined || entry.isbn !== undefined) {
        return { status: 'not-applicable', url: entry.url ?? null };
    }
    // Step 2: no-URL case
    if (entry.url === undefined) {
        return { status: 'no-url-on-pre-doi-entry', url: null };
    }
    const url = entry.url;
    // Step 3: URL liveness via headCheck (per-entry errors are caught)
    let checkResult;
    try {
        checkResult = await doHeadCheck(url, {
            http,
            cache,
            trustedHosts: config.trusted_hosts.hosts,
        }, signal);
    }
    catch (err) {
        // AbortSignal abort — rethrow so the entire run aborts
        if (signal.aborted) {
            throw err;
        }
        // Any other unexpected error → dead-url
        return { status: 'dead-url', url, redirectChain: [] };
    }
    if (!checkResult.ok) {
        // 'wrong-host' is the only failure reported under its own status.
        // Every other reason, including one this code does not know about,
        // is a dead URL. A failed check must never fall through to the
        // success path below: it has no finalUrl/host, and the entry would
        // otherwise come out as 'verified-canonical'.
        const status = checkResult.reason === 'wrong-host' ? 'wrong-host' : 'dead-url';
        return { status, url, redirectChain: [] };
    }
    // checkResult.ok === true
    const { finalUrl, redirectChain, host } = checkResult;
    // Step 4: SEP archived-snapshot rule
    if (host === 'plato.stanford.edu' && !finalUrl.includes('/archives/')) {
        return { status: 'live-url-not-archived-snapshot', url: finalUrl, redirectChain };
    }
    // Step 5: Edition-discipline check
    if (entry.note !== undefined) {
        const edition = detectEdition(entry.note);
        if (edition !== null) {
            const allowedHosts = CANONICAL_EDITION_HOSTS[edition];
            if (allowedHosts !== undefined && !isEditionHostAllowed(host, allowedHosts)) {
                return { status: 'wrong-host', url: finalUrl, redirectChain };
            }
        }
    }
    // Step 6: verified canonical
    return { status: 'verified-canonical', url: finalUrl, redirectChain };
}
|
|
132
|
+
//# sourceMappingURL=canonical.js.map
|