@ctxr/skill-llm-wiki 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +134 -0
  2. package/LICENSE +21 -0
  3. package/README.md +484 -0
  4. package/SKILL.md +252 -0
  5. package/guide/basics/concepts.md +74 -0
  6. package/guide/basics/index.md +45 -0
  7. package/guide/basics/schema.md +140 -0
  8. package/guide/cli.md +256 -0
  9. package/guide/correctness/index.md +45 -0
  10. package/guide/correctness/invariants.md +89 -0
  11. package/guide/correctness/safety.md +96 -0
  12. package/guide/history/diff.md +110 -0
  13. package/guide/history/hidden-git.md +130 -0
  14. package/guide/history/index.md +52 -0
  15. package/guide/history/remote-sync.md +113 -0
  16. package/guide/index.md +134 -0
  17. package/guide/isolation/coexistence.md +134 -0
  18. package/guide/isolation/index.md +44 -0
  19. package/guide/isolation/scale.md +251 -0
  20. package/guide/layout/in-place-mode.md +97 -0
  21. package/guide/layout/index.md +53 -0
  22. package/guide/layout/layout-contract.md +131 -0
  23. package/guide/layout/layout-modes.md +115 -0
  24. package/guide/operations/index.md +76 -0
  25. package/guide/operations/ingest/build.md +75 -0
  26. package/guide/operations/ingest/extend.md +61 -0
  27. package/guide/operations/ingest/index.md +54 -0
  28. package/guide/operations/ingest/join.md +65 -0
  29. package/guide/operations/maintain/fix.md +66 -0
  30. package/guide/operations/maintain/index.md +47 -0
  31. package/guide/operations/maintain/rebuild.md +86 -0
  32. package/guide/operations/validate.md +48 -0
  33. package/guide/substrate/index.md +47 -0
  34. package/guide/substrate/operators.md +96 -0
  35. package/guide/substrate/tiered-ai.md +363 -0
  36. package/guide/ux/index.md +44 -0
  37. package/guide/ux/preflight.md +150 -0
  38. package/guide/ux/user-intent.md +135 -0
  39. package/package.json +55 -0
  40. package/scripts/cli.mjs +893 -0
  41. package/scripts/commands/remote.mjs +93 -0
  42. package/scripts/commands/review.mjs +253 -0
  43. package/scripts/commands/sync.mjs +84 -0
  44. package/scripts/lib/chunk.mjs +421 -0
  45. package/scripts/lib/cluster-detect.mjs +516 -0
  46. package/scripts/lib/decision-log.mjs +343 -0
  47. package/scripts/lib/draft.mjs +158 -0
  48. package/scripts/lib/embeddings.mjs +366 -0
  49. package/scripts/lib/frontmatter.mjs +497 -0
  50. package/scripts/lib/git-commands.mjs +155 -0
  51. package/scripts/lib/git.mjs +486 -0
  52. package/scripts/lib/gitignore.mjs +62 -0
  53. package/scripts/lib/history.mjs +331 -0
  54. package/scripts/lib/indices.mjs +510 -0
  55. package/scripts/lib/ingest.mjs +258 -0
  56. package/scripts/lib/intent.mjs +713 -0
  57. package/scripts/lib/interactive.mjs +99 -0
  58. package/scripts/lib/migrate.mjs +126 -0
  59. package/scripts/lib/nest-applier.mjs +260 -0
  60. package/scripts/lib/operators.mjs +1365 -0
  61. package/scripts/lib/orchestrator.mjs +718 -0
  62. package/scripts/lib/paths.mjs +197 -0
  63. package/scripts/lib/preflight.mjs +213 -0
  64. package/scripts/lib/provenance.mjs +672 -0
  65. package/scripts/lib/quality-metric.mjs +269 -0
  66. package/scripts/lib/query-fixture.mjs +71 -0
  67. package/scripts/lib/rollback.mjs +95 -0
  68. package/scripts/lib/shape-check.mjs +172 -0
  69. package/scripts/lib/similarity-cache.mjs +126 -0
  70. package/scripts/lib/similarity.mjs +230 -0
  71. package/scripts/lib/snapshot.mjs +54 -0
  72. package/scripts/lib/source-frontmatter.mjs +85 -0
  73. package/scripts/lib/tier2-protocol.mjs +470 -0
  74. package/scripts/lib/tiered.mjs +453 -0
  75. package/scripts/lib/validate.mjs +362 -0
@@ -0,0 +1,126 @@
1
+ // similarity-cache.mjs — pairwise memoisation of tiered similarity
2
+ // decisions. Keyed by the sorted pair of content hashes so (a,b) and
3
+ // (b,a) resolve to the same entry. Invalidated implicitly when either
4
+ // entry's hash changes — the key simply doesn't match anymore.
5
+ //
6
+ // Cache entries are JSON files under `<wiki>/.llmwiki/similarity-cache/`.
7
+ // One file per pair. The filename is derived from the sorted hashes
8
+ // with sha256 collapsing to keep the name short and filesystem-safe.
9
+ // The payload carries the tier at which the decision was resolved,
10
+ // the similarity, the decision, and the confidence band — tests read it back to verify
11
+ // caching prevented redundant work.
12
+
13
+ import { createHash } from "node:crypto";
14
+ import {
15
+ existsSync,
16
+ mkdirSync,
17
+ readFileSync,
18
+ readdirSync,
19
+ renameSync,
20
+ rmSync,
21
+ writeFileSync,
22
+ } from "node:fs";
23
+ import { join } from "node:path";
24
+
25
// Location of the similarity-cache directory for the wiki rooted at
// `wikiRoot`: `<wikiRoot>/.llmwiki/similarity-cache`. Pure path
// arithmetic — nothing is created or checked on disk.
export function cacheDir(wikiRoot) {
  const segments = [".llmwiki", "similarity-cache"];
  return join(wikiRoot, ...segments);
}
28
+
29
// Filename stem for a pair of content hashes. The two hashes are put
// into a canonical order first, so cacheKey(a, b) and cacheKey(b, a)
// always produce the same key.
//
// Throws when either hash is missing/empty.
export function cacheKey(hashA, hashB) {
  if (!(hashA && hashB)) {
    throw new Error("similarity-cache: both hashes must be non-empty strings");
  }
  const swap = !(hashA <= hashB);
  const lo = swap ? hashB : hashA;
  const hi = swap ? hashA : hashB;
  // A NUL separator keeps ("ab", "c") distinct from ("a", "bc").
  const material = lo + "\0" + hi;
  // Hashing bounds the key length and keeps it filesystem-safe; the
  // first 32 hex chars (128 bits) are kept for readability.
  const hasher = createHash("sha256");
  hasher.update(material);
  const hex = hasher.digest("hex");
  return hex.slice(0, 32);
}
44
+
45
// Full path of the JSON cache file for a hash pair:
// `<cacheDir(wikiRoot)>/<cacheKey(hashA, hashB)>.json`. Argument order
// of the two hashes does not matter because cacheKey is symmetric.
export function cacheEntryPath(wikiRoot, hashA, hashB) {
  const dir = cacheDir(wikiRoot);
  const fileName = cacheKey(hashA, hashB) + ".json";
  return join(dir, fileName);
}
48
+
49
// Look up the cached decision for a hash pair.
//
// Returns the parsed entry object on a hit, or null when the file is
// absent, unreadable, not valid JSON, or missing any of the required
// fields (numeric `tier`, numeric `similarity`, string `decision`).
// Read/parse failures never propagate: a corrupt entry is simply a
// miss, so the caller recomputes and overwrites it.
export function readCached(wikiRoot, hashA, hashB) {
  const entryPath = cacheEntryPath(wikiRoot, hashA, hashB);
  if (!existsSync(entryPath)) return null;

  let entry;
  try {
    entry = JSON.parse(readFileSync(entryPath, "utf8"));
  } catch {
    return null;
  }

  // Minimal shape check before trusting the payload.
  const wellFormed =
    entry !== null &&
    typeof entry === "object" &&
    typeof entry.tier === "number" &&
    typeof entry.similarity === "number" &&
    typeof entry.decision === "string";
  return wellFormed ? entry : null;
}
73
+
74
// Persist a decision for a hash pair, atomically (temp file + rename).
//
// Parameters:
//   wikiRoot      — wiki root; the cache lives under cacheDir(wikiRoot).
//   hashA, hashB  — content hashes of the pair (order-insensitive).
//   decision      — object whose `tier`, `similarity`, `decision` and
//                   optional `confidence_band` are copied into the
//                   payload; a `cached_at` ISO timestamp is added.
//
// Throws when `decision` is not an object, when either hash is empty
// (via cacheKey), or when the filesystem write/rename fails. On a
// failed write or rename the temp file is removed before the error is
// rethrown: temp names do not end in ".json", so clearCache would never
// collect them and they would otherwise pile up in the cache directory.
export function writeCached(wikiRoot, hashA, hashB, decision) {
  if (!decision || typeof decision !== "object") {
    throw new Error("similarity-cache: decision must be an object");
  }
  // Resolve the target first so invalid hashes fail before anything is
  // created on disk.
  const path = cacheEntryPath(wikiRoot, hashA, hashB);
  mkdirSync(cacheDir(wikiRoot), { recursive: true });
  const payload = JSON.stringify(
    {
      tier: decision.tier,
      similarity: decision.similarity,
      decision: decision.decision,
      confidence_band: decision.confidence_band ?? null,
      cached_at: new Date().toISOString(),
    },
    null,
    0,
  );
  const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
  try {
    writeFileSync(tmp, payload, "utf8");
    renameSync(tmp, path);
  } catch (err) {
    try {
      rmSync(tmp, { force: true });
    } catch {
      /* best-effort cleanup; the original failure below is what matters */
    }
    throw err;
  }
}
97
+
98
// Delete every `*.json` entry in the cache directory and return how
// many removals succeeded. Returns 0 when the directory does not
// exist. Individual removal failures are ignored (best-effort) and are
// not counted. Files without a `.json` suffix are left alone.
export function clearCache(wikiRoot) {
  const dir = cacheDir(wikiRoot);
  if (!existsSync(dir)) return 0;

  const entryNames = readdirSync(dir).filter((name) => name.endsWith(".json"));
  let removed = 0;
  for (const entryName of entryNames) {
    try {
      rmSync(join(dir, entryName), { force: true });
      removed += 1;
    } catch {
      /* best-effort */
    }
  }
  return removed;
}
116
+
117
// Number of `*.json` entries currently in the cache directory; 0 when
// the directory does not exist.
export function cacheSize(wikiRoot) {
  const dir = cacheDir(wikiRoot);
  if (!existsSync(dir)) return 0;
  return readdirSync(dir).filter((name) => name.endsWith(".json")).length;
}
@@ -0,0 +1,230 @@
1
+ // similarity.mjs — Tier 0 of the tiered AI ladder (methodology §8.5).
2
+ //
3
+ // Pure TF-IDF + cosine similarity over entry frontmatters. No external
4
+ // dependencies. Deterministic. Cheap enough to run on every pairwise
5
+ // check without concern. The ladder escalates to Tier 1 (local
6
+ // embeddings) only when Tier 0's confidence is mid-band.
7
+ //
8
+ // Scope restriction: this module operates on frontmatter fields only
9
+ // (focus + covers[] + tags + domains). Bodies are never touched — that is
10
+ // the whole point of Phase 5's chunk iterator, and Phase 6 honours it
11
+ // at the substrate level.
12
+ //
13
+ // Thresholds are tunable via `<wiki>/.llmwiki/config.yaml` in
14
+ // future; Phase 6 ships the defaults as exported constants so tests
15
+ // and `tiered.mjs` can reference them without drift.
16
+
17
// Default Tier 0 decision thresholds used by compareEntries:
// similarity >= TIER0_DECISIVE_SAME is decided as "same",
// similarity <= TIER0_DECISIVE_DIFFERENT is decided as "different",
// and anything in between is escalated.
const TIER0_DECISIVE_SAME = 0.85;
const TIER0_DECISIVE_DIFFERENT = 0.3;

export { TIER0_DECISIVE_SAME, TIER0_DECISIVE_DIFFERENT };
19
+
20
// Small embedded English stopword list — intentionally narrow. A
// fuller list tends to over-aggressive filtering and hides real
// content signals in short covers[] strings. The covers field uses
// terse technical phrases; dropping articles and connectives is
// enough.
const STOPWORDS = new Set([
  "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
  "has", "have", "he", "her", "his", "i", "in", "is", "it", "its",
  "of", "on", "or", "she", "that", "the", "their", "they", "this",
  "to", "was", "we", "were", "will", "with",
]);

// Tokeniser: lowercase, NFC-normalise, split on runs of characters
// that are not letters, combining marks, or numbers, then drop tokens
// shorter than two UTF-16 units and stopwords.
//
// Returns an array of tokens; returns [] for empty or non-string input.
//
// Combining marks (`\p{M}`) are part of the token class on purpose.
// Without them every vowel sign / virama / combining accent acts as a
// separator, which shreds words in scripts such as Devanagari into
// single letters (all then discarded by the length filter) and makes
// decomposed "e + U+0301" tokenise differently from precomposed "é".
// NFC normalisation makes both spellings of the same word compare
// equal. Numbers are kept because version suffixes like "v1" / "v2"
// are often meaningful signals.
export function tokenize(text) {
  if (!text || typeof text !== "string") return [];
  // `\p{L}` = any Unicode letter; `\p{M}` = any combining mark;
  // `\p{N}` = any Unicode number.
  return text
    .toLowerCase()
    .normalize("NFC")
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    .filter((t) => t.length >= 2 && !STOPWORDS.has(t));
}
46
+
47
// Term-frequency map for a token list: token → number of occurrences,
// with keys in first-occurrence order.
function tf(tokens) {
  const counts = new Map();
  for (const token of tokens) {
    const seenSoFar = counts.get(token);
    counts.set(token, seenSoFar === undefined ? 1 : seenSoFar + 1);
  }
  return counts;
}
53
+
54
// Inverse-document-frequency weights for a corpus given as an array of
// token lists (one list per document). Uses the smoothed form
// `log((1 + N) / (1 + df)) + 1`, where N is the number of documents
// and df the number of documents containing the term. The weight is
// never negative and stays well-behaved for tiny corpora.
//
// Worked values:
//   N=1, df=1 → log(2/2)+1 = 1.000
//   N=2, df=2 → log(3/3)+1 = 1.000
//   N=2, df=1 → log(3/2)+1 ≈ 1.405
//   N=3, df=3 → log(4/4)+1 = 1.000
//   N=3, df=1 → log(4/2)+1 ≈ 1.693
//
// A term present in every document therefore gets the baseline weight
// 1.0 and rarer terms get more, so cosine similarity leans on the
// distinguishing terms rather than the ubiquitous ones.
//
// Returns a Map of term → idf weight.
export function computeIdf(tokenLists) {
  const docFreq = new Map();
  for (const tokens of tokenLists) {
    // A Set collapses repeats so each document counts a term once.
    for (const term of new Set(tokens)) {
      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
    }
  }

  const docCount = tokenLists.length;
  const weights = new Map();
  docFreq.forEach((containing, term) => {
    weights.set(term, Math.log((1 + docCount) / (1 + containing)) + 1);
  });
  return weights;
}
88
+
89
// Build the shared comparison model for a pool of entries in one pass:
// the comparison text of every entry, its token list, and the IDF map
// over the whole pool. Callers that walk N² pairs (detectMerge) pass
// the result to compareEntries so IDF is computed once rather than per
// pair — O(N²) total work instead of O(N³).
//
// Returns { texts, tokenLists, idfMap }, index-aligned with `entries`.
export function buildComparisonModel(entries) {
  const texts = entries.map((entry) => entryText(entry));
  const tokenLists = texts.map((text) => tokenize(text));
  return {
    texts,
    tokenLists,
    idfMap: computeIdf(tokenLists),
  };
}
99
+
100
// Sparse tf-idf vector (Map of term → tf × idf) for a token list.
// Terms that have no entry in `idfMap` — i.e. never seen in the corpus
// the map was built from — are left out, so they contribute nothing.
export function tfidfVector(tokens, idfMap) {
  const weighted = new Map();
  tf(tokens).forEach((freq, term) => {
    const idf = idfMap.get(term);
    if (idf !== undefined) {
      weighted.set(term, freq * idf);
    }
  });
  return weighted;
}
112
+
113
// Cosine similarity of two sparse vectors given as Maps of
// term → weight. For non-negative weights the result lies in [0, 1].
// Missing, empty, or all-zero vectors yield 0 instead of NaN.
export function cosine(a, b) {
  if (!a || !b || a.size === 0 || b.size === 0) return 0;

  // Only terms present in both maps contribute to the dot product, so
  // it is enough to walk the smaller map and probe the larger one.
  const [small, large] = a.size <= b.size ? [a, b] : [b, a];
  let dot = 0;
  for (const [term, weight] of small) {
    dot += weight * (large.get(term) ?? 0);
  }

  const sumOfSquares = (vec) => {
    let total = 0;
    for (const w of vec.values()) total += w * w;
    return total;
  };
  const denom = Math.sqrt(sumOfSquares(a)) * Math.sqrt(sumOfSquares(b));
  return denom === 0 ? 0 : dot / denom;
}
135
+
136
// Assemble the comparison text for one entry from its frontmatter
// fields only: `focus`, `covers[]`, `tags[]`, `domains[]`. `focus` is
// emitted twice so it carries double weight after tokenisation.
// Non-string focus, non-array lists, and non-string list items are
// ignored. Returns "" for a missing or non-object entry.
export function entryText(data) {
  if (!data || typeof data !== "object") return "";

  const pieces = [];
  // Focus is the most semantically concentrated field; double it.
  if (typeof data.focus === "string") {
    pieces.push(data.focus, data.focus);
  }
  for (const field of ["covers", "tags", "domains"]) {
    const items = data[field];
    if (!Array.isArray(items)) continue;
    pieces.push(items.filter((item) => typeof item === "string").join(" "));
  }
  return pieces.join(" ").trim();
}
158
+
159
// Tier 0 comparison of two entries. Returns
//   { tier: 0, similarity, decision, confidence_band, reason }
//
// `decision` is one of:
//   "same"        — similarity >= sameThreshold       (band "decisive-same")
//   "different"   — similarity <= differentThreshold  (band "decisive-different")
//   "escalate"    — anything in between               (band "mid-band")
//   "undecidable" — either entry produced empty comparison text
//                   (band "insufficient-text", similarity 0)
//
// "undecidable" is a hard stop for the pair: callers should NOT pass
// it on to Tier 1/2, because an empty-text pair embeds to whatever the
// model emits for empty input, which collapses to near-1.0 cosine and
// would cause spurious MERGE decisions.
//
// IDF statistics come from, in order of preference:
//   1. `precomputedModel.idfMap` — the result of
//      buildComparisonModel(corpusContext), reused across many pairs
//      (O(N²) vs O(N³));
//   2. `corpusContext` — a non-empty list of entry data objects (for
//      sibling comparison, the full sibling set);
//   3. the two entries themselves.
export function compareEntries(
  a,
  b,
  corpusContext = null,
  {
    sameThreshold = TIER0_DECISIVE_SAME,
    differentThreshold = TIER0_DECISIVE_DIFFERENT,
    precomputedModel = null,
  } = {},
) {
  const textA = entryText(a);
  const textB = entryText(b);
  const missingText = textA === "" || textB === "";
  if (missingText) {
    return {
      tier: 0,
      similarity: 0,
      decision: "undecidable",
      confidence_band: "insufficient-text",
      reason: "one or both entries had empty frontmatter text",
    };
  }

  const tokensA = tokenize(textA);
  const tokensB = tokenize(textB);

  // Pick the IDF source lazily so the context is only tokenised when
  // no precomputed model was supplied.
  const resolveIdf = () => {
    if (precomputedModel) return precomputedModel.idfMap;
    const haveContext = corpusContext && corpusContext.length > 0;
    const pool = haveContext
      ? corpusContext.map((entry) => tokenize(entryText(entry)))
      : [tokensA, tokensB];
    return computeIdf(pool);
  };
  const idfMap = resolveIdf();

  const similarity = cosine(
    tfidfVector(tokensA, idfMap),
    tfidfVector(tokensB, idfMap),
  );

  // Start from the mid-band outcome and override when decisive.
  let decision = "escalate";
  let confidence_band = "mid-band";
  if (similarity >= sameThreshold) {
    decision = "same";
    confidence_band = "decisive-same";
  } else if (similarity <= differentThreshold) {
    decision = "different";
    confidence_band = "decisive-different";
  }

  return {
    tier: 0,
    similarity,
    decision,
    confidence_band,
    reason: null,
  };
}
@@ -0,0 +1,54 @@
1
+ // snapshot.mjs — create the pre-op snapshot commit that every top-level
2
+ // operation anchors rollback to. The snapshot captures every byte of every
3
+ // tracked wiki file at the moment before the operation starts.
4
+
5
+ import {
6
+ gitCommit,
7
+ gitHeadSha,
8
+ gitInit,
9
+ gitRunChecked,
10
+ gitTag,
11
+ gitWorkingTreeClean,
12
+ } from "./git.mjs";
13
+ import { ensureWikiGitignore } from "./gitignore.mjs";
14
+
15
+ export { ensureWikiGitignore };
16
+
17
// preOpSnapshot(wikiRoot, opId) — create the rollback anchor for an
// operation. Steps, in order:
//   1. gitInit(wikiRoot) — ensure the private repo exists.
//   2. ensureWikiGitignore(wikiRoot) — ensure the wiki-local .gitignore.
//   3. git add -A.
//   4. If the tree is not clean after staging, commit "pre-op <opId>";
//      otherwise skip the commit.
//   5. Tag HEAD as pre-op/<opId>.
//
// Tag naming: the pre-op anchor lives under `refs/tags/pre-op/` and the
// final tag under `refs/tags/op/`. Separate ref subdirectories avoid
// git's "cannot create ref X: X/y exists" hierarchy collision — the
// earlier `op/<id>/pre` + `op/<id>` scheme collided because git treats
// the slash as a directory boundary. Rollback's `pre-<op-id>` shorthand
// resolves to `pre-op/<op-id>`.
//
// Returns { initialized, tag, committed, sha }:
//   initialized — taken from gitInit's result;
//   tag         — the tag name that was created;
//   committed   — whether a new pre-op commit was written (false when
//                 the working tree already matched HEAD);
//   sha         — HEAD's SHA after tagging, i.e. what the tag points at.
//
// Throws when `opId` is not a non-empty string. Tag creation is meant
// to be loud on collision: gitTag (defined in git.mjs, not shown here)
// is expected to throw rather than overwrite an existing
// `pre-op/<opId>` that points elsewhere.
export function preOpSnapshot(wikiRoot, opId) {
  const opIdIsValid = typeof opId === "string" && opId !== "";
  if (!opIdIsValid) {
    throw new Error("preOpSnapshot requires a non-empty opId string");
  }

  const repo = gitInit(wikiRoot);
  ensureWikiGitignore(wikiRoot);
  gitRunChecked(wikiRoot, ["add", "-A"]);

  // Only write a commit when staging actually changed something.
  const committed = !gitWorkingTreeClean(wikiRoot);
  if (committed) {
    gitCommit(wikiRoot, `pre-op ${opId}`);
  }

  const tag = `pre-op/${opId}`;
  gitTag(wikiRoot, tag, "HEAD");
  const sha = gitHeadSha(wikiRoot);
  return { initialized: repo.initialized, tag, committed, sha };
}
@@ -0,0 +1,85 @@
1
+ // source-frontmatter.mjs — read frontmatter from source files via
2
+ // `gray-matter`, the de-facto-standard YAML frontmatter library.
3
+ //
4
+ // Why not extend scripts/lib/frontmatter.mjs? That module owns the
5
+ // skill's OUTPUT serialisation, where deterministic byte-identical
6
+ // rendering is a hard requirement. The home-rolled parser only covers
7
+ // the narrow YAML subset the skill writes. When we read an ALREADY-
8
+ // frontmatter'd source file (e.g. a hand-authored wiki guide with
9
+ // `activation`, `covers`, `tags`, `focus`, `shared_covers`, nested
10
+ // sequences of maps, etc.), we want full YAML 1.2 coverage — exactly
11
+ // what gray-matter gives us (it delegates to js-yaml).
12
+ //
13
+ // Pollution guard: gray-matter's parsed object can still contain keys
14
+ // that would poison our internal object pipeline if merged naively
15
+ // (`__proto__`, `constructor`, `prototype`). We strip those here so
16
+ // every downstream caller can treat the returned `data` as a safe
17
+ // plain object. This preserves the security invariant encoded in
18
+ // `scripts/lib/frontmatter.mjs`'s POLLUTION_KEYS list and its test
19
+ // at `tests/unit/frontmatter-pollution.test.mjs`.
20
+
21
+ import matter from "gray-matter";
22
+
23
// Keys that must never survive into parsed frontmatter data.
const POLLUTION_KEYS = new Set(["__proto__", "constructor", "prototype"]);

// Parse a raw source string. Returns `{ data, body, hasFrontmatter }`
// (plus `error` when the YAML was malformed).
//
// - `data`: the parsed frontmatter as a safe plain object (pollution
//   keys removed recursively). Empty object when there was no
//   frontmatter, or when the frontmatter was not a mapping (a bare
//   scalar or a list).
// - `body`: the source content WITH the frontmatter block stripped.
//   This is what the orchestrator concatenates fresh frontmatter on
//   top of — stripping here is what fixes the double-stack bug.
// - `hasFrontmatter`: true iff the source opened with a `---` fence
//   line and has a matching close fence. Used by the orchestrator to
//   decide whether to apply the authored-field merge in
//   draftLeafFrontmatter.
// - `error`: only present when gray-matter threw; carries its message.
//
// Throws TypeError when `raw` is not a string.
export function parseSourceFrontmatter(raw) {
  if (typeof raw !== "string") {
    throw new TypeError("parseSourceFrontmatter: raw must be a string");
  }
  // Fast path: no opening fence, or an opening fence that is never
  // closed → no frontmatter. The close-fence test ("\n---" after the
  // opening delimiter) is required: a document that merely starts with
  // a `---` horizontal rule must keep its whole text as `body`.
  // NOTE(review): gray-matter appears to treat an unterminated fence as
  // frontmatter running to end of input and return an empty body —
  // confirm against the pinned gray-matter version.
  const opensWithFence = raw.startsWith("---\n") || raw.startsWith("---\r\n");
  const hasCloseFence = opensWithFence && raw.indexOf("\n---", 3) !== -1;
  if (!hasCloseFence) {
    return { data: {}, body: raw, hasFrontmatter: false };
  }
  let parsed;
  try {
    parsed = matter(raw, { excerpt: false });
  } catch (err) {
    // gray-matter throws on malformed YAML. Surface the underlying
    // message and let the orchestrator decide whether to fall back
    // (it currently treats malformed source frontmatter as empty).
    return {
      data: {},
      body: raw,
      hasFrontmatter: false,
      error: err.message || String(err),
    };
  }
  // Frontmatter that parses to a scalar, list, or other non-mapping
  // value cannot be merged as fields; expose it as an empty object so
  // `data` is always a plain object.
  const cleaned = sanitise(parsed.data);
  const safeData = isPlainRecord(cleaned) ? cleaned : {};
  const body = typeof parsed.content === "string" ? parsed.content : "";
  // Any non-empty data object OR a body shorter than the raw input
  // implies a fence was parsed.
  const hasFrontmatter =
    Object.keys(safeData).length > 0 || body.length < raw.length;
  return { data: safeData, body, hasFrontmatter };
}

// True for non-null, non-array objects whose prototype is
// Object.prototype or null — i.e. plain key/value records.
function isPlainRecord(value) {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return false;
  }
  const proto = Object.getPrototypeOf(value);
  return proto === Object.prototype || proto === null;
}

// Recursively copy a parsed value, refusing any pollution key. Arrays
// and plain maps are walked and rebuilt; primitives pass through
// unchanged. Other object instances (e.g. a Date) are returned as-is:
// rebuilding them from their enumerable own properties would turn
// them into empty objects and silently lose the value.
function sanitise(value) {
  if (value === null || typeof value !== "object") return value;
  if (Array.isArray(value)) return value.map((v) => sanitise(v));
  if (!isPlainRecord(value)) return value;
  const out = {};
  for (const [k, v] of Object.entries(value)) {
    if (POLLUTION_KEYS.has(k)) continue;
    // defineProperty (not assignment) so no setter on the prototype
    // chain can ever be triggered by a key name.
    Object.defineProperty(out, k, {
      value: sanitise(v),
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return out;
}