llm-wiki-compiler 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1010 @@
1
+ /**
2
+ * Type definitions for the wiki schema layer.
3
+ *
4
+ * The schema layer turns llmwiki from a flat compiler pipeline into a shaped
5
+ * knowledge system. It declares the kinds of pages a project supports
6
+ * (`concept`, `entity`, `comparison`, `overview`) and the cross-link
7
+ * expectations that lint and review enforce per kind.
8
+ *
9
+ * Types live in their own module so that compile, lint, CLI, and tests can
10
+ * depend on the schema vocabulary without pulling in YAML/JSON loaders.
11
+ */
12
+ /** All page kinds the schema layer recognises. Also the type of the optional `ExportPage.kind` field. */
13
+ type PageKind = "concept" | "entity" | "comparison" | "overview";
14
+
15
+ /**
16
+ * Type definitions for the wiki linter.
17
+ * Defines the shape of lint results, summaries, and rule functions
18
+ * used across all lint rules and the orchestrator.
19
+ */
20
+ interface LintResult { // One lint finding; `LintSummary.results` is an array of these.
21
+ rule: string;
22
+ severity: "error" | "warning" | "info"; // Closed set of three severity levels.
23
+ file: string;
24
+ message: string;
25
+ line?: number; // Optional: a result may omit the line number.
26
+ }
27
+ interface LintSummary { // Three numeric tallies plus the individual `LintResult` entries.
28
+ errors: number; // NOTE(review): presumably the number of results with severity "error" — confirm.
29
+ warnings: number; // NOTE(review): presumably the number of results with severity "warning" — confirm.
30
+ info: number; // NOTE(review): presumably the number of results with severity "info" — confirm.
31
+ results: LintResult[];
32
+ }
33
+
34
+ /**
35
+ * Lifecycle state of a concept's or page's provenance (surfaced as the optional `ExportPage.provenanceState`).
36
+ * - `extracted`: drawn directly from a source document.
37
+ * - `merged`: synthesised from multiple sources during compilation.
38
+ * - `inferred`: produced by the model from context, not directly cited.
39
+ * - `ambiguous`: sources disagree or evidence is conflicting.
40
+ */
41
+ type ProvenanceState = "extracted" | "merged" | "inferred" | "ambiguous";
42
+ /**
43
+ * Reference to another concept that contradicts the current one.
44
+ * The slug points to the contradicting wiki page. Element type of `ExportPage.contradictedBy`.
45
+ */
46
+ interface ContradictionRef {
47
+ slug: string;
48
+ reason?: string; // Optional: a reference may carry no stated reason.
49
+ }
50
+ /** Structured result returned by the compile pipeline: three numeric tallies, three string lists, and optional review candidates. */
51
+ interface CompileResult {
52
+ compiled: number; // NOTE(review): the unit counted (sources vs. pages) is not visible here — confirm.
53
+ skipped: number;
54
+ deleted: number;
55
+ concepts: string[];
56
+ pages: string[];
57
+ errors: string[]; // Errors are carried as plain strings, not Error objects.
58
+ /** Candidate IDs created when the pipeline runs in --review mode. Optional field. */
59
+ candidates?: string[];
60
+ }
61
+ /** A single chunk citation surfaced as part of a query result. Element type of `RetrievalDebug.chunks`. */
62
+ interface ChunkCitation {
63
+ slug: string;
64
+ title: string;
65
+ chunkIndex: number; // NOTE(review): presumably the chunk's position within its page; base (0 or 1) not visible here — confirm.
66
+ score: number;
67
+ text: string;
68
+ }
69
+ /** Diagnostic snapshot of how the retrieval pipeline picked context. Attached to the optional `QueryResult.debug`. */
70
+ interface RetrievalDebug {
71
+ /** Pages selected after collapsing chunks to their parent slugs; each entry pairs a slug with a score. */
72
+ pages: Array<{
73
+ slug: string;
74
+ score: number;
75
+ }>;
76
+ /** Top-ranked chunks before the page-collapse step. */
77
+ chunks: ChunkCitation[];
78
+ /** True when chunk-level entries drove the selection (vs. page-level fallback). */
79
+ usedChunks: boolean;
80
+ /** True when reranking reordered the initial semantic ranking. */
81
+ reranked: boolean;
82
+ }
83
+ /** Structured result returned by the query pipeline. `saved` and `debug` are optional. */
84
+ interface QueryResult {
85
+ answer: string;
86
+ selectedPages: string[];
87
+ reasoning: string;
88
+ saved?: string; // NOTE(review): presumably identifies where the answer was saved — confirm what the string holds.
89
+ /** Populated when the query was run in debug mode. */
90
+ debug?: RetrievalDebug;
91
+ }
92
+ /** Source type tag persisted in frontmatter to describe the ingest origin. Type of the optional `IngestResult.sourceType`. */
93
+ type SourceType = "web" | "file" | "image" | "pdf" | "transcript";
94
+ /** Outcome of a source write: a new file, a content change, or a no-op. Type of the required `IngestResult.writeStatus`. */
95
+ type WriteStatus = "created" | "updated" | "unchanged";
96
+ /** Structured result returned by the ingest pipeline. Only `sourceType` is optional. */
97
+ interface IngestResult {
98
+ filename: string;
99
+ charCount: number;
100
+ truncated: boolean; // NOTE(review): presumably true when the ingested content was cut short — confirm the limit applied.
101
+ source: string;
102
+ /** Detected source type; undefined for legacy results produced before this field was added. */
103
+ sourceType?: SourceType;
104
+ /** Whether the source file was created, updated (content changed), or unchanged (no-op). */
105
+ writeStatus: WriteStatus;
106
+ }
107
+
108
+ /**
109
+ * Provenance helpers for `llmwiki context`.
110
+ *
111
+ * Slice 4 ships two related pieces of work:
112
+ * 1. Flatten `ViewerPage.citations` (`ClaimCitation[]`, each with one
113
+ * or more `SourceSpan` entries) into the documented
114
+ * `ContextPrimary.citations[]` shape: one object per span,
115
+ * `file`/`start`/`end` lifted from `span.lines` when present,
116
+ * paragraph-only citations omit `start`/`end`, de-duped by
117
+ * `(file, start, end)`, preserved in first-seen document order
118
+ * (plan §Provenance And Source Windows).
119
+ * 2. Materialize bounded `ContextSourceWindow[]` for `--include-sources`
120
+ * by reading short line ranges out of `sources/`. Path-confined:
121
+ * traversal, absolute paths, and symlink escapes are rejected.
122
+ * Only claim-level spans (`lines` populated) become windows;
123
+ * paragraph-only citations are intentionally skipped because the
124
+ * caller asked for SPECIFIC line context, not whole files.
125
+ *
126
+ * Citation flattening is the inner contract for `primary[].citations`;
127
+ * source-window materialization is the outer guard rail for
128
+ * `--include-sources` per-pack and per-window caps.
129
+ */
130
+
131
+ /**
132
+ * Flat citation shape consumed by `ContextPrimary.citations[]` AND by
133
+ * the JSON export's `ExportPage.citations[]`. Exported so both
134
+ * surfaces share one normalized shape rather than drifting — consumers
135
+ * can reuse the same flattening rule across `llmwiki context` and
136
+ * `llmwiki export`. Note: `ContextPrimary.citations` is typed as `ContextCitation[]`, a separately declared interface with the same three fields.
137
+ */
138
+ interface FlatCitation {
139
+ file: string;
140
+ start?: number; // Optional, as is `end`.
141
+ end?: number;
142
+ }
143
+
144
+ /**
145
+ * Types for the computed source-freshness layer.
146
+ *
147
+ * Freshness is derived on demand from the filesystem + state.json and is never
148
+ * persisted. `FreshnessSnapshot` is built once per command/viewer snapshot and
149
+ * shared by every consumer (lint, export, MCP, context, viewer).
150
+ */
151
+ /** A page's computed freshness on the source-derived axis. Type of `ExportPage.freshnessStatus` and `ContextPrimary.freshnessStatus`. */
152
+ type FreshnessStatus = "fresh" | "stale" | "orphaned" | "unverified";
153
+
154
+ /**
155
+ * Shared types for the llmwiki export subsystem.
156
+ *
157
+ * ExportPage is the normalised in-memory representation of a wiki page used
158
+ * by every export format. It is derived from the page's YAML frontmatter plus
159
+ * the wikilink graph extracted from the body.
160
+ *
161
+ * Trust-adjacent fields (`advisoryConfidence`, `provenanceState`,
162
+ * `contradictedBy`) are surfaced as **advisory metadata only** — once the
163
+ * export crosses into any downstream storage (Atomic Memory or otherwise),
164
+ * those fields become mutable and lose their cryptographic tie to this
165
+ * export. Consumers should treat them as the compiler's estimate at
166
+ * export time, not as runtime guarantees.
167
+ */
168
+
169
+ /**
170
+ * Flat citation shape exported alongside each page. Declared as a plain alias of the
171
+ * normalized `FlatCitation` used by `llmwiki context` so adapters that
172
+ * consume both surfaces share one shape. Paragraph-only citations omit
173
+ * `start` and `end`; claim-level citations carry the parsed line range.
174
+ */
175
+ type ExportCitation = FlatCitation;
176
+ /**
177
+ * Which wiki/ subdirectory a page lives in. Also forms the prefix of the `PageId` template-literal type.
178
+ *
179
+ * Intentionally distinct from the schema layer's `PageKind`
180
+ * (concept/entity/comparison/overview) — this is a filesystem location, not
181
+ * a semantic typology. Renaming avoids field collision when JSON export and
182
+ * schema metadata are consumed by the same downstream tooling.
183
+ */
184
+ type PageDirectory = "concepts" | "queries";
185
+ /** A fully-resolved wiki page ready for export serialisation. Element type of `JsonExportDocument.pages`. */
186
+ interface ExportPage {
187
+ /** Human-readable page title (from frontmatter). */
188
+ title: string;
189
+ /** Filesystem slug (filename without .md). */
190
+ slug: string;
191
+ /** Whether this page came from wiki/concepts or wiki/queries. */
192
+ pageDirectory: PageDirectory;
193
+ /**
194
+ * Project-relative path to the source markdown file, e.g.
195
+ * `wiki/concepts/retrieval.md`. Surfaced for the bridge so downstream
196
+ * adapters can deep-link without reconstructing the path themselves.
197
+ */
198
+ path: string;
199
+ /** One-line page summary (from frontmatter). */
200
+ summary: string;
201
+ /** Source filenames cited in the page body. */
202
+ sources: string[];
203
+ /** Taxonomy tags (from frontmatter). */
204
+ tags: string[];
205
+ /** ISO-8601 creation timestamp. Required here, unlike the optional `Page.createdAt`. */
206
+ createdAt: string;
207
+ /** ISO-8601 last-updated timestamp. Required here, unlike the optional `Page.updatedAt`. */
208
+ updatedAt: string;
209
+ /** Slugs of other pages this page links to via [[wikilinks]]. */
210
+ links: string[];
211
+ /** Full markdown body (without frontmatter). */
212
+ body: string;
213
+ /**
214
+ * Optional typed page kind from frontmatter. Defaults to "concept" in
215
+ * downstream consumers when absent — the export omits the field if no
216
+ * `kind` was set on the wiki page rather than fabricating a default.
217
+ */
218
+ kind?: PageKind;
219
+ /**
220
+ * Compiler's confidence estimate at export time. Advisory only —
221
+ * once imported into any downstream store this field is mutable and
222
+ * not cryptographically bound to the export.
223
+ */
224
+ advisoryConfidence?: number; // NOTE(review): numeric range (e.g. 0–1) is not visible here — confirm.
225
+ /** Lifecycle state from the compiler's provenance metadata. Advisory only. */
226
+ provenanceState?: ProvenanceState;
227
+ /** Other pages flagged as contradicting this one. Advisory only. */
228
+ contradictedBy?: ContradictionRef[];
229
+ /**
230
+ * Claim citations from the page body, flattened to the shared bridge
231
+ * shape. One entry per `^[file:start-end]` span. Multi-source markers
232
+ * (`^[a.md, b.md]`) expand into multiple entries. Paragraph-only
233
+ * citations carry no line range.
234
+ */
235
+ citations: ExportCitation[];
236
+ /**
237
+ * Prior external IDs this page was known by (e.g. before a slug
238
+ * rename). Downstream importers treat any matching alias as an
239
+ * upsert target so renamed pages do not orphan their prior memory
240
+ * record.
241
+ */
242
+ aliases?: string[];
243
+ /**
244
+ * Advisory per-page source-freshness, computed at export time from
245
+ * `.llmwiki/state.json` + the current `sources/`. A snapshot, not a
246
+ * guarantee. The export is active-page-only, so this is `fresh`, `stale`,
247
+ * or `unverified` — never `orphaned` (orphaned pages are dropped from the
248
+ * export and surfaced by lint/the viewer instead). The declared type is the full `FreshnessStatus` union, so this exclusion is not enforced by the type.
249
+ */
250
+ freshnessStatus: FreshnessStatus;
251
+ /** True when the page is disputed by another page (`contradictedBy` non-empty). */
252
+ contradicted: boolean;
253
+ /** True when the page is explicitly archived (`archived: true` frontmatter). */
254
+ archived: boolean;
255
+ /**
256
+ * Deterministic SHA-256 (hex) of {@link ExportPage.body}. Part of export
257
+ * provenance: lets a downstream auditor detect content drift and verify that an
258
+ * imported page still matches what the compiler exported, without
259
+ * re-reading the markdown. Stable for identical bodies.
260
+ */
261
+ contentHash: string;
262
+ /**
263
+ * SHA-256 hashes of the source files this page derived from — the same
264
+ * per-source digests the compiler records in `.llmwiki/state.json` for
265
+ * change detection. Resolved from the page's `sources` list; ordered and
266
+ * de-duplicated. Empty when a page has no recorded sources (e.g. seed
267
+ * pages). Lets an auditor tie a page back to exact source bytes.
268
+ */
269
+ sourceHashes: string[];
270
+ /**
271
+ * Model id that produced this page's current content, stamped into the
272
+ * page's frontmatter at compile time as part of export provenance. Unlike an export-time env
273
+ * read, this is true per-page lineage: a page compiled by model A keeps
274
+ * `modelId: A` even if the exporter's env later points at model B. Absent
275
+ * for pages compiled before provenance stamping shipped.
276
+ */
277
+ modelId?: string;
278
+ /**
279
+ * Named prompt-contract version the page was compiled under, part of export provenance and
280
+ * stamped at compile time. Absent for pre-provenance pages.
281
+ */
282
+ promptVersion?: string;
283
+ }
284
+
285
+ /**
286
+ * Path-safe page access primitives for the llmwiki in-process SDK.
287
+ *
288
+ * Exposes two public functions:
289
+ * - `getPage(root, ref)` — fetch a single page by directory + slug; returns
290
+ * the full `Page` shape (body included) or null when the file is absent.
291
+ * - `listPages(root, options)` — scan both page directories, read each page's
292
+ * body so wikilinks can be extracted, apply archive/orphan filters, sort,
293
+ * and return a cursor-paged slice.
294
+ *
295
+ * Design notes:
296
+ * - `links` are derived from the Markdown **body** via `extractWikilinkSlugs`,
297
+ * NOT from frontmatter.
298
+ * - `archived` and `orphaned` are boolean **frontmatter** flags.
299
+ * - `scanWikiPages` returns `{ slug, meta }` only (no body), so `listPages`
300
+ * always re-reads each file to extract body links even when `includeBody`
301
+ * is false.
302
+ * - Path safety is enforced at `getPage` entry via `assertSafeSlug`; symlink
303
+ * confinement is handled at a lower level by `scanWikiPages`.
304
+ */
305
+
306
+ /** A reference to a specific page by its directory and slug; both fields are required. */
307
+ interface PageRef {
308
+ pageDirectory: PageDirectory;
309
+ slug: string;
310
+ }
311
+ /** A fully-resolved in-memory representation of a single wiki page. Element type of `ListPagesResult.pages`. */
312
+ interface Page {
313
+ slug: string;
314
+ pageDirectory: PageDirectory;
315
+ title: string;
316
+ summary: string;
317
+ tags: string[];
318
+ /** Slugs of pages linked via `[[wikilinks]]` in the body. */
319
+ links: string[];
320
+ createdAt?: string; // Optional here, whereas `ExportPage.createdAt` is required.
321
+ updatedAt?: string; // Optional here, whereas `ExportPage.updatedAt` is required.
322
+ /** True when frontmatter contains `orphaned: true`. */
323
+ orphaned: boolean;
324
+ /** True when frontmatter contains `archived: true`. */
325
+ archived: boolean;
326
+ /** Full markdown body, present only when `includeBody` is true or via `getPage`. */
327
+ body?: string;
328
+ }
329
+ /** Options for filtering and paginating `listPages`. Every field is optional. */
330
+ interface ListPagesOptions {
331
+ cursor?: string; // NOTE(review): presumably the `cursor` value from a previous `ListPagesResult` — confirm.
332
+ limit?: number; // NOTE(review): default and maximum are not visible here — confirm.
333
+ includeBody?: boolean; // Controls whether `Page.body` is present (see the `Page.body` doc).
334
+ includeArchived?: boolean;
335
+ includeOrphaned?: boolean;
336
+ }
337
+ /** Result returned by `listPages`: one slice of pages plus an optional continuation cursor. */
338
+ interface ListPagesResult {
339
+ pages: Page[];
340
+ /** Opaque cursor for the next page; absent when the listing is exhausted. */
341
+ cursor?: string;
342
+ }
343
+
344
+ /**
345
+ * JSON export format writer.
346
+ *
347
+ * Produces a structured JSON document containing all wiki pages and their
348
+ * metadata. The schema is intentionally simple and human-readable so it can
349
+ * be consumed directly by scripts, agents, or downstream pipelines without
350
+ * additional transformation.
351
+ *
352
+ * Schema:
353
+ * { schemaVersion, exportedAt, pageCount, projectId?, pages: ExportPage[] }
354
+ *
355
+ * W4 provenance lives PER PAGE (`ExportPage.modelId` / `promptVersion` plus
356
+ * `contentHash` / `sourceHashes`), stamped into each page at compile time.
357
+ * It is deliberately not summarized at the envelope level: a single
358
+ * export-time model id would misattribute pages compiled under a different
359
+ * model, which is exactly the lineage bug this avoids.
360
+ *
361
+ * `schemaVersion` lets downstream consumers (e.g. the rule importer) pin to a known
362
+ * contract. Increment when a breaking field change lands; additive fields
363
+ * do not require a bump.
364
+ *
365
+ * `projectId` is the optional bridge identifier. When present it pins the
366
+ * on-disk export to a stable identity that downstream consumers (the
367
+ * Atomic Memory adapter especially) use to derive deterministic external
368
+ * IDs. Validation happens at the CLI/programmatic boundary, not here —
369
+ * by the time we serialize, the value has been checked.
370
+ */
371
+
372
+ /** Top-level shape of the JSON export file. Only `projectId` is optional. */
373
+ interface JsonExportDocument {
374
+ /**
375
+ * Contract version for downstream consumers. Start at 1; increment only on
376
+ * breaking envelope changes so consumers can pin a supported range.
377
+ */
378
+ schemaVersion: number;
379
+ exportedAt: string; // NOTE(review): presumably an ISO-8601 timestamp like the per-page fields — confirm.
380
+ pageCount: number; // NOTE(review): presumably equals `pages.length` — confirm.
381
+ /** Optional bridge identifier. See `src/export/project-id.ts` for the validation rule. */
382
+ projectId?: string;
383
+ pages: ExportPage[];
384
+ }
385
+ /** Options accepted by {@link buildJsonExportDocument}. Currently a single optional field. */
386
+ interface BuildJsonExportOptions {
387
+ /**
388
+ * Optional project identifier. Validated against the bridge contract
389
+ * regex; throws if invalid so a malformed value never reaches disk.
390
+ */
391
+ projectId?: string;
392
+ }
393
+
394
+ /**
395
+ * Single provider-credential guard shared by every entry point that
396
+ * needs an LLM call (CLI compile/query/watch, MCP tools, the upcoming
397
+ * `quickstart` command).
398
+ *
399
+ * The guard throws on failure instead of calling `process.exit(1)`,
400
+ * which lets every caller decide how to surface the failure:
401
+ *
402
+ * - CLI verbs catch the throw and print the message + exit 1.
403
+ * - MCP tools let the throw propagate as a tool error.
404
+ * - `quickstart` catches the throw and translates it into the
405
+ * `compile.error = { code: "provider_unavailable", ... }` shape
406
+ * documented in the next-quickstart implementation plan.
407
+ *
408
+ * Error messages mirror the rich CLI form (with `Set it with: export X=...`
409
+ * hints) so the user always sees actionable guidance no matter which
410
+ * surface fired the guard.
411
+ */
412
+ /** Thrown when the active provider has no usable credentials. Declaration only: an `Error` subclass with three readonly fields. */
413
+ declare class ProviderUnavailableError extends Error {
414
+ readonly provider: string;
415
+ readonly missing: string[]; // NOTE(review): presumably the names of the missing credentials/env vars — confirm.
416
+ readonly code: "provider_unavailable"; // Literal-typed discriminator.
417
+ constructor(provider: string, missing: string[], message: string);
418
+ }
419
+ /** Thrown when LLMWIKI_PROVIDER names an unsupported provider. Declaration only: an `Error` subclass with three readonly fields. */
420
+ declare class UnknownProviderError extends Error {
421
+ readonly provider: string;
422
+ readonly supported: string[];
423
+ readonly code: "unknown_provider"; // Literal-typed discriminator, distinct from `ProviderUnavailableError.code`.
424
+ constructor(provider: string, supported: string[], message: string);
425
+ }
426
+
427
+ /**
428
+ * Commander action for `llmwiki ingest <source>`.
429
+ *
430
+ * Detects the source type (URL, image, PDF, transcript, or generic file),
431
+ * delegates to the appropriate ingestion module, and saves the result as a
432
+ * markdown file with YAML frontmatter in the sources/ directory.
433
+ *
434
+ * Source type is persisted in frontmatter under the `sourceType` key for
435
+ * downstream tooling and human readers.
436
+ */
437
+
438
+ /** Input shape for raw-text ingestion. `title` and `text` are required; `source` is optional. */
439
+ interface IngestTextInput {
440
+ title: string;
441
+ text: string;
442
+ source?: string;
443
+ }
444
+
445
+ /**
446
+ * Shared types for the local web viewer.
447
+ *
448
+ * `ViewerPage` is the in-memory page record consumed by the HTTP server's
449
+ * `/api/page/:directory/:slug` endpoint. `ViewerSnapshot` is the immutable
450
+ * project-wide state captured once at viewer startup and served from for
451
+ * every request — v1 deliberately does not live-watch the filesystem.
452
+ *
453
+ * `ViewerWarning` is the only warning surface; the underlying wiki layer
454
+ * (`src/wiki/collect.ts`) returns structural `parseStatus` flags, and the
455
+ * viewer decorator (`src/viewer/collect.ts`) maps those into stable
456
+ * `code`/`message` pairs the UI renders.
457
+ */
458
+
459
+ /**
460
+ * Canonical page identifier: `concepts/<slug>` or `queries/<slug>`. Bare
461
+ * slugs collide between the two directories, so every viewer surface uses
462
+ * the namespaced form. Expressed as a template-literal type over `PageDirectory`; the slug part is any string.
463
+ */
464
+ type PageId = `${PageDirectory}/${string}`;
465
+
466
+ /**
467
+ * Pure recommendation rules for `llmwiki next`.
468
+ *
469
+ * Classifies a {@link ProjectState} snapshot into exactly one of seven
470
+ * primary states and produces a primary {@link RecommendedAction} plus
471
+ * the per-state `otherActions` table from the implementation plan.
472
+ *
473
+ * Actions with user-supplied input (a source path, a question, a
474
+ * candidate id) are templates: the display `command` carries a
475
+ * `<placeholder>` and `executable.placeholders` lists the slots. Agents
476
+ * must populate placeholders themselves; the contract never returns a
477
+ * shell-ready command line.
478
+ */
479
+
480
+ /** Single recommended action; `command` is for display, `executable` is for agents. Both may be `null`. Element type of `ContextPack.suggestedActions`. */
481
+ interface RecommendedAction {
482
+ command: string | null;
483
+ reason: string;
484
+ executable: ExecutableSpec | null;
485
+ }
486
+ /** Structured form of an executable command. Placeholders are slot names, not literals. */
487
+ interface ExecutableSpec {
488
+ binary: "llmwiki"; // Literal type: the only permitted binary name.
489
+ args: string[];
490
+ placeholders?: string[]; // Optional: may be omitted.
491
+ }
492
+
493
+ /**
494
+ * Stable v1 JSON contract for `llmwiki context` and the future
495
+ * `get_context_pack` MCP tool.
496
+ *
497
+ * Every top-level field and every documented nested key is present from
498
+ * Slice 1 onward, even when later-slice features have not populated
499
+ * them yet (see `localdocs/context-graph-packs-implementation-plan.md`
500
+ * §JSON Contract). Unpopulated list fields are empty arrays; absent
501
+ * object fields are `null`. Slices may fill data into these fields,
502
+ * but must NEVER add or remove top-level keys without bumping
503
+ * `version`.
504
+ */
505
+
506
+ /** Closed v1 enum for why a page landed in `primary[]`. Element type of `ContextPrimary.reasons`. */
507
+ type PrimaryReason = "semantic-chunk" | "title-match" | "body-match" | "exact-slug" | "exact-title" | "graph-neighbor";
508
+ /** Closed v1 enum for the edge label used in `neighbors[]`. Currently a single member. */
509
+ type NeighborReason = "wikilink";
510
+ /** Closed v1 enum for top-level `warnings[]` codes. Type of `ContextWarning.code`. */
511
+ type ContextWarningCode = "embedding-store-missing" | "query-embedding-unavailable" | "semantic-retrieval-error" | "lint-errors" | "pending-candidates" | "source-window-unavailable" | "truncated-prompt";
512
+ /** Closed v1 enum for `gaps[]` codes. Type of `ContextGap.code`. */
513
+ type ContextGapCode = "dangling-link" | "page-warning";
514
+ /**
515
+ * Budget envelope. `estimatedTokens` uses a tokens ≈ chars/4 heuristic in v1.
516
+ * Because `estimatedTokens` is itself serialized inside the measured JSON, the
517
+ * reported value may differ from `estimatePackTokens(returnedPack)` by at most
518
+ * one token of digit-count drift.
519
+ */
520
+ interface ContextBudget {
521
+ requestedTokens: number;
522
+ estimatedTokens: number;
523
+ truncated: boolean;
524
+ /** Section keys (`primary`, `neighbors`, `sourceWindows`, `chunks`) that lost data. Typed as plain strings, not a closed union. */
525
+ trimmedSections: string[];
526
+ }
527
+ /**
528
+ * Cached lint summary surfaced inside `project.lint`. Matches
529
+ * `LintCacheEntry` in `src/linter/cache.ts` but typed locally so the
530
+ * context contract does not depend on the linter's internal shape.
531
+ */
532
+ interface ContextLintSummary {
533
+ warnings: number;
534
+ errors: number;
535
+ at: string; // NOTE(review): presumably the timestamp of the cached lint run — confirm format.
536
+ }
537
+ /** Project block. `root` is set to `null` when `--omit-root` is supplied. `lint` is also nullable. */
538
+ interface ContextProject {
539
+ root: string | null;
540
+ pages: number;
541
+ pendingCandidates: number;
542
+ lint: ContextLintSummary | null;
543
+ }
544
+ /** One semantic chunk surfaced for a primary page. Slice 2 populates it. Element type of `ContextPrimary.chunks`. */
545
+ interface ContextChunk {
546
+ text: string;
547
+ score: number;
548
+ contentHash?: string; // Optional. NOTE(review): hash algorithm and what is hashed are not visible here — confirm.
549
+ }
550
+ /**
551
+ * Flattened citation. Produced by lifting `ClaimCitation.spans` into one
552
+ * object per span. Paragraph-only citations omit `start` and `end`. Declares the same three fields as `FlatCitation`, so the two are structurally interchangeable.
553
+ */
554
+ interface ContextCitation {
555
+ file: string;
556
+ start?: number;
557
+ end?: number;
558
+ }
559
+ /** Source line window emitted only when `--include-sources` is set in Slice 4. Unlike `ContextCitation`, `start` and `end` are required here. */
560
+ interface ContextSourceWindow {
561
+ file: string;
562
+ start: number;
563
+ end: number;
564
+ text: string;
565
+ }
566
+ /** Page-local warning surfaced from the viewer collector. `code` is a plain string here, unlike the closed `ContextWarningCode` used by `ContextWarning`. */
567
+ interface ContextPageWarning {
568
+ code: string;
569
+ message: string;
570
+ }
571
+ /** One primary page entry. `reasons` is sorted alphabetically for stable output. Every field is required. */
572
+ interface ContextPrimary {
573
+ id: PageId;
574
+ title: string;
575
+ pageDirectory: PageDirectory;
576
+ score: number;
577
+ reasons: PrimaryReason[];
578
+ summary: string;
579
+ chunks: ContextChunk[];
580
+ citations: ContextCitation[]; // Same field shape as `FlatCitation`.
581
+ sourceWindows: ContextSourceWindow[];
582
+ warnings: ContextPageWarning[];
583
+ /** Computed source-freshness of this page (advisory snapshot, not a guarantee). */
584
+ freshnessStatus: FreshnessStatus;
585
+ /** Disputed by another page (`contradictedBy` non-empty). */
586
+ contradicted: boolean;
587
+ /** Explicitly archived (`archived: true` frontmatter). */
588
+ archived: boolean;
589
+ }
590
+ /** One graph neighbor edge. `distance` is 1 for direct, 2 for second-hop (typed as plain `number`, not a `1 | 2` literal union). */
591
+ interface ContextNeighbor {
592
+ from: PageId;
593
+ to: PageId;
594
+ direction: "outgoing" | "incoming";
595
+ distance: number;
596
+ score: number;
597
+ reason: NeighborReason;
598
+ }
599
+ /** Top-level context-pack state warning. Element type of `ContextPack.warnings`; `code` is restricted to `ContextWarningCode`. */
600
+ interface ContextWarning {
601
+ code: ContextWarningCode;
602
+ message: string;
603
+ }
604
+ /**
605
+ * Missing-knowledge gap. `pageId` is required in v1; every documented
606
+ * gap code (`dangling-link`, `page-warning`) is tied to a specific
607
+ * page. A future project-wide gap would either bump `version` or
608
+ * introduce a new sibling field rather than retrofitting nullability
609
+ * onto this one. Element type of `ContextPack.gaps`.
610
+ */
611
+ interface ContextGap {
612
+ code: ContextGapCode;
613
+ message: string;
614
+ pageId: PageId;
615
+ }
616
+ /** Top-level v1 envelope. All nine keys are required; none is optional. */
617
+ interface ContextPack {
618
+ version: 1; // Literal type: only the value 1 is assignable.
619
+ prompt: string;
620
+ budget: ContextBudget;
621
+ project: ContextProject;
622
+ primary: ContextPrimary[];
623
+ neighbors: ContextNeighbor[];
624
+ warnings: ContextWarning[];
625
+ gaps: ContextGap[];
626
+ suggestedActions: RecommendedAction[];
627
+ }
628
+
629
+ /**
630
+ * Type definitions for the llmwiki eval harness.
631
+ *
632
+ * Four core metric families:
633
+ * - HealthResult: aggregated lint score (0–100)
634
+ * - CitationCoverageResult: prose paragraph citation rate + precision
635
+ * - CitationSupportResult: LLM-judged citation support quality (full suite only)
636
+ * - StatsResult: corpus size snapshot appended to history.jsonl
637
+ *
638
+ * EvalReport bundles all four, along with SourceUtilizationResult and CitationDepthResult, plus regression deltas and CI threshold violations.
639
+ */
640
+ interface HealthRuleResult { // Per-rule entry; element type of `HealthResult.rules`.
641
+ rule: string;
642
+ count: number;
643
+ severity: "error" | "warning" | "info"; // Same three literals as `LintResult.severity`.
644
+ deduction: number; // NOTE(review): presumably the amount subtracted from the health score for this rule — confirm.
645
+ }
646
+ interface HealthResult { // Health metric: a score, its fixed maximum, and the per-rule breakdown.
647
+ score: number;
648
+ maxScore: 100; // Literal type: only the value 100 is assignable.
649
+ rules: HealthRuleResult[];
650
+ }
651
+ interface CitationPageResult { // Per-page paragraph counts; element type of `CitationCoverageResult.perPage`.
652
+ slug: string;
653
+ proseParagraphs: number;
654
+ citedParagraphs: number;
655
+ }
656
+ interface CitationCoverageResult { // Citation coverage metric: aggregate counts and percentages plus a per-page breakdown.
657
+ totalProseParagraphs: number;
658
+ citedParagraphs: number;
659
+ coveragePercent: number; // NOTE(review): presumably citedParagraphs / totalProseParagraphs as a percentage — confirm.
660
+ totalCitations: number;
661
+ validCitations: number;
662
+ precisionPercent: number; // NOTE(review): presumably validCitations / totalCitations as a percentage — confirm.
663
+ perPage: CitationPageResult[];
664
+ }
665
+ /** Per-source citation detail emitted by source-utilization eval. Element type of `SourceUtilizationResult.perSource`. */
666
+ interface SourceUtilizationEntry {
667
+ sourceFile: string;
668
+ citingPageCount: number; // NOTE(review): presumably equals `citingPages.length` — confirm.
669
+ citingPages: string[];
670
+ }
671
+ interface SourceUtilizationResult { // Source-utilization metric; carried by `EvalReport.sourceUtilization`.
672
+ totalSources: number;
673
+ citedSources: number;
674
+ uncitedSources: number;
675
+ /** 0.0-1.0, or null when totalSources is 0 (not measured). */
676
+ utilizationRate: number | null;
677
+ /** Non-fatal issues encountered during evaluation (e.g. unreadable files). */
678
+ warnings: string[];
679
+ /** Sorted by citingPageCount descending. */
680
+ perSource: SourceUtilizationEntry[];
681
+ }
682
+ /** Citation depth metrics — how precise are the wiki's citations. Carried by `EvalReport.citationDepth`. */
683
+ interface CitationDepthResult {
684
+ totalCitations: number;
685
+ preciseCitations: number; // NOTE(review): presumably citations that include a line range — confirm.
686
+ vagueCitations: number; // NOTE(review): presumably citations without a line range — confirm.
687
+ /** 0.0-1.0 fraction of citations that include a line range. */
688
+ claimLevelRate: number;
689
+ /** Average number of citation markers per prose paragraph. */
690
+ avgCitationsPerParagraph: number;
691
+ }
692
+ interface CitationJudgement { // One judged citation; element type of `CitationSupportResult.judgements`.
693
+ /** First 16 hex chars of SHA-256(claimText + spanText) — stable cache key. */
694
+ claimHash: string;
695
+ pageSlug: string;
696
+ citedFile: string;
697
+ lineStart: number;
698
+ lineEnd: number;
699
+ claimText: string;
700
+ spanText: string;
701
+ score: 0 | 1 | 2; // NOTE(review): presumably 0 = unsupported, 1 = partially, 2 = fully supported — confirm.
702
+ reason: string;
703
+ model: string;
704
+ timestamp: string;
705
+ }
706
+ interface CitationSupportResult { // Citation-support metric; optional on `EvalReport` (`citationSupport?`).
707
+ sampledCount: number;
708
+ /** Ordered list of claimHash values evaluated in this run. Persisted so subsequent runs can retain the same sample as the corpus grows. */
709
+ sampledHashes: string[];
710
+ totalCitations: number;
711
+ meanScore: number; // NOTE(review): presumably the mean of `CitationJudgement.score` (0–2) — confirm.
712
+ fullySupported: number;
713
+ partiallySupported: number;
714
+ unsupported: number;
715
+ /** Number of judge calls that threw (credentials failure, network error, parse error). */
716
+ judgeErrors: number;
717
+ judgements: CitationJudgement[];
718
+ }
719
+ interface StatsResult { // Corpus-size snapshot: a timestamp plus six numeric measures. Carried by `EvalReport.stats`.
720
+ timestamp: string;
721
+ sourceCount: number;
722
+ pageCount: number;
723
+ totalWikiChars: number;
724
+ embeddingCount: number;
725
+ chunkEmbeddingCount: number;
726
+ avgPageLengthChars: number; // NOTE(review): presumably totalWikiChars / pageCount — confirm.
727
+ }
728
+ interface EvalDelta { // Per-metric deltas; every field is optional. Carried by the optional `EvalReport.delta`.
729
+ healthScore?: number; // NOTE(review): sign convention (current minus previous?) is not visible here — confirm.
730
+ citationCoveragePercent?: number;
731
+ citationPrecisionPercent?: number;
732
+ citationSupportMean?: number;
733
+ }
734
+ interface EvalReport { // Top-level eval output: five required metric results, optional citation support and delta, and threshold violations.
735
+ suite: "fast" | "full";
736
+ timestamp: string;
737
+ health: HealthResult;
738
+ citationCoverage: CitationCoverageResult;
739
+ sourceUtilization: SourceUtilizationResult;
740
+ citationDepth: CitationDepthResult;
741
+ citationSupport?: CitationSupportResult; // Optional; the module header describes it as "full suite only".
742
+ stats: StatsResult;
743
+ delta?: EvalDelta; // Optional. NOTE(review): presumably absent when there is no earlier run to compare against — confirm.
744
+ thresholdViolations: string[];
745
+ }
746
+
747
+ /** Shape returned by `collectStatus` and surfaced by the `wiki_status` tool. Each capped list is paired with an uncapped `*Count` field. */
748
+ interface WikiStatus {
749
+ pages: {
750
+ concepts: number;
751
+ queries: number;
752
+ total: number; // NOTE(review): presumably concepts + queries — confirm.
753
+ };
754
+ sources: number;
755
+ lastCompiledAt: string | null; // Nullable. NOTE(review): presumably null when no compile has been recorded — confirm.
756
+ /**
757
+ * Concept slugs whose source changed or partially disappeared since compile.
758
+ * Capped at MAX_STATUS_LIST (sorted ascending); see staleCount for the true total.
759
+ */
760
+ stalePages: string[];
761
+ /** True total of stale pages (may exceed stalePages.length when capped). */
762
+ staleCount: number;
763
+ /**
764
+ * Concept slugs whose every owning source was deleted, or frontmatter-flagged orphaned.
765
+ * Capped at MAX_STATUS_LIST (sorted ascending); see orphanedCount for the true total.
766
+ */
767
+ orphanedPages: string[];
768
+ /** True total of orphaned pages (may exceed orphanedPages.length when capped). */
769
+ orphanedCount: number;
770
+ /** Readability of .llmwiki/state.json — surfaced so corrupt state is never silent. */
771
+ stateStatus: "ok" | "missing" | "corrupt";
772
+ /** Number of compile candidates awaiting human review. */
773
+ pendingCandidates: number;
774
+ /**
775
+ * Source files with changes since last compile (new/changed/deleted).
776
+ * Capped at MAX_STATUS_LIST (sorted by file); see pendingChangesCount for the true total.
777
+ */
778
+ pendingChanges: Array<{
779
+ file: string;
780
+ status: string; // Typed as a plain string, not a closed union of change kinds.
781
+ }>;
782
+ /** True total of pending changes (may exceed pendingChanges.length when capped). */
783
+ pendingChangesCount: number;
784
+ }
785
+
786
+ /**
787
+ * Page-reading utilities for llmwiki.
788
+ *
789
+ * Exposes `readPageRecord`, which locates a wiki page by slug across the
790
+ * priority-ordered page directories (concepts first, then queries), parses
791
+ * its frontmatter, and returns a structured `PageRecord`. Orphaned pages are
792
+ * silently skipped to match the query pipeline's behaviour.
793
+ *
794
+ * This module is shared between the MCP tool layer and the in-process SDK so
795
+ * both consumers work from identical read semantics.
796
+ */
797
+ /**
+ * Shape returned by readPageRecord and search_pages for each matching page.
+ * Also the element type of the array resolved by `Wiki.search`.
+ *
+ * NOTE(review): `title` and `summary` presumably come from the page's parsed
+ * frontmatter and `body` from the remaining page content — confirm.
+ */
798
+ interface PageRecord {
799
+ slug: string;
800
+ title: string;
801
+ summary: string;
802
+ body: string;
803
+ }
804
+
805
+ /**
+ * A single source file under `sources/`, with frontmatter metadata.
+ *
+ * - `id` is the file's basename (e.g. "note.md"); it is the value accepted by
+ *   `Wiki.getSource` and `Wiki.deleteSource`, and matches `IngestResult.filename`.
+ * - `body` is optional: `Wiki.listSources` includes it only when `includeBody`
+ *   is set, whereas `Wiki.getSource` always includes it.
+ * - NOTE(review): `source` / `sourceType` presumably record where the content
+ *   was ingested from and what kind of input it was — confirm.
+ */
806
+ interface SourceRecord {
807
+ id: string;
808
+ title: string;
809
+ source: string;
810
+ sourceType: string;
811
+ ingestedAt?: string;
812
+ body?: string;
813
+ }
814
+ /**
+ * Options for paginating `listSources` and opting into source bodies.
+ *
+ * - `includeBody`: source bodies are opt-in; set this to have each returned
+ *   `SourceRecord` carry its `body`.
+ * - `cursor` / `limit`: cursor-based pagination controls.
+ *   NOTE(review): `cursor` is presumably the `cursor` value from a previous
+ *   `ListSourcesResult`, and `limit` the maximum page size — confirm.
+ */
815
+ interface ListSourcesOptions {
816
+ cursor?: string;
817
+ limit?: number;
818
+ includeBody?: boolean;
819
+ }
820
+ /**
+ * Result returned by `listSources`.
+ *
+ * NOTE(review): `cursor` is presumably the token to pass back as
+ * `ListSourcesOptions.cursor` for the next page, and absent when there are no
+ * further results — confirm.
+ */
821
+ interface ListSourcesResult {
822
+ sources: SourceRecord[];
823
+ cursor?: string;
824
+ }
825
+
826
+ /**
827
+ * @file src/sdk/types.ts
828
+ * @description Public type surface for the llmwiki in-process SDK.
829
+ *
830
+ * Defines the `Wiki` interface returned by `createWiki`, plus the
831
+ * option shapes that callers pass to each method. All concrete result
832
+ * types are imported directly from their owning modules so consumers
833
+ * who need deeper access can follow the same import path.
834
+ */
835
+
836
+ /** Options for `createWiki`. */
837
+ interface CreateWikiOptions {
838
+ /**
+ * Absolute or relative path to the project root. Normalized once inside `createWiki`.
+ *
+ * A path that does not exist yet is accepted (`ingest`/`ingestText` create
+ * `sources/` on first write). A path that exists but is not a directory makes
+ * `createWiki` throw.
+ */
839
+ root: string;
840
+ }
841
+ /**
+ * Compile options exposed through the SDK. Mirrors the core CompileOptions shape.
+ * This is the optional argument accepted by `Wiki.compile`.
+ */
842
+ interface SdkCompileOptions {
843
+ /** Write generated pages as candidates for review instead of mutating wiki/. */
844
+ review?: boolean;
845
+ }
846
+ /**
+ * Options for `getContextPack`. Maps onto the subset of BuildContextPackOptions needed externally.
+ * Only `prompt` is required; every other field is optional.
+ */
847
+ interface ContextPackOptions {
848
+ /** Free-text prompt the agent supplied. */
849
+ prompt: string;
850
+ /** Token budget (tokens ≈ chars/4). */
851
+ budget?: number;
852
+ /** Graph traversal depth (0–2). */
853
+ depth?: number;
854
+ /** Maximum primary pages to include. */
855
+ topPages?: number;
856
+ /**
+ * Maximum semantic chunks to include.
+ * NOTE(review): semantic retrieval is skipped when no embeddings are
+ * available, so this presumably has no effect in that case — confirm.
+ */
857
+ topChunks?: number;
858
+ }
859
+ /**
860
+ * The facade object returned by `createWiki`. Every method runs silently
861
+ * (no console output) and normalizes all paths against the project root
862
+ * supplied at construction time.
+ *
+ * Credential requirements, as stated on each method: `compile`, `search`,
+ * and `query` require LLM credentials; `runEval` requires them only in
+ * "full" mode; the remaining methods are documented as needing no LLM
+ * (`getContextPack` falls back to lexical retrieval).
863
+ */
864
+ interface Wiki {
865
+ /**
866
+ * Ingest a file path or URL as a new source document. Requires no LLM credentials.
867
+ *
868
+ * **Trust boundary (SSRF + local-file-read primitive):** this method fetches any URL
869
+ * or reads any local file path the caller supplies — server-side — and writes the
870
+ * resulting content into the wiki. Treat `source` as **trusted input only**. Do not
871
+ * pass user-supplied or otherwise untrusted strings here. For untrusted content, use
872
+ * `ingestText` instead (it accepts pre-extracted `{title, text}` with no fetch or
873
+ * file-read step).
874
+ *
875
+ * **Prompt-injection surface:** ingested content is later processed by an LLM during
876
+ * `compile`. Untrusted or adversarially crafted content in sources is therefore a
877
+ * prompt-injection vector into page generation.
878
+ */
879
+ ingest(input: {
880
+ source: string;
881
+ }): Promise<IngestResult>;
882
+ /**
883
+ * Ingest raw text as a new source document. Requires no LLM credentials.
884
+ *
885
+ * Safe path for untrusted content: no network fetch or local file read is performed.
886
+ * The caller supplies the text directly, so there is no SSRF or path-traversal risk
887
+ * at ingest time (prompt-injection into `compile` still applies if the text itself is
888
+ * adversarial).
889
+ */
890
+ ingestText(input: IngestTextInput): Promise<IngestResult>;
891
+ /**
892
+ * Compile all pending sources into wiki pages. Requires LLM credentials.
893
+ *
894
+ * **Data egress:** source content is sent to the configured LLM provider during
895
+ * compilation. Do not compile wikis that contain confidential data unless the
896
+ * provider's data-handling policies are acceptable for that content.
897
+ *
898
+ * **Silent operation:** progress output is suppressed by the SDK facade. There is no
899
+ * progress callback in v1; structured `onLog` event delivery is planned for v1.x.
900
+ * For long corpora this call may take several minutes with no intermediate feedback.
+ *
+ * @throws ProviderUnavailableError when no LLM credentials are available.
901
+ */
902
+ compile(options?: SdkCompileOptions): Promise<CompileResult>;
903
+ /**
904
+ * Pick and hydrate the most relevant pages for a question. Requires LLM credentials.
905
+ *
906
+ * **Data egress:** the question (and embedding request) is sent to the configured LLM
907
+ * provider. Wiki page content may also be sent during retrieval scoring.
+ *
+ * @throws ProviderUnavailableError when no LLM credentials are available.
908
+ */
909
+ search(question: string): Promise<PageRecord[]>;
910
+ /**
911
+ * Generate a grounded answer from the wiki. Requires LLM credentials.
912
+ *
913
+ * **Data egress:** the question and relevant wiki page content are sent to the
914
+ * configured LLM provider to produce the answer.
915
+ *
916
+ * Streaming token delivery (`onToken`) is intentionally NOT exposed by the
917
+ * facade in v1 — only `save` and `debug` are surfaced. Callers needing
918
+ * per-token streaming should use `generateAnswer` directly.
+ *
+ * @throws ProviderUnavailableError when no LLM credentials are available.
919
+ */
920
+ query(question: string, options?: {
921
+ save?: boolean;
922
+ debug?: boolean;
923
+ }): Promise<QueryResult>;
924
+ /** Fetch a single page by directory and slug. No LLM required. */
925
+ getPage(ref: PageRef): Promise<Page | null>;
926
+ /** List wiki pages with optional filters and cursor-based pagination. No LLM required. */
927
+ listPages(options?: ListPagesOptions): Promise<ListPagesResult>;
928
+ /** List source files under `sources/` with optional cursor pagination. Bodies are opt-in via `includeBody`. No LLM required. */
929
+ listSources(options?: ListSourcesOptions): Promise<ListSourcesResult>;
930
+ /** Fetch a single source record by its basename id (e.g. "note.md"); returns null if absent. Always includes body. No LLM required. */
931
+ getSource(id: string): Promise<SourceRecord | null>;
932
+ /**
+ * Delete the source file for the given id (the id is the `IngestResult.filename`, e.g. "note.md").
933
+ * Returns true if deleted, false if not found. The compiled page in `wiki/` is NOT removed
934
+ * immediately — reconciliation happens on the next `compile()`. No LLM required.
+ */
935
+ deleteSource(id: string): Promise<boolean>;
936
+ /**
937
+ * Collect a read-only status snapshot of the wiki. No LLM required.
938
+ *
939
+ * **Per-call cost:** each call hashes and reads the full source corpus —
940
+ * O(total source bytes) — with no cross-call caching. Avoid calling this in a
941
+ * hot loop; an mtime-keyed cache is planned for v1.x.
942
+ */
943
+ status(): Promise<WikiStatus>;
944
+ /**
945
+ * Run all lint rules and return a severity-counted summary. No LLM required.
946
+ *
947
+ * **Per-call cost:** each call hashes and reads the full source corpus —
948
+ * O(total source bytes) — with no cross-call caching. Avoid calling this in a
949
+ * hot loop; an mtime-keyed cache is planned for v1.x.
950
+ */
951
+ lint(): Promise<LintSummary>;
952
+ /**
953
+ * Build a v1 context pack for agent consumption. Lexical retrieval works
954
+ * credential-free; semantic retrieval is opportunistic (skipped when no
955
+ * embeddings are available).
956
+ */
957
+ getContextPack(options: ContextPackOptions): Promise<ContextPack>;
958
+ /**
959
+ * Export the wiki as a structured JSON document. No LLM required.
960
+ *
961
+ * **Per-call cost:** each call hashes and reads the full source corpus —
962
+ * O(total source bytes) — with no cross-call caching. Avoid calling this in a
963
+ * hot loop; an mtime-keyed cache is planned for v1.x.
964
+ */
965
+ exportJson(options?: BuildJsonExportOptions): Promise<JsonExportDocument>;
966
+ /**
967
+ * Run the eval harness. "fast" mode is credential-free; "full" mode
968
+ * requires LLM credentials for citation-support judging.
969
+ *
970
+ * **Silent operation:** the SDK suppresses all progress output. There is no
971
+ * progress callback in v1; structured `onLog` event delivery is planned for v1.x.
+ *
+ * @throws ProviderUnavailableError in "full" mode when no LLM credentials are available.
972
+ */
973
+ runEval(options: {
974
+ mode: "fast" | "full";
975
+ record?: boolean;
976
+ }): Promise<EvalReport>;
977
+ }
978
+
979
+ /**
980
+ * @file src/sdk/wiki.ts
981
+ * @description In-process SDK facade for llmwiki.
982
+ *
983
+ * `createWiki(options)` returns a `Wiki` object that delegates every
984
+ * method to the SDK-safe core functions built across Tasks 1–9. All
985
+ * methods run silently (no console output) by scoping quiet mode to the
986
+ * async call tree via AsyncLocalStorage, so concurrent calls are fully
987
+ * isolated — no global flag is mutated, eliminating the concurrency caveat
988
+ * described in earlier design drafts.
989
+ *
990
+ * Provider-gating rules:
991
+ * - `compile`, `search`, `query` — always guard (throw ProviderUnavailableError if no creds)
992
+ * - `runEval({ mode: "full" })` — guards only when mode is "full"
993
+ * - All other methods — no credential check; safe to call without an LLM provider
994
+ *
995
+ * Root-path validation: `createWiki` normalizes the root once via `path.resolve`.
996
+ * A non-existent root is accepted — `ingest`/`ingestText` create `sources/` via
997
+ * recursive `mkdir` on first write. If the path already exists but is NOT a
998
+ * directory (e.g. a regular file was passed by mistake), construction throws
999
+ * immediately with a clear error message.
1000
+ */
1001
+
1002
+ /**
1003
+ * Create an in-process wiki facade bound to the given project root.
1004
+ *
+ * The root is normalized once via `path.resolve`. A root that does not exist
+ * yet is accepted; `ingest`/`ingestText` create `sources/` on first write.
+ *
1005
+ * @param options - `{ root }` — path to the wiki project directory.
1006
+ * @returns A `Wiki` facade whose methods delegate to the SDK-safe core.
+ * @throws If `root` already exists but is not a directory.
1007
+ */
1008
+ declare function createWiki(options: CreateWikiOptions): Wiki;
1009
+
1010
+ export { type CompileResult, type ContextPack, type ContextPackOptions, type CreateWikiOptions, type EvalReport, type IngestResult, type IngestTextInput, type JsonExportDocument, type LintSummary, type ListPagesOptions, type ListPagesResult, type ListSourcesOptions, type ListSourcesResult, type Page, type PageDirectory, type PageRecord, type PageRef, ProviderUnavailableError, type QueryResult, type SdkCompileOptions, type SourceRecord, UnknownProviderError, type Wiki, type WikiStatus, type WriteStatus, createWiki };