@3030-labs/wotw 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +312 -0
- package/LICENSE +36 -0
- package/LICENSE-NOTICES.md +199 -0
- package/README.md +147 -0
- package/SECURITY.md +181 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +14993 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/daemon/entry.d.ts +2 -0
- package/dist/daemon/entry.js +11544 -0
- package/dist/daemon/entry.js.map +1 -0
- package/dist/index.d.ts +617 -0
- package/dist/index.js +1290 -0
- package/dist/index.js.map +1 -0
- package/dist/wiki/templates/CLAUDE.md +87 -0
- package/dist/wiki/templates/getting-started.md +89 -0
- package/dist/wiki/templates/index.md +32 -0
- package/dist/wiki/templates/log.md +9 -0
- package/package.json +127 -0
- package/src/wiki/templates/CLAUDE.md +87 -0
- package/src/wiki/templates/getting-started.md +89 -0
- package/src/wiki/templates/index.md +32 -0
- package/src/wiki/templates/log.md +9 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,617 @@
|
|
|
1
|
+
import { Logger } from 'pino';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Shared type definitions used across the watcher-on-the-wall codebase.
|
|
5
|
+
*/
|
|
6
|
+
/** Supported LLM model identifiers. Kept loose so consumers can supply new IDs. */
|
|
7
|
+
type ModelId = string;
|
|
8
|
+
/** Operation type for an ingestion / provenance record. */
|
|
9
|
+
type OperationType = "ingest" | "query" | "compound" | "archive" | "lint" | "merge" | "heal" | "fact_extraction" | "fact_extracted";
|
|
10
|
+
/**
|
|
11
|
+
* Execution mode. Controls which Claude runtime the daemon invokes:
|
|
12
|
+
*
|
|
13
|
+
* - `auto`: detect at startup — prefer the Claude Code CLI binary on PATH,
|
|
14
|
+
* fall back to the Agent SDK if ANTHROPIC_API_KEY is set.
|
|
15
|
+
* - `cli`: always spawn the `claude` CLI. Free with an existing Claude Pro/Max
|
|
16
|
+
* subscription. All operations use `execution.cli_model`.
|
|
17
|
+
* - `api`: always use the Agent SDK. Requires ANTHROPIC_API_KEY. Operations
|
|
18
|
+
* use the model-router (Haiku for ingest, Sonnet for query, etc.).
|
|
19
|
+
*/
|
|
20
|
+
type ExecutionMode = "auto" | "cli" | "api";
|
|
21
|
+
/** Resolved runtime mode — what the daemon actually ended up using. */
|
|
22
|
+
type RuntimeMode = "cli" | "api";
|
|
23
|
+
/** Confidence level for wiki entries. */
|
|
24
|
+
type ConfidenceLevel = "high" | "medium" | "low";
|
|
25
|
+
/** Category of a wiki page. */
|
|
26
|
+
type WikiCategory = "concept" | "entity" | "source" | "comparison" | "synthesis" | "query";
|
|
27
|
+
/** LLM provider identifier. Must match LLMProviderName in llm/types-vendored.ts. */
|
|
28
|
+
type LlmProviderName = "anthropic" | "openai" | "gemini" | "ollama";
|
|
29
|
+
/** Resolved configuration values with all defaults applied. */
|
|
30
|
+
interface WotwConfig {
|
|
31
|
+
wiki_root: string;
|
|
32
|
+
raw_path: string;
|
|
33
|
+
/**
|
|
34
|
+
* LLM provider selection. Multi-LLM (Phase 10) lets the daemon dispatch
|
|
35
|
+
* to AnthropicProvider / OpenAIProvider / GeminiProvider / OllamaProvider
|
|
36
|
+
* based on `provider`. The `api_key_env` field in `execution` still names
|
|
37
|
+
* the env var the daemon reads for the API key (varies per provider):
|
|
38
|
+
* - anthropic → ANTHROPIC_API_KEY (default)
|
|
39
|
+
* - openai → OPENAI_API_KEY
|
|
40
|
+
* - gemini → GOOGLE_API_KEY
|
|
41
|
+
* - ollama → no key (local inference)
|
|
42
|
+
*/
|
|
43
|
+
llm: {
|
|
44
|
+
provider: LlmProviderName;
|
|
45
|
+
/** Model identifier (provider-specific). */
|
|
46
|
+
model: string;
|
|
47
|
+
/** Ollama-only: base URL of the local Ollama instance. */
|
|
48
|
+
ollama_url?: string;
|
|
49
|
+
};
|
|
50
|
+
execution: {
|
|
51
|
+
/** How to choose the LLM runtime. See {@link ExecutionMode}. */
|
|
52
|
+
mode: ExecutionMode;
|
|
53
|
+
/** Path (or bare name) of the Claude Code CLI binary. */
|
|
54
|
+
cli_path: string;
|
|
55
|
+
/** Model used for ALL operations when running in CLI mode. */
|
|
56
|
+
cli_model: ModelId;
|
|
57
|
+
/** Name of the env var to read the provider's API key from in API mode (e.g. ANTHROPIC_API_KEY, OPENAI_API_KEY, GOOGLE_API_KEY). */
|
|
58
|
+
api_key_env: string;
|
|
59
|
+
};
|
|
60
|
+
models: {
|
|
61
|
+
ingest: ModelId;
|
|
62
|
+
query: ModelId;
|
|
63
|
+
lint: ModelId;
|
|
64
|
+
compound_eval: ModelId;
|
|
65
|
+
};
|
|
66
|
+
watcher: {
|
|
67
|
+
debounce_initial_ms: number;
|
|
68
|
+
debounce_max_ms: number;
|
|
69
|
+
debounce_growth_factor: number;
|
|
70
|
+
burst_threshold: number;
|
|
71
|
+
max_batch_size: number;
|
|
72
|
+
ignore_patterns: string[];
|
|
73
|
+
};
|
|
74
|
+
ingestion: {
|
|
75
|
+
max_turns: number;
|
|
76
|
+
max_budget_per_batch_usd: number;
|
|
77
|
+
resume_session: boolean;
|
|
78
|
+
/**
|
|
79
|
+
* File where permanently-failed batches are recorded (one JSON object
|
|
80
|
+
* per line). Resolved relative to {@link WotwConfig.wiki_root} at
|
|
81
|
+
* config-load time. Set to an empty string to disable.
|
|
82
|
+
*/
|
|
83
|
+
dead_letter_file: string;
|
|
84
|
+
/**
|
|
85
|
+
* When true, ingested pages land in `wiki/candidates/` for human review
|
|
86
|
+
* instead of going directly into category directories. Approved via
|
|
87
|
+
* `wotw approve`, rejected via `wotw reject`.
|
|
88
|
+
*/
|
|
89
|
+
staging: boolean;
|
|
90
|
+
};
|
|
91
|
+
cost: {
|
|
92
|
+
max_daily_usd: number;
|
|
93
|
+
max_per_query_usd: number;
|
|
94
|
+
max_per_ingest_usd: number;
|
|
95
|
+
track_file: string;
|
|
96
|
+
};
|
|
97
|
+
server: {
|
|
98
|
+
port: number;
|
|
99
|
+
host: string;
|
|
100
|
+
auth_token: string | null;
|
|
101
|
+
rate_limit_rpm: number;
|
|
102
|
+
/**
|
|
103
|
+
* When true, trust the `X-Forwarded-For` header for client IP
|
|
104
|
+
* extraction (for rate limiting). Enable when running behind a
|
|
105
|
+
* reverse proxy. Default false — use `req.socket.remoteAddress`.
|
|
106
|
+
*/
|
|
107
|
+
trust_proxy: boolean;
|
|
108
|
+
};
|
|
109
|
+
daemon: {
|
|
110
|
+
pid_file: string;
|
|
111
|
+
lock_file: string;
|
|
112
|
+
log_file: string;
|
|
113
|
+
log_level: "trace" | "debug" | "info" | "warn" | "error" | "fatal";
|
|
114
|
+
};
|
|
115
|
+
compounding: {
|
|
116
|
+
enabled: boolean;
|
|
117
|
+
min_source_pages: number;
|
|
118
|
+
confidence_threshold: number;
|
|
119
|
+
};
|
|
120
|
+
provenance: {
|
|
121
|
+
enabled: boolean;
|
|
122
|
+
chain_file: string;
|
|
123
|
+
verify_on_startup: boolean;
|
|
124
|
+
};
|
|
125
|
+
multi_user: {
|
|
126
|
+
enabled: boolean;
|
|
127
|
+
workspaces_dir: string;
|
|
128
|
+
};
|
|
129
|
+
query: {
|
|
130
|
+
/** Enable LLM-powered query expansion before BM25 search. */
|
|
131
|
+
expand: boolean;
|
|
132
|
+
};
|
|
133
|
+
/**
|
|
134
|
+
* Pass B fact-extraction layer. After every wiki page write, the
|
|
135
|
+
* daemon optionally runs a single LLM call to decompose the page into
|
|
136
|
+
* atomic (entity, statement) facts + a handful of synthetic questions
|
|
137
|
+
* per fact. Indexed in a SQLite + minisearch sidecar at `.wotw/facts.db`.
|
|
138
|
+
*
|
|
139
|
+
* `enabled` semantics:
|
|
140
|
+
* - `"auto"` (default): active when the runtime is *cost-free* —
|
|
141
|
+
* Ollama (local) or Claude Code CLI (subscription). Inactive in
|
|
142
|
+
* API mode (Anthropic / OpenAI / Gemini) so the daemon doesn't
|
|
143
|
+
* silently amplify per-ingest cost.
|
|
144
|
+
* - `true`: always active regardless of runtime.
|
|
145
|
+
* - `false`: never active.
|
|
146
|
+
* `force_enabled` is the API-mode opt-in: when `enabled: "auto"` AND
|
|
147
|
+
* `force_enabled: true`, extraction runs even on metered providers.
|
|
148
|
+
*/
|
|
149
|
+
fact_extraction: {
|
|
150
|
+
enabled: "auto" | boolean;
|
|
151
|
+
force_enabled: boolean;
|
|
152
|
+
questions_per_fact: number;
|
|
153
|
+
/** Optional model override (defaults to `models.lint`). */
|
|
154
|
+
model?: ModelId;
|
|
155
|
+
};
|
|
156
|
+
lint: {
|
|
157
|
+
/** If true, the daemon runs a lint pass on a recurring interval. */
|
|
158
|
+
schedule_enabled: boolean;
|
|
159
|
+
/** Interval between scheduled lint passes, in hours. */
|
|
160
|
+
interval_hours: number;
|
|
161
|
+
/** When true, scheduled lint runs with --fix semantics (auto-heal). */
|
|
162
|
+
auto_fix: boolean;
|
|
163
|
+
};
|
|
164
|
+
health: {
|
|
165
|
+
/** Day thresholds for staleness scoring (ascending). */
|
|
166
|
+
staleness_thresholds: number[];
|
|
167
|
+
/** Scores corresponding to each staleness bucket (one more than thresholds). */
|
|
168
|
+
staleness_scores: number[];
|
|
169
|
+
/** Scoring weights — must sum to 1.0. */
|
|
170
|
+
weights: {
|
|
171
|
+
staleness: number;
|
|
172
|
+
source_availability: number;
|
|
173
|
+
link_health: number;
|
|
174
|
+
duplicate_risk: number;
|
|
175
|
+
contradiction_risk: number;
|
|
176
|
+
};
|
|
177
|
+
/** Similarity score 0-100 above which pages are flagged as duplicates. */
|
|
178
|
+
duplicate_threshold: number;
|
|
179
|
+
/** Pages scoring below this on staleness are auto-fixable. */
|
|
180
|
+
auto_fix_staleness_below: number;
|
|
181
|
+
/** Cap LLM calls per lint --fix pass. */
|
|
182
|
+
max_fixes_per_run: number;
|
|
183
|
+
/**
|
|
184
|
+
* Enable LLM-powered contradiction detection (expensive).
|
|
185
|
+
*
|
|
186
|
+
* TODO: No runtime consumer at v0.2.12. When the LLM contradiction
|
|
187
|
+
* detection pass is implemented, re-visit hosted-mode default —
|
|
188
|
+
* interactive default is false; hosted mode likely wants true
|
|
189
|
+
* (autonomous operation, no human reviewer to opt in). See
|
|
190
|
+
* `applyEnvOverrides()` in `src/daemon/config.ts` for the pattern
|
|
191
|
+
* other hosted-mode defaults follow.
|
|
192
|
+
*/
|
|
193
|
+
detect_contradictions: boolean;
|
|
194
|
+
/** Page-count threshold: pages are merged when a topic has more than this many pages. */
|
|
195
|
+
consolidation_threshold: number;
|
|
196
|
+
/** Master switch for knowledge consolidation. */
|
|
197
|
+
consolidation_enabled: boolean;
|
|
198
|
+
/** Trigger vocabulary enrichment when zero-hit rate exceeds this (0-1). */
|
|
199
|
+
zero_hit_threshold: number;
|
|
200
|
+
/** Master switch for automated vocabulary enrichment. */
|
|
201
|
+
enrichment_enabled: boolean;
|
|
202
|
+
/** JSONL file for query logging. Resolved relative to wiki_root. */
|
|
203
|
+
query_log_file: string;
|
|
204
|
+
};
|
|
205
|
+
/**
|
|
206
|
+
* Hosted mode configuration. When `enabled: true`, the daemon runs in a
|
|
207
|
+
* multi-tenant cloud environment with per-tenant scheduling, concurrency
|
|
208
|
+
* caps, and kill switches. Default off — single-user mode unchanged.
|
|
209
|
+
*/
|
|
210
|
+
hosted: {
|
|
211
|
+
/** Master switch for hosted mode. Default false. */
|
|
212
|
+
enabled: boolean;
|
|
213
|
+
/** Tenant identifier, set by the cloud control plane. */
|
|
214
|
+
tenant_id: string | null;
|
|
215
|
+
/** Max concurrent jobs for this tenant. */
|
|
216
|
+
concurrency_cap: number;
|
|
217
|
+
/** Kill switch — when true, jobs are held (not dropped). */
|
|
218
|
+
paused: boolean;
|
|
219
|
+
/** Plan name — determines default limits. */
|
|
220
|
+
plan: "founding" | "pro";
|
|
221
|
+
/** Per-tenant resource limits. */
|
|
222
|
+
limits: {
|
|
223
|
+
storage_bytes: number;
|
|
224
|
+
max_files_per_day: number;
|
|
225
|
+
max_file_size_bytes: number;
|
|
226
|
+
max_ingest_bytes_per_day: number;
|
|
227
|
+
heal_cooldown_seconds: number;
|
|
228
|
+
query_rate_limit_per_hour: number;
|
|
229
|
+
onboarding_burst_multiplier: number;
|
|
230
|
+
onboarding_burst_hours: number;
|
|
231
|
+
};
|
|
232
|
+
/** Timezone for daily limit resets. */
|
|
233
|
+
timezone: string;
|
|
234
|
+
/** Workspace creation time (ISO string) for burst calculation. */
|
|
235
|
+
created_at: string | null;
|
|
236
|
+
};
|
|
237
|
+
}
|
|
238
|
+
/**
|
|
239
|
+
* Page lifecycle status. `"orphaned"` marks pages whose source files
|
|
240
|
+
* have been deleted from `raw/`. Pages without a `status` field are
|
|
241
|
+
* considered active.
|
|
242
|
+
*/
|
|
243
|
+
type WikiPageStatus = "orphaned" | "merged" | "stale" | "consolidated";
|
|
244
|
+
/** Wiki page frontmatter shape. */
|
|
245
|
+
interface WikiFrontmatter {
|
|
246
|
+
title: string;
|
|
247
|
+
category: WikiCategory;
|
|
248
|
+
created: string;
|
|
249
|
+
updated: string;
|
|
250
|
+
sources: string[];
|
|
251
|
+
related: string[];
|
|
252
|
+
tags: string[];
|
|
253
|
+
confidence: ConfidenceLevel;
|
|
254
|
+
/**
|
|
255
|
+
* Optional lifecycle status. Set to `"orphaned"` when every source
|
|
256
|
+
* file that fed this page has been deleted. Never auto-cleared — a
|
|
257
|
+
* later ingestion that touches the page overwrites the frontmatter
|
|
258
|
+
* wholesale, which drops the orphaned fields.
|
|
259
|
+
*/
|
|
260
|
+
status?: WikiPageStatus;
|
|
261
|
+
/** ISO-8601 UTC timestamp when the page was first marked orphaned. */
|
|
262
|
+
orphaned_at?: string;
|
|
263
|
+
/**
|
|
264
|
+
* Wiki-root-relative source paths whose deletion orphaned this page.
|
|
265
|
+
* Appended-to (deduplicated) when multiple sources are deleted across
|
|
266
|
+
* different batches.
|
|
267
|
+
*/
|
|
268
|
+
orphaned_source?: string[];
|
|
269
|
+
/** Wiki-relative path of the page this was merged into (dedup heal). */
|
|
270
|
+
merged_into?: string;
|
|
271
|
+
/** Unresolved factual contradictions detected by the health system. */
|
|
272
|
+
contradictions?: string[];
|
|
273
|
+
/** ISO-8601 timestamp of last compilation by the ingestion pipeline. */
|
|
274
|
+
last_compiled?: string;
|
|
275
|
+
/** Number of raw source files backing this page. */
|
|
276
|
+
source_count?: number;
|
|
277
|
+
/** ISO-8601 timestamp of last source confirmation (re-ingest or corroboration). */
|
|
278
|
+
last_confirmed?: string;
|
|
279
|
+
/** Wiki-relative path of a page that supersedes this one, or null. */
|
|
280
|
+
superseded_by?: string | null;
|
|
281
|
+
/** ISO-8601 timestamp when this candidate page was rejected. */
|
|
282
|
+
rejected_at?: string;
|
|
283
|
+
/** Reason provided when rejecting a candidate page. */
|
|
284
|
+
rejection_note?: string;
|
|
285
|
+
/** Broad knowledge domain (e.g. "ops", "security", "architecture"). */
|
|
286
|
+
domain?: string;
|
|
287
|
+
/** Project or organizational context scope (e.g. project name, "general"). */
|
|
288
|
+
scope?: string;
|
|
289
|
+
/** Keywords and phrases for search findability, including synonyms. */
|
|
290
|
+
key_terms?: string[];
|
|
291
|
+
/** Wiki-relative path of the consolidated page this was merged into. */
|
|
292
|
+
consolidated_into?: string;
|
|
293
|
+
}
|
|
294
|
+
/** A parsed wiki page. */
|
|
295
|
+
interface WikiPage {
|
|
296
|
+
path: string;
|
|
297
|
+
frontmatter: WikiFrontmatter;
|
|
298
|
+
body: string;
|
|
299
|
+
raw: string;
|
|
300
|
+
}
|
|
301
|
+
/** A batch of files to be ingested together. */
|
|
302
|
+
interface IngestionBatch {
|
|
303
|
+
id: string;
|
|
304
|
+
created_at: string;
|
|
305
|
+
files: string[];
|
|
306
|
+
reason: "initial" | "new" | "update";
|
|
307
|
+
}
|
|
308
|
+
/** Provenance record schema. */
|
|
309
|
+
interface ProvenanceRecord {
|
|
310
|
+
id: string;
|
|
311
|
+
seq: number;
|
|
312
|
+
timestamp: string;
|
|
313
|
+
type: OperationType;
|
|
314
|
+
source_files: string[];
|
|
315
|
+
source_hashes: string[];
|
|
316
|
+
prompt_hash: string;
|
|
317
|
+
model_id: ModelId;
|
|
318
|
+
response_hash: string;
|
|
319
|
+
wiki_files_written: string[];
|
|
320
|
+
wiki_file_hashes_after: Record<string, string>;
|
|
321
|
+
merkle_root?: string;
|
|
322
|
+
/**
|
|
323
|
+
* Review item 43 (X2-C): tenant_id is the *security* boundary (per-
|
|
324
|
+
* tenant Fly Machine + Fly secret), while wiki_id is the data boundary.
|
|
325
|
+
* Adding tenant_id to the canonical payload makes cross-tenant chain
|
|
326
|
+
* confusion detectable: if a record from tenant A ever lands in tenant
|
|
327
|
+
* B's chain file, the canonical id diverges from what tenant B would
|
|
328
|
+
* compute. Optional for backwards compat — daemons outside hosted
|
|
329
|
+
* mode have no tenant_id.
|
|
330
|
+
*/
|
|
331
|
+
tenant_id?: string;
|
|
332
|
+
previous_id: string | null;
|
|
333
|
+
previous_chain_hash: string;
|
|
334
|
+
chain_hash: string;
|
|
335
|
+
/**
|
|
336
|
+
* Review item 42: HMAC signature over `id || chain_hash`. From G5
|
|
337
|
+
* closure (Pass 018, v0.8.2) this is signed with the active workspace
|
|
338
|
+
* DEK from the KeyStore. Older chains (pre-G5 or G5-scaffolding only)
|
|
339
|
+
* fall back to the 4-tier resolution at construction (`hmacKey` opts >
|
|
340
|
+
* `WOTW_PROVENANCE_HMAC_KEY` env > derived from tenant_id > undefined).
|
|
341
|
+
* Detects forge / delete attacks even when an attacker can read the
|
|
342
|
+
* chain file — they cannot mint records the verifier would accept.
|
|
343
|
+
*
|
|
344
|
+
* NOT in the canonical payload (canonical-payload-exclusion pattern).
|
|
345
|
+
*/
|
|
346
|
+
hmac?: string;
|
|
347
|
+
/**
|
|
348
|
+
* G5 closure (Pass 018, v0.8.2): identifier of the workspace DEK that
|
|
349
|
+
* signed this record's `hmac`. Used at verify time to look up the right
|
|
350
|
+
* DEK across rotations — records signed under a previous DEK still
|
|
351
|
+
* verify after rotation because their key_id resolves to the
|
|
352
|
+
* `rotating` or `archived` row in workspace_keys.
|
|
353
|
+
*
|
|
354
|
+
* Absent on records produced before v0.8.2 (those used the
|
|
355
|
+
* single-key 4-tier resolution and the verifier falls back to it
|
|
356
|
+
* when key_id is missing).
|
|
357
|
+
*
|
|
358
|
+
* NOT in the canonical payload — canonical-payload-exclusion pattern,
|
|
359
|
+
* same as `fact_hashes_*` and `hmac` itself. Old daemons compute
|
|
360
|
+
* identical id / chain_hash on records carrying this field.
|
|
361
|
+
*/
|
|
362
|
+
key_id?: string;
|
|
363
|
+
/**
|
|
364
|
+
* Pass B (fact extraction): list of fact_hash strings added by this
|
|
365
|
+
* operation. Present on `type: "fact_extracted"` records and (when
|
|
366
|
+
* relevant) on `type: "ingest"` / `type: "heal"` records whose downstream
|
|
367
|
+
* extraction wrote new facts. Optional + backwards-compatible — old
|
|
368
|
+
* daemons reading a chain that contains this field ignore it during
|
|
369
|
+
* verification because the canonical-payload computation reads only the
|
|
370
|
+
* fields it knows about.
|
|
371
|
+
*/
|
|
372
|
+
fact_hashes_added?: string[];
|
|
373
|
+
/** Pass B: list of fact_hash strings superseded by this operation. */
|
|
374
|
+
fact_hashes_superseded?: string[];
|
|
375
|
+
metadata?: Record<string, string | number | boolean>;
|
|
376
|
+
}
|
|
377
|
+
/** A cost log entry persisted one-per-line. */
|
|
378
|
+
interface CostLogEntry {
|
|
379
|
+
timestamp: string;
|
|
380
|
+
operation: OperationType;
|
|
381
|
+
model_id: ModelId;
|
|
382
|
+
cost_usd: number;
|
|
383
|
+
input_tokens?: number;
|
|
384
|
+
output_tokens?: number;
|
|
385
|
+
batch_id?: string;
|
|
386
|
+
}
|
|
387
|
+
/** Daemon status payload. */
|
|
388
|
+
interface DaemonStatus {
|
|
389
|
+
running: boolean;
|
|
390
|
+
pid: number | null;
|
|
391
|
+
started_at: string | null;
|
|
392
|
+
uptime_seconds: number | null;
|
|
393
|
+
config_path: string | null;
|
|
394
|
+
wiki_root: string;
|
|
395
|
+
server: {
|
|
396
|
+
host: string;
|
|
397
|
+
port: number;
|
|
398
|
+
reachable: boolean;
|
|
399
|
+
};
|
|
400
|
+
stats: {
|
|
401
|
+
wiki_pages: number;
|
|
402
|
+
provenance_records: number;
|
|
403
|
+
pending_batches: number;
|
|
404
|
+
cost_today_usd: number;
|
|
405
|
+
};
|
|
406
|
+
}
|
|
407
|
+
/** An event emitted by the daemon. */
|
|
408
|
+
type DaemonEvent = {
|
|
409
|
+
type: "FileDetected";
|
|
410
|
+
path: string;
|
|
411
|
+
timestamp: string;
|
|
412
|
+
} | {
|
|
413
|
+
type: "BatchQueued";
|
|
414
|
+
batch_id: string;
|
|
415
|
+
file_count: number;
|
|
416
|
+
timestamp: string;
|
|
417
|
+
} | {
|
|
418
|
+
type: "IngestionStarted";
|
|
419
|
+
batch_id: string;
|
|
420
|
+
timestamp: string;
|
|
421
|
+
} | {
|
|
422
|
+
type: "IngestionComplete";
|
|
423
|
+
batch_id: string;
|
|
424
|
+
wiki_files_written: string[];
|
|
425
|
+
cost_usd: number;
|
|
426
|
+
timestamp: string;
|
|
427
|
+
} | {
|
|
428
|
+
type: "QueryReceived";
|
|
429
|
+
query: string;
|
|
430
|
+
client: string;
|
|
431
|
+
timestamp: string;
|
|
432
|
+
} | {
|
|
433
|
+
type: "QueryAnswered";
|
|
434
|
+
query: string;
|
|
435
|
+
sources_used: number;
|
|
436
|
+
cost_usd: number;
|
|
437
|
+
timestamp: string;
|
|
438
|
+
} | {
|
|
439
|
+
type: "CompoundFiled";
|
|
440
|
+
page: string;
|
|
441
|
+
sources: string[];
|
|
442
|
+
timestamp: string;
|
|
443
|
+
} | {
|
|
444
|
+
type: "Error";
|
|
445
|
+
phase: string;
|
|
446
|
+
error: string;
|
|
447
|
+
timestamp: string;
|
|
448
|
+
};
|
|
449
|
+
|
|
450
|
+
/**
|
|
451
|
+
* Default configuration applied when no file is found (or when fields are missing).
|
|
452
|
+
*/
|
|
453
|
+
declare function defaultConfig(): WotwConfig;
|
|
454
|
+
/** Result of loading config: resolved value and origin path (if any). */
|
|
455
|
+
interface LoadConfigResult {
|
|
456
|
+
config: WotwConfig;
|
|
457
|
+
path: string | null;
|
|
458
|
+
}
|
|
459
|
+
/**
|
|
460
|
+
* Load configuration from cosmiconfig's discovery of `wotw.config.*`, `.wotwrc`, or
|
|
461
|
+
* a `wotw` key in package.json. If no file is found, the default config is returned.
|
|
462
|
+
*
|
|
463
|
+
* Resolution order (highest to lowest priority):
|
|
464
|
+
* 1. Environment variables (see {@link applyEnvOverrides})
|
|
465
|
+
* 2. User config file
|
|
466
|
+
* 3. Defaults
|
|
467
|
+
*
|
|
468
|
+
* In hosted mode (`WOTW_HOSTED=true` or `hosted.enabled` in the file), the
|
|
469
|
+
* resulting config is additionally checked by {@link validateHostedConfig}
|
|
470
|
+
* which throws when `tenant_id` is missing, malformed, or `wiki_root` is
|
|
471
|
+
* unset.
|
|
472
|
+
*
|
|
473
|
+
* @param searchFrom optional directory to search from (defaults to process.cwd())
|
|
474
|
+
*/
|
|
475
|
+
declare function loadConfig(searchFrom?: string): Promise<LoadConfigResult>;
|
|
476
|
+
/**
|
|
477
|
+
* Deep-merge user config on top of defaults. Unknown keys in user config are dropped
|
|
478
|
+
* to prevent typos from leaking into runtime behavior.
|
|
479
|
+
*/
|
|
480
|
+
declare function mergeConfig(base: WotwConfig, override: Partial<WotwConfig>): WotwConfig;
|
|
481
|
+
/**
|
|
482
|
+
* Expand all path-like fields in a config using {@link resolvePath}.
|
|
483
|
+
* Returns a new config; the input is not mutated.
|
|
484
|
+
*/
|
|
485
|
+
declare function resolveConfigPaths(config: WotwConfig, baseDir?: string): WotwConfig;
|
|
486
|
+
|
|
487
|
+
/** Summary of a successful execution-mode resolution. */
|
|
488
|
+
interface ResolvedExecutionMode {
|
|
489
|
+
/** Concrete runtime to use. */
|
|
490
|
+
mode: RuntimeMode;
|
|
491
|
+
/** Configured mode that produced this result. */
|
|
492
|
+
configuredMode: ExecutionMode;
|
|
493
|
+
/** Absolute path to the `claude` binary, if CLI mode was resolved. */
|
|
494
|
+
cliPath: string | null;
|
|
495
|
+
/** Name of the env var that supplied the API key, if API mode was resolved. */
|
|
496
|
+
apiKeyEnv: string | null;
|
|
497
|
+
/** Model identifier that will be used (cli_model in CLI mode, router in API mode). */
|
|
498
|
+
effectiveModelHint: string;
|
|
499
|
+
/** One-line human-readable summary, suitable for logging or CLI output. */
|
|
500
|
+
description: string;
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
/** A daemon subsystem that can start and stop cleanly. */
|
|
504
|
+
interface DaemonSubsystem {
|
|
505
|
+
name: string;
|
|
506
|
+
start(): Promise<void>;
|
|
507
|
+
stop(): Promise<void>;
|
|
508
|
+
}
|
|
509
|
+
interface DaemonOptions {
|
|
510
|
+
configPath: string | null;
|
|
511
|
+
workingDir: string;
|
|
512
|
+
}
|
|
513
|
+
/**
|
|
514
|
+
* The Daemon class holds runtime state and coordinates subsystem lifecycle.
|
|
515
|
+
*/
|
|
516
|
+
declare class Daemon {
|
|
517
|
+
private readonly subsystems;
|
|
518
|
+
private shuttingDown;
|
|
519
|
+
private readonly opts;
|
|
520
|
+
private config;
|
|
521
|
+
private executionMode;
|
|
522
|
+
private releaseLock;
|
|
523
|
+
constructor(opts: DaemonOptions);
|
|
524
|
+
/** Resolve config and return it. */
|
|
525
|
+
init(): Promise<WotwConfig>;
|
|
526
|
+
/** Return the resolved execution mode, or null if init() hasn't run yet. */
|
|
527
|
+
getExecutionMode(): ResolvedExecutionMode | null;
|
|
528
|
+
/** Attach a subsystem for start/stop management. */
|
|
529
|
+
attachSubsystem(sub: DaemonSubsystem): void;
|
|
530
|
+
/**
|
|
531
|
+
* Main run loop. Acquires the start lock, writes the PID file, starts all
|
|
532
|
+
* subsystems, installs signal handlers, and blocks until shutdown.
|
|
533
|
+
*/
|
|
534
|
+
run(): Promise<void>;
|
|
535
|
+
/** Install SIGTERM / SIGINT handlers for graceful shutdown. */
|
|
536
|
+
private installSignalHandlers;
|
|
537
|
+
/** Stop all subsystems, release the lock, remove the PID file, and exit. */
|
|
538
|
+
shutdown(exitCode: number): Promise<void>;
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
/** Special value used as `previous_chain_hash` for the first record in a chain. */
|
|
542
|
+
declare const GENESIS_HASH: string;
|
|
543
|
+
/**
|
|
544
|
+
* Produce a canonical JSON string for any JSON-serializable input.
|
|
545
|
+
* Keys in every object are sorted lexicographically, recursively.
|
|
546
|
+
*/
|
|
547
|
+
declare function canonicalJson(value: unknown): string;
|
|
548
|
+
/** SHA-256 of a string or buffer, returned as hex. */
|
|
549
|
+
declare function sha256Hex(input: string | Buffer): string;
|
|
550
|
+
/**
|
|
551
|
+
* Alias for {@link sha256Hex}. Kept for ergonomic call sites
|
|
552
|
+
* (`sha256(contents)` reads more naturally than `sha256Hex(contents)` in
|
|
553
|
+
* non-provenance code) and for the stable public API in `src/index.ts`.
|
|
554
|
+
*/
|
|
555
|
+
declare const sha256: typeof sha256Hex;
|
|
556
|
+
/** SHA-256 of the canonical JSON form of a value. */
|
|
557
|
+
declare function sha256Canonical(value: unknown): string;
|
|
558
|
+
/** Alias for {@link sha256Canonical} — hashes the canonical JSON of a value. */
|
|
559
|
+
declare const sha256Json: typeof sha256Canonical;
|
|
560
|
+
/** Alias for {@link canonicalJson} — kept for the stable public API. */
|
|
561
|
+
declare const stableStringify: typeof canonicalJson;
|
|
562
|
+
/**
|
|
563
|
+
* Synchronous SHA-256 of a file on disk. Reads the whole file into memory,
|
|
564
|
+
* which is fine for wiki pages. Throws if the file does not exist — callers
|
|
565
|
+
* that need ENOENT tolerance should use the async {@link sha256File} which
|
|
566
|
+
* returns null. Used by the watcher's in-memory event classifier where
|
|
567
|
+
* synchronous semantics simplify the seed flow.
|
|
568
|
+
*/
|
|
569
|
+
declare function sha256FileSync(filePath: string): string;
|
|
570
|
+
/**
|
|
571
|
+
* SHA-256 of a file on disk. Returns null if the file does not exist.
|
|
572
|
+
* Reads the whole file into memory — fine for wiki pages which are small.
|
|
573
|
+
*/
|
|
574
|
+
declare function sha256File(path: string): Promise<string | null>;
|
|
575
|
+
/**
|
|
576
|
+
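* Hash many files in parallel. Missing files map to null entries (NOTE(review): the declared value type is `string`, not `string | null` — confirm which is correct).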
|
|
577
|
+
* Used when recording the post-ingestion state of the wiki.
|
|
578
|
+
*/
|
|
579
|
+
declare function sha256Files(paths: string[]): Promise<Record<string, string>>;
|
|
580
|
+
|
|
581
|
+
/**
|
|
582
|
+
* Content sanitization: strip credentials and PII patterns from text before LLM ingestion.
|
|
583
|
+
*
|
|
584
|
+
* This is a best-effort redaction layer. Users can extend the patterns list via
|
|
585
|
+
* configuration. The goal is to keep secrets out of logs, prompts, and wiki pages.
|
|
586
|
+
*/
|
|
587
|
+
interface RedactionRule {
|
|
588
|
+
name: string;
|
|
589
|
+
pattern: RegExp;
|
|
590
|
+
replacement: string;
|
|
591
|
+
}
|
|
592
|
+
/**
|
|
593
|
+
* Default redaction rules, ordered most-likely-to-match first for efficiency.
|
|
594
|
+
*/
|
|
595
|
+
declare const DEFAULT_REDACTIONS: readonly RedactionRule[];
|
|
596
|
+
/**
|
|
597
|
+
* Redact sensitive content from a text blob using the supplied rules (or defaults).
|
|
598
|
+
*/
|
|
599
|
+
declare function sanitize(input: string, rules?: readonly RedactionRule[]): string;
|
|
600
|
+
/**
|
|
601
|
+
* Redact and return both the redacted `output` and the list of rule names that triggered.
|
|
602
|
+
*/
|
|
603
|
+
declare function sanitizeWithReport(input: string, rules?: readonly RedactionRule[]): {
|
|
604
|
+
output: string;
|
|
605
|
+
triggered: string[];
|
|
606
|
+
};
|
|
607
|
+
|
|
608
|
+
/**
 * Log severity names accepted by {@link initLogger}. The same six literals
 * appear as the type of `WotwConfig.daemon.log_level`.
 */
type LogLevel = "trace" | "debug" | "info" | "warn" | "error" | "fatal";
|
|
609
|
+
/**
 * Initialize the logger and return it as a pino `Logger`.
 *
 * Both parameters are optional. Presumably `level` sets the minimum severity
 * and `logFile` a file destination — TODO confirm against the implementation,
 * which is not visible in this declaration file.
 */
declare function initLogger(level?: LogLevel, logFile?: string): Logger;
|
|
610
|
+
/**
|
|
611
|
+
* Return the root logger, initializing with defaults if none exists.
|
|
612
|
+
* When defaultContext has been set (hosted mode), every child logger
|
|
613
|
+
* automatically includes those fields.
|
|
614
|
+
*/
|
|
615
|
+
declare function getLogger(module?: string, extra?: Record<string, unknown>): Logger;
|
|
616
|
+
|
|
617
|
+
export { type ConfidenceLevel, type CostLogEntry, DEFAULT_REDACTIONS, Daemon, type DaemonEvent, type DaemonOptions, type DaemonStatus, type DaemonSubsystem, GENESIS_HASH, type IngestionBatch, type ModelId, type OperationType, type ProvenanceRecord, type WikiCategory, type WikiFrontmatter, type WikiPage, type WotwConfig, canonicalJson, defaultConfig, getLogger, initLogger, loadConfig, mergeConfig, resolveConfigPaths, sanitize, sanitizeWithReport, sha256, sha256Canonical, sha256File, sha256FileSync, sha256Files, sha256Hex, sha256Json, stableStringify };
|