@hasna/knowledge 0.2.27 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +41 -0
  2. package/bin/open-knowledge-mcp.js +15 -7
  3. package/bin/open-knowledge.js +17 -17
  4. package/dist/agent.d.ts +35 -0
  5. package/dist/artifact-store.d.ts +63 -0
  6. package/dist/auth.d.ts +35 -0
  7. package/dist/embeddings.d.ts +77 -0
  8. package/dist/index.d.ts +20 -0
  9. package/dist/index.js +5709 -0
  10. package/dist/knowledge-db.d.ts +27 -0
  11. package/dist/manifest-ingest.d.ts +35 -0
  12. package/dist/outbox-consume.d.ts +25 -0
  13. package/dist/provenance.d.ts +50 -0
  14. package/dist/providers.d.ts +89 -0
  15. package/dist/reindex.d.ts +37 -0
  16. package/dist/remote-client.d.ts +108 -0
  17. package/dist/retrieval.d.ts +71 -0
  18. package/dist/safety.d.ts +70 -0
  19. package/dist/sdk.d.ts +72 -0
  20. package/dist/search.d.ts +65 -0
  21. package/dist/service.d.ts +117 -0
  22. package/dist/source-ingest.d.ts +18 -0
  23. package/dist/source-ref.d.ts +30 -0
  24. package/dist/source-resolver.d.ts +92 -0
  25. package/dist/storage-contract.d.ts +106 -0
  26. package/dist/web-search.d.ts +40 -0
  27. package/dist/wiki-compiler.d.ts +67 -0
  28. package/dist/wiki-layout.d.ts +23 -0
  29. package/dist/workspace.d.ts +111 -0
  30. package/package.json +15 -7
  31. package/src/agent.ts +0 -367
  32. package/src/artifact-store.ts +0 -184
  33. package/src/auth.ts +0 -123
  34. package/src/cli.ts +0 -1184
  35. package/src/embeddings.ts +0 -516
  36. package/src/knowledge-db.ts +0 -354
  37. package/src/manifest-ingest.ts +0 -515
  38. package/src/mcp-http.js +0 -110
  39. package/src/mcp.js +0 -1503
  40. package/src/outbox-consume.ts +0 -463
  41. package/src/provenance.ts +0 -93
  42. package/src/providers.ts +0 -308
  43. package/src/reindex.ts +0 -260
  44. package/src/remote-client.ts +0 -268
  45. package/src/retrieval.ts +0 -326
  46. package/src/safety.ts +0 -265
  47. package/src/schema.js +0 -25
  48. package/src/search.ts +0 -510
  49. package/src/service.ts +0 -443
  50. package/src/source-ingest.ts +0 -268
  51. package/src/source-ref.ts +0 -104
  52. package/src/source-resolver.ts +0 -436
  53. package/src/storage-contract.ts +0 -346
  54. package/src/store.ts +0 -113
  55. package/src/web-search.ts +0 -330
  56. package/src/wiki-compiler.ts +0 -711
  57. package/src/wiki-layout.ts +0 -251
  58. package/src/workspace.ts +0 -251
@@ -1,711 +0,0 @@
1
- import { createHash, randomUUID } from 'node:crypto';
2
- import type { Database } from 'bun:sqlite';
3
- import type { ArtifactStore, ArtifactWrite } from './artifact-store';
4
- import { hashArtifactBody, recordStorageObjects, type GeneratedStorageObject } from './storage-contract';
5
- import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
6
- import { generatedArtifactProvenance } from './provenance';
7
- import type { KnowledgeContextPack } from './retrieval';
8
-
9
/** Inputs accepted by `compileWikiPage`. */
export interface WikiCompileOptions {
  /** Database path handed to `migrateKnowledgeDb` and `openKnowledgeDb`. */
  dbPath: string;
  /** Store that receives the generated page, the concept page and the log file. */
  store: ArtifactStore;
  /** Page title; when blank, the query and then the first matched source title are used. */
  title?: string;
  /** Free-text query; its terms filter source chunks by substring match on chunk text. */
  query?: string;
  /** Limits chunks to sources whose URI equals a ref, or whose chunk metadata contains it. */
  sourceRefs?: string[];
  /** Maximum number of chunks to compile; defaults to 10 and is clamped to 1..50. */
  limit?: number;
  /** Clock override; the current time is used when omitted. */
  now?: Date;
}
18
-
19
/** Summary returned by `compileWikiPage` after a page has been written and indexed. */
export interface WikiCompileResult {
  /** Stable id derived from the page path. */
  page_id: string;
  /** Artifact key of the generated page (`wiki/generated/<slug>.md`). */
  path: string;
  /** URI reported by the artifact store for the page. */
  artifact_uri: string;
  /** Hash of the page artifact, or an empty string when none was reported. */
  content_hash: string;
  /** Number of source chunks that matched the compile input. */
  chunks_seen: number;
  /** Number of citation rows written for the page. */
  citations_written: number;
  /** Id of the companion concept page. */
  concept_page_id: string | null;
  /** Number of index rows upserted. */
  indexes_updated: number;
  /** Artifact key of the log file the completion event was appended to. */
  log_key: string;
  /** Non-fatal warnings; `compileWikiPage` currently always returns an empty list. */
  warnings: string[];
}
31
-
32
/** Inputs accepted by `fileAnswerToWiki`. */
export interface WikiAnswerFileOptions {
  /** Database path handed to `openKnowledgeDb`. */
  dbPath: string;
  /** Store that receives the answer page and the log file. */
  store: ArtifactStore;
  /** Original question; used (truncated to 80 characters) as the page title. */
  prompt: string;
  /** Answer text placed in the page body under the title. */
  answer: string;
  /** Retrieval context whose `citations` are listed on the page and stored as citation rows. */
  context: KnowledgeContextPack;
  /** Must be truthy for anything to be written; otherwise a dry-run result is returned. */
  approveWrite?: boolean;
  /** Clock override; the current time is used when omitted. */
  now?: Date;
}
41
-
42
/** Outcome of `fileAnswerToWiki`; every nullable field is `null` on a dry run. */
export interface WikiAnswerFileResult {
  /** Whether the caller approved the write. */
  approved: boolean;
  /** Whether artifacts and database rows were actually written. */
  durable_writes_performed: boolean;
  /** Stable id derived from the answer page path. */
  page_id: string | null;
  /** Artifact key of the answer page (`wiki/answers/<slug>.md`). */
  path: string | null;
  /** URI reported by the artifact store for the page. */
  artifact_uri: string | null;
  /** Number of citation rows written (0 on a dry run). */
  citations_written: number;
  /** Artifact key of the log file the filing event was appended to. */
  log_key: string | null;
  /** Human-readable status line. */
  message: string;
}
52
-
53
/** A single finding produced by `lintWiki`. */
export interface WikiLintIssue {
  /** Which lint check produced the finding. */
  type:
    | 'missing_citation'
    | 'stale_citation'
    | 'duplicate_page'
    | 'orphan_page'
    | 'unresolved_source_ref'
    | 'contradiction_marker'
    | 'new_article_candidate';
  /** Any `error` finding makes `WikiLintResult.ok` false. */
  severity: 'info' | 'warn' | 'error';
  /** Id of the affected wiki page, when the check concerns a page. */
  page_id?: string;
  /** Path of the affected wiki page, when the check concerns a page. */
  path?: string;
  /** Source URI involved in the finding, when there is one. */
  source_uri?: string;
  /** Chunk id involved in the finding, when there is one. */
  chunk_id?: string;
  /** Human-readable description of the finding. */
  message: string;
}
69
-
70
/** Aggregate report returned by `lintWiki`. */
export interface WikiLintResult {
  /** True when no finding has severity `error`. */
  ok: boolean;
  /** Total number of findings, regardless of severity. */
  issue_count: number;
  /** All findings, in the order the checks ran. */
  issues: WikiLintIssue[];
  /** Row counts gathered while linting. */
  counts: {
    /** Wiki pages with status `active`. */
    active_pages: number;
    /** Rows in the citations table. */
    citations: number;
    /** Rows in the backlinks table. */
    backlinks: number;
    /** Uncited source chunks reported; the underlying query returns at most 25. */
    new_article_candidates: number;
  };
}
81
-
82
/** Row shape returned by the chunk query in `selectSourceChunks`. */
interface SourceChunkRow {
  /** Id of the chunk row. */
  chunk_id: string;
  /** Chunk text. */
  text: string;
  /** Start offset stored on the chunk, if any. */
  start_offset: number | null;
  /** End offset stored on the chunk, if any. */
  end_offset: number | null;
  /** Raw JSON metadata string stored on the chunk. */
  metadata_json: string;
  /** Id of the source revision the chunk belongs to. */
  source_revision_id: string | null;
  /** Revision label of that source revision. */
  revision: string | null;
  /** Hash recorded on that source revision. */
  hash: string | null;
  /** URI of the owning source. */
  source_uri: string | null;
  /** Title of the owning source. */
  source_title: string | null;
}
94
-
95
/** One citation to be stored for a wiki page by `replacePageCitations`. */
interface CitationInput {
  /** Cited chunk, or `null` when the citation is not tied to a chunk. */
  chunk_id: string | null;
  /** URI of the cited source. */
  source_uri: string;
  /** Quoted text, if any. */
  quote: string | null;
  /** Start offset of the cited span, if known. */
  start_offset: number | null;
  /** End offset of the cited span, if known. */
  end_offset: number | null;
  /** Extra fields serialised into the row's `metadata_json` column. */
  metadata: Record<string, unknown>;
}
103
-
104
/**
 * Derives a deterministic identifier from `value`.
 *
 * @param prefix Namespace label placed before the underscore.
 * @param value  Text to hash.
 * @returns `<prefix>_` followed by the first 20 hex characters of the SHA-256 of `value`.
 */
function stableId(prefix: string, value: string): string {
  const digest = createHash('sha256').update(value).digest('hex');
  return [prefix, digest.substring(0, 20)].join('_');
}
107
-
108
/**
 * Turns arbitrary text into a lowercase, hyphen-separated slug of at most 80 characters.
 *
 * Only ASCII letters and digits survive; every other run of characters becomes a
 * single hyphen, so text without any ASCII alphanumerics yields the fallback
 * `knowledge-page`. The 80-character cut happens after edge hyphens are stripped,
 * so a truncated slug may itself end in a hyphen.
 *
 * @param value Text to slugify.
 * @returns The slug, or `knowledge-page` when nothing usable remains.
 */
function slugify(value: string): string {
  const lowered = value.normalize('NFKC').toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]+/g, '-');
  const withoutEdgeHyphens = hyphenated.replace(/^-+|-+$/g, '');
  const capped = withoutEdgeHyphens.slice(0, 80);
  return capped === '' ? 'knowledge-page' : capped;
}
117
-
118
/**
 * Splits a date into UTC calendar components formatted for use in paths.
 *
 * @param now Date to split.
 * @returns The UTC year, plus month and day left-padded to two digits.
 */
function todayParts(now: Date): { year: string; month: string; day: string } {
  const twoDigits = (part: number): string => String(part).padStart(2, '0');
  const year = String(now.getUTCFullYear());
  const month = twoDigits(now.getUTCMonth() + 1);
  const day = twoDigits(now.getUTCDate());
  return { year, month, day };
}
125
-
126
/**
 * Rough token estimate: 1.25 tokens per whitespace-separated word, rounded up.
 *
 * @param text Text to measure.
 * @returns The estimate, never less than 1 (even for empty text).
 */
function estimateTokenCount(text: string): number {
  const wordCount = text.match(/\S+/g)?.length ?? 0;
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
130
-
131
/**
 * Parses a JSON string that is expected to hold an object.
 *
 * @param value JSON text, or nothing.
 * @returns The parsed object; an empty object when the input is empty, is not
 *          valid JSON, or decodes to anything other than a non-array object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> {
  if (value === null || value === undefined || value === '') return {};

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return {};
  }

  const isPlainObject = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  return isPlainObject ? (decoded as Record<string, unknown>) : {};
}
140
-
141
/**
 * Extracts search terms from a free-text query.
 *
 * @param query Raw query text; `undefined` is treated as empty.
 * @returns Up to 12 distinct lowercase terms (runs of letters, digits and
 *          underscores), in order of first appearance.
 */
function queryTerms(query: string | undefined): string[] {
  const tokens = (query ?? '').toLowerCase().match(/[\p{L}\p{N}_]+/gu);
  if (tokens === null) return [];

  const distinct = new Set<string>();
  for (const token of tokens) {
    distinct.add(token);
    if (distinct.size === 12) break;
  }
  return [...distinct];
}
144
-
145
/**
 * Escapes SQL `LIKE` metacharacters with a backslash.
 *
 * The result is only treated as escaped by a `LIKE` that declares the
 * backslash as its escape character.
 *
 * @param value Literal text to embed in a pattern.
 * @returns `value` with every `\`, `%` and `_` preceded by a backslash.
 */
function escapeLike(value: string): string {
  // `$&` re-inserts the matched character after the added backslash.
  return value.replace(/[\\%_]/g, '\\$&');
}
148
-
149
/**
 * Selects the source chunks a wiki page should be compiled from.
 *
 * Only chunks of kind `source` are considered. When `options.sourceRefs` is
 * non-empty, a chunk must belong to a source whose URI equals one of the refs
 * or have metadata JSON containing one of them. When `options.query` yields
 * terms, the lowercased chunk text must contain at least one term. Results
 * are ordered by creation time, then ordinal.
 *
 * @param db      Open knowledge database.
 * @param options Compile options; `limit` defaults to 10 and is clamped to 1..50.
 * @returns Matching rows joined with their source revision and source.
 */
function selectSourceChunks(db: Database, options: WikiCompileOptions): SourceChunkRow[] {
  // A NaN limit falls back to the default; fractions are truncated so the bound
  // LIMIT value is always an integer in 1..50.
  const requested = options.limit ?? 10;
  const limit = Number.isNaN(requested) ? 10 : Math.trunc(Math.max(1, Math.min(requested, 50)));
  const sourceRefs = options.sourceRefs ?? [];
  const terms = queryTerms(options.query);
  const where: string[] = ["c.kind = 'source'"];
  const params: (string | number)[] = [];

  if (sourceRefs.length > 0) {
    // The pattern is built with escapeLike(), so the LIKE must declare the
    // backslash as its escape character. Without the ESCAPE clause SQLite
    // treats the inserted backslashes literally and refs containing `_`, `%`
    // or `\` can never match.
    where.push(`(${sourceRefs.map(() => "(s.uri = ? OR c.metadata_json LIKE ? ESCAPE '\\')").join(' OR ')})`);
    for (const ref of sourceRefs) {
      params.push(ref, `%${escapeLike(ref)}%`);
    }
  }

  if (terms.length > 0) {
    where.push(`(${terms.map(() => "lower(c.text) LIKE ? ESCAPE '\\'").join(' OR ')})`);
    for (const term of terms) params.push(`%${escapeLike(term)}%`);
  }

  params.push(limit);
  return db.query<SourceChunkRow, (string | number)[]>(
    `SELECT
       c.id AS chunk_id,
       c.text,
       c.start_offset,
       c.end_offset,
       c.metadata_json,
       c.source_revision_id,
       sr.revision,
       sr.hash,
       s.uri AS source_uri,
       s.title AS source_title
     FROM chunks c
     JOIN source_revisions sr ON sr.id = c.source_revision_id
     JOIN sources s ON s.id = sr.source_id
     WHERE ${where.join(' AND ')}
     ORDER BY c.created_at ASC, c.ordinal ASC
     LIMIT ?`,
  ).all(...params);
}
189
-
190
/**
 * Collapses whitespace and shortens text to at most `max` characters.
 *
 * Text that already fits is returned unchanged (after whitespace is collapsed).
 * Longer text is cut and suffixed with `...`; the suffix counts towards `max`,
 * so the result never exceeds it.
 *
 * @param text Source text.
 * @param max  Maximum length of the result in UTF-16 code units (default 420).
 * @returns The normalized, possibly truncated text.
 */
function excerpt(text: string, max = 420): string {
  const normalized = text.replace(/\s+/g, ' ').trim();
  if (normalized.length <= max) return normalized;

  const ellipsis = '...';
  // Degenerate budgets: there is no room for any text before the ellipsis.
  if (max <= ellipsis.length) return ellipsis.slice(0, Math.max(0, max));

  let head = normalized.slice(0, max - ellipsis.length);
  // Do not leave half of a surrogate pair at the cut.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);

  return `${head.trimEnd()}${ellipsis}`;
}
194
-
195
/**
 * Chooses the title for a compiled page.
 *
 * @param options Compile options supplying the optional `title` and `query`.
 * @param rows    Matched chunks; the first row's source title is the fallback.
 * @returns The trimmed explicit title, else the trimmed query, else the first
 *          row's source title, else `Compiled Knowledge`.
 */
function titleFor(options: WikiCompileOptions, rows: SourceChunkRow[]): string {
  for (const candidate of [options.title, options.query]) {
    const trimmed = candidate?.trim();
    if (trimmed) return trimmed;
  }
  return rows[0]?.source_title ?? 'Compiled Knowledge';
}
200
-
201
/**
 * Renders the markdown body of a compiled wiki page.
 *
 * The page has a title heading, a generation timestamp, a `Sources` list with
 * one `[S<n>]` entry per row, and one section per row holding an excerpt of
 * the chunk text and its citation label.
 *
 * @param title Page title.
 * @param rows  Source chunks, in the order they should be numbered.
 * @param now   Timestamp string printed after `Generated at:`.
 * @returns The markdown text, ending with a newline.
 */
function compileBody(title: string, rows: SourceChunkRow[], now: string): string {
  const sourceList: string[] = [];
  const sections: string[] = [];

  rows.forEach((row, position) => {
    const ordinal = position + 1;
    const tag = `S${ordinal}`;
    const uri = row.source_uri ?? 'unknown';
    const displayName = row.source_title ?? row.source_uri ?? 'Source';

    sourceList.push(
      `- [${tag}] ${displayName} (${uri}, revision ${row.revision ?? 'unknown'}, hash ${row.hash ?? 'unknown'})`,
    );
    sections.push(
      [`## ${row.source_title ?? `Source ${ordinal}`}`, '', excerpt(row.text), '', `Citation: [${tag}]`].join('\n'),
    );
  });

  const lines = [`# ${title}`, '', `Generated at: ${now}`, '', '## Sources', '', ...sourceList, '', ...sections, ''];
  return lines.join('\n');
}
229
-
230
/**
 * Writes one artifact to the store and describes it as a storage object.
 *
 * @param store Destination artifact store.
 * @param entry Artifact to write.
 * @returns The stored key and URI, a kind (`log` for keys under `logs/`,
 *          otherwise `wiki_page`), the content type, whatever
 *          `hashArtifactBody` reports for the body, and a copy of the metadata.
 */
async function writeArtifact(store: ArtifactStore, entry: ArtifactWrite): Promise<GeneratedStorageObject> {
  const { key, uri } = await store.put(entry);
  const kind = entry.key.startsWith('logs/') ? 'log' : 'wiki_page';
  return {
    key,
    uri,
    kind,
    content_type: entry.content_type,
    // Spread after the fields above so the hash helper's fields are applied in
    // the same position as before.
    ...hashArtifactBody(entry.body),
    metadata: { ...entry.metadata },
  };
}
243
-
244
/**
 * Appends one JSON event line to the daily log artifact
 * `logs/<yyyy>/<mm>/<dd>.jsonl` (UTC date of `now`).
 *
 * The current log text is read, the new line is added, and the whole file is
 * written back. Any failure to read the current text is treated as "no log
 * yet", so the file is then started from the new event alone.
 * NOTE(review): this read-then-rewrite is presumably not safe against
 * concurrent writers — confirm against the store implementation.
 *
 * @param store Artifact store holding the logs.
 * @param event Event payload, serialised with `JSON.stringify`.
 * @param now   Date that selects the log file.
 * @returns The storage object describing the rewritten log.
 */
async function appendLog(store: ArtifactStore, event: Record<string, unknown>, now: Date): Promise<GeneratedStorageObject> {
  const parts = todayParts(now);
  const logKey = `logs/${parts.year}/${parts.month}/${parts.day}.jsonl`;

  let previous: string;
  try {
    previous = await store.getText(logKey);
  } catch {
    previous = '';
  }

  const line = `${JSON.stringify(event)}\n`;
  return writeArtifact(store, {
    key: logKey,
    body: `${previous}${line}`,
    content_type: 'application/x-ndjson',
  });
}
259
-
260
/**
 * Inserts or refreshes a wiki page row and re-indexes its text.
 *
 * The page row is keyed by `path`: an existing row keeps its id and has its
 * title, artifact URI, hash, status, metadata and `updated_at` replaced. All
 * chunks (and their full-text rows) previously stored for `pageId` are then
 * removed and replaced with a single `wiki` chunk holding the whole body.
 *
 * @param db    Open knowledge database.
 * @param input Page identity, artifact location, content and timestamps.
 */
function upsertWikiPage(db: Database, input: {
  pageId: string;
  path: string;
  title: string;
  artifactUri: string;
  contentHash: string;
  body: string;
  provenance: unknown;
  now: string;
}): void {
  const { pageId, path, title, artifactUri, contentHash, body, provenance, now } = input;

  const pageMetadata = JSON.stringify({ artifact_key: path, provenance });
  db.run(
    `INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(path) DO UPDATE SET
       title = excluded.title,
       artifact_uri = excluded.artifact_uri,
       content_hash = excluded.content_hash,
       status = excluded.status,
       metadata_json = excluded.metadata_json,
       updated_at = excluded.updated_at`,
    [pageId, path, title, artifactUri, contentHash, 'active', pageMetadata, now, now],
  );

  // Drop the page's previous chunks; full-text rows are removed one chunk at a time.
  const previousChunks = db.query<{ id: string }, [string]>('SELECT id FROM chunks WHERE wiki_page_id = ?').all(pageId);
  previousChunks.forEach(({ id }) => {
    db.run('DELETE FROM chunks_fts WHERE chunk_id = ?', [id]);
  });
  db.run('DELETE FROM chunks WHERE wiki_page_id = ?', [pageId]);

  // The chunk id depends on the content hash, so new content gets a new chunk id.
  const chunkId = stableId('chk', `${pageId}\u0000${contentHash}`);
  const chunkMetadata = JSON.stringify({
    artifact_key: path,
    artifact_uri: artifactUri,
    content_hash: contentHash,
    provenance,
  });
  db.run(
    `INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    [chunkId, pageId, 'wiki', 0, body, estimateTokenCount(body), 0, body.length, chunkMetadata, now],
  );
  db.run(
    'INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)',
    [chunkId, body, title, artifactUri],
  );
}
329
-
330
/**
 * Replaces every citation stored for a wiki page with the given list.
 *
 * The delete and all inserts run inside one transaction, so a failing insert
 * rolls the whole replacement back and leaves the page's previous citations
 * in place instead of a half-written set.
 *
 * Citation ids are derived from the page id, source URI and chunk id; a
 * citation without a chunk id gets a random component instead, so its id is
 * not stable across runs.
 *
 * @param db        Open knowledge database.
 * @param pageId    Page whose citations are replaced.
 * @param citations New citations, written in order.
 * @param now       Timestamp stored as `created_at`.
 * @returns The number of citations written.
 * @throws Whatever the database raises for a failing statement; the
 *         replacement is rolled back first.
 */
function replacePageCitations(db: Database, pageId: string, citations: CitationInput[], now: string): number {
  const replaceAll = db.transaction(() => {
    db.run('DELETE FROM citations WHERE wiki_page_id = ?', [pageId]);
    for (const citation of citations) {
      db.run(
        `INSERT INTO citations (id, wiki_page_id, chunk_id, source_uri, quote, start_offset, end_offset, metadata_json, created_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
        [
          stableId('cit', `${pageId}\u0000${citation.source_uri}\u0000${citation.chunk_id ?? randomUUID()}`),
          pageId,
          citation.chunk_id,
          citation.source_uri,
          citation.quote,
          citation.start_offset,
          citation.end_offset,
          JSON.stringify(citation.metadata),
          now,
        ],
      );
    }
  });
  replaceAll();
  return citations.length;
}
351
-
352
/**
 * Inserts or refreshes the `wiki_topic` index entry for a page.
 *
 * The entry is named after the page title and sharded by the page path; on a
 * `(kind, name, shard_key)` conflict the artifact URI, metadata and
 * `updated_at` are replaced.
 *
 * @param db    Open knowledge database.
 * @param input Page title, path, artifact URI, content hash and timestamp.
 * @returns Always 1, the number of index entries upserted.
 */
function upsertIndex(db: Database, input: { title: string; path: string; artifactUri: string; contentHash: string; now: string }): number {
  const { title, path, artifactUri, contentHash, now } = input;
  const indexId = stableId('idx', `wiki-topic\u0000${path}`);
  const metadata = JSON.stringify({ artifact_key: path, content_hash: contentHash });

  db.run(
    `INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(kind, name, shard_key) DO UPDATE SET
       artifact_uri = excluded.artifact_uri,
       metadata_json = excluded.metadata_json,
       updated_at = excluded.updated_at`,
    [indexId, 'wiki_topic', title, artifactUri, path, metadata, now, now],
  );
  return 1;
}
376
-
377
/**
 * Picks the concept name for a page title.
 *
 * @param title Page title.
 * @returns The first lowercase run of at least three ASCII letters, digits or
 *          hyphens that starts with a letter or digit, or `knowledge` when the
 *          title contains none.
 */
function firstConcept(title: string): string {
  const hit = /[a-z0-9][a-z0-9-]{2,}/.exec(title.toLowerCase());
  return hit === null ? 'knowledge' : hit[0];
}
380
-
381
/**
 * Compiles matching source chunks into a generated wiki page.
 *
 * Steps, in order: migrate the database; select the source chunks; write the
 * page (`wiki/generated/<slug>.md`) and a companion concept page
 * (`wiki/concepts/<slug>.md`) to the artifact store; append a
 * `wiki_compile_completed` event to the daily log; then record the storage
 * objects, both page rows, a page-to-concept backlink, the page's citations
 * and its topic index entry in the database.
 *
 * Artifacts are written before the database is updated, so a database failure
 * can leave artifacts in the store without matching rows.
 *
 * @param options Compile inputs.
 * @returns Identifiers, counts and the log key for the compiled page.
 * @throws Error when no source chunk matches the input.
 */
export async function compileWikiPage(options: WikiCompileOptions): Promise<WikiCompileResult> {
  const clock = options.now ?? new Date();
  const now = clock.toISOString();

  migrateKnowledgeDb(options.dbPath);
  let rows: SourceChunkRow[];
  const reader = openKnowledgeDb(options.dbPath);
  try {
    rows = selectSourceChunks(reader, options);
  } finally {
    reader.close();
  }
  if (rows.length === 0) throw new Error('No source chunks matched wiki compile input.');

  // --- Main page -----------------------------------------------------------
  const title = titleFor(options, rows);
  const path = `wiki/generated/${slugify(title)}.md`;
  const body = compileBody(title, rows, now);

  // Prefer the source_ref recorded in chunk metadata; fall back to the source URI.
  const sourceRefs: string[] = [];
  for (const row of rows) {
    const declared = parseJsonObject(row.metadata_json).source_ref;
    const ref = typeof declared === 'string' ? declared : row.source_uri;
    if (ref) sourceRefs.push(ref);
  }

  const provenance = generatedArtifactProvenance({
    generated_from: 'wiki_compile',
    artifact_key: path,
    source_refs: sourceRefs,
  });
  const pageArtifact = await writeArtifact(options.store, {
    key: path,
    body,
    content_type: 'text/markdown',
    metadata: { generated_from: 'wiki_compile' },
  });
  const pageId = stableId('wiki', path);
  const pageHash = pageArtifact.hash ?? '';

  const citations: CitationInput[] = [];
  for (const row of rows) {
    citations.push({
      chunk_id: row.chunk_id,
      source_uri: row.source_uri ?? 'unknown',
      quote: excerpt(row.text, 240),
      start_offset: row.start_offset,
      end_offset: row.end_offset,
      metadata: {
        source_revision_id: row.source_revision_id,
        revision: row.revision,
        hash: row.hash,
        source_ref: parseJsonObject(row.metadata_json).source_ref ?? row.source_uri,
      },
    });
  }

  // --- Concept page --------------------------------------------------------
  const concept = firstConcept(title);
  const conceptPath = `wiki/concepts/${slugify(concept)}.md`;
  const conceptBody = [`# ${concept}`, '', `Related page: [[${path}]]`, ''].join('\n');
  const conceptProvenance = generatedArtifactProvenance({
    generated_from: 'wiki_compile_concept',
    artifact_key: conceptPath,
    source_refs: sourceRefs,
  });
  const conceptArtifact = await writeArtifact(options.store, {
    key: conceptPath,
    body: conceptBody,
    content_type: 'text/markdown',
    metadata: { generated_from: 'wiki_compile_concept' },
  });
  const conceptPageId = stableId('wiki', conceptPath);

  // --- Log -----------------------------------------------------------------
  const log = await appendLog(
    options.store,
    {
      ts: now,
      event: 'wiki_compile_completed',
      page_key: path,
      source_refs: sourceRefs,
      chunks_seen: rows.length,
    },
    clock,
  );

  // --- Database bookkeeping ------------------------------------------------
  const db = openKnowledgeDb(options.dbPath);
  try {
    recordStorageObjects(db, [pageArtifact, conceptArtifact, log], clock);
    upsertWikiPage(db, {
      pageId,
      path,
      title,
      artifactUri: pageArtifact.uri,
      contentHash: pageHash,
      body,
      provenance,
      now,
    });
    upsertWikiPage(db, {
      pageId: conceptPageId,
      path: conceptPath,
      title: concept,
      artifactUri: conceptArtifact.uri,
      contentHash: conceptArtifact.hash ?? '',
      body: conceptBody,
      provenance: conceptProvenance,
      now,
    });
    db.run(
      `INSERT OR REPLACE INTO wiki_backlinks (from_page_id, to_page_id, label, created_at)
       VALUES (?, ?, ?, ?)`,
      [pageId, conceptPageId, 'concept', now],
    );
    const citationsWritten = replacePageCitations(db, pageId, citations, now);
    const indexesUpdated = upsertIndex(db, {
      title,
      path,
      artifactUri: pageArtifact.uri,
      contentHash: pageHash,
      now,
    });

    const result: WikiCompileResult = {
      page_id: pageId,
      path,
      artifact_uri: pageArtifact.uri,
      content_hash: pageHash,
      chunks_seen: rows.length,
      citations_written: citationsWritten,
      concept_page_id: conceptPageId,
      indexes_updated: indexesUpdated,
      log_key: log.key,
      warnings: [],
    };
    return result;
  } finally {
    db.close();
  }
}
504
-
505
/**
 * Files an answer as a wiki page under `wiki/answers/`.
 *
 * Without `approveWrite` nothing is touched and a dry-run result is returned.
 * With approval, the steps are, in order: migrate the database; write the
 * answer page to the artifact store; append a `wiki_answer_filed` event to the
 * daily log; then record the storage objects, the page row, its citations and
 * its topic index entry in the database.
 *
 * The title is the prompt, truncated to 80 characters (77 plus `...`) when
 * longer; the page path is derived from the slug of that title, so prompts
 * that slugify identically write to the same page.
 *
 * @param options Prompt, answer, retrieval context and write approval.
 * @returns Dry-run notice, or identifiers and counts for the filed page.
 */
export async function fileAnswerToWiki(options: WikiAnswerFileOptions): Promise<WikiAnswerFileResult> {
  if (!options.approveWrite) {
    return {
      approved: false,
      durable_writes_performed: false,
      page_id: null,
      path: null,
      artifact_uri: null,
      citations_written: 0,
      log_key: null,
      message: 'Dry-run: answer filing requires --approve-write.',
    };
  }

  const nowDate = options.now ?? new Date();
  const now = nowDate.toISOString();

  // Bring the schema up to date before any artifact is written, matching
  // compileWikiPage and lintWiki. A schema problem then surfaces before the
  // store is touched rather than after the page has already been written.
  migrateKnowledgeDb(options.dbPath);

  const title = options.prompt.length > 80 ? `${options.prompt.slice(0, 77)}...` : options.prompt;
  const slug = slugify(title);
  const path = `wiki/answers/${slug}.md`;
  const citations = options.context.citations;
  const body = [
    `# ${title}`,
    '',
    options.answer,
    '',
    '## Citations',
    '',
    ...citations.map((citation, index) => `- [C${index + 1}] ${citation.source_ref ?? citation.source_uri ?? citation.artifact_path ?? citation.artifact_uri ?? 'unknown'} ${citation.hash ? `(hash ${citation.hash})` : ''}`),
    '',
  ].join('\n');
  const sourceRefs = citations.map((citation) => citation.source_ref ?? citation.source_uri).filter((ref): ref is string => Boolean(ref));
  const provenance = generatedArtifactProvenance({
    generated_from: 'knowledge_answer',
    artifact_key: path,
    source_refs: sourceRefs,
  });
  const artifact = await writeArtifact(options.store, {
    key: path,
    body,
    content_type: 'text/markdown',
    metadata: { generated_from: 'knowledge_answer' },
  });
  const log = await appendLog(options.store, {
    ts: now,
    event: 'wiki_answer_filed',
    page_key: path,
    prompt: options.prompt,
    citations: citations.length,
  }, nowDate);
  const pageId = stableId('wiki', path);
  const db = openKnowledgeDb(options.dbPath);
  try {
    recordStorageObjects(db, [artifact, log], nowDate);
    upsertWikiPage(db, {
      pageId,
      path,
      title,
      artifactUri: artifact.uri,
      contentHash: artifact.hash ?? '',
      body,
      provenance,
      now,
    });
    const written = replacePageCitations(db, pageId, citations.map((citation) => ({
      chunk_id: citation.chunk_id,
      // Citations without a source URI fall back to the artifact URI.
      source_uri: citation.source_uri ?? citation.artifact_uri ?? 'unknown',
      quote: citation.quote,
      start_offset: citation.start_offset,
      end_offset: citation.end_offset,
      metadata: {
        source_ref: citation.source_ref,
        artifact_path: citation.artifact_path,
        revision: citation.revision,
        hash: citation.hash,
      },
    })), now);
    upsertIndex(db, {
      title,
      path,
      artifactUri: artifact.uri,
      contentHash: artifact.hash ?? '',
      now,
    });
    return {
      approved: true,
      durable_writes_performed: true,
      page_id: pageId,
      path,
      artifact_uri: artifact.uri,
      citations_written: written,
      log_key: log.key,
      message: `Filed answer to ${path}`,
    };
  } finally {
    db.close();
  }
}
602
-
603
/**
 * Appends a finding to the list being collected.
 *
 * @param issues Accumulator, mutated in place.
 * @param issue  Finding to append.
 */
function addIssue(issues: WikiLintIssue[], issue: WikiLintIssue): void {
  issues[issues.length] = issue;
}
606
-
607
/**
 * Runs the wiki consistency checks against the knowledge database.
 *
 * Checks, in the order their findings are reported:
 *  1. `missing_citation` (error) — active pages under `wiki/generated/` with no citations.
 *  2. `stale_citation` (warn) — citations whose chunk metadata marks it stale or deleted.
 *  3. `duplicate_page` (warn) — active pages sharing a case-insensitive title.
 *  4. `orphan_page` (info) — active pages (except `wiki/README.md`) with no backlinks in either direction.
 *  5. `unresolved_source_ref` (error) — citations whose URI matches no source row and none of the allowed URI schemes.
 *  6. `contradiction_marker` (warn) — pages whose metadata mentions "contradiction".
 *  7. `new_article_candidate` (info) — source chunks cited by no page (at most 25 reported).
 *
 * @param options Object carrying the database path.
 * @returns The findings plus row counts; `ok` is false when any finding is an error.
 */
export function lintWiki(options: { dbPath: string }): WikiLintResult {
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  const issues: WikiLintIssue[] = [];
  // Runs a `SELECT COUNT(*) AS n ...` statement and returns n (0 when no row comes back).
  const countRows = (sql: string): number => db.query<{ n: number }, []>(sql).get()?.n ?? 0;

  try {
    const activePages = countRows("SELECT COUNT(*) AS n FROM wiki_pages WHERE status = 'active'");
    const citationCount = countRows('SELECT COUNT(*) AS n FROM citations');
    const backlinkCount = countRows('SELECT COUNT(*) AS n FROM wiki_backlinks');

    // 1. Generated pages without citations.
    const uncitedPages = db.query<{ id: string; path: string }, []>(
      `SELECT wp.id, wp.path
       FROM wiki_pages wp
       LEFT JOIN citations c ON c.wiki_page_id = wp.id
       WHERE wp.status = 'active' AND wp.path LIKE 'wiki/generated/%'
       GROUP BY wp.id
       HAVING COUNT(c.id) = 0`,
    ).all();
    for (const { id, path } of uncitedPages) {
      issues.push({
        type: 'missing_citation',
        severity: 'error',
        page_id: id,
        path,
        message: 'Generated wiki page has no citations.',
      });
    }

    // 2. Citations pointing at chunks flagged stale or deleted in their metadata.
    const staleCitations = db.query<{ page_id: string; path: string; source_uri: string; chunk_id: string | null }, []>(
      `SELECT wp.id AS page_id, wp.path, c.source_uri, c.chunk_id
       FROM citations c
       JOIN wiki_pages wp ON wp.id = c.wiki_page_id
       LEFT JOIN chunks ch ON ch.id = c.chunk_id
       WHERE ch.metadata_json LIKE '%"stale":true%' OR ch.metadata_json LIKE '%"status":"stale"%' OR ch.metadata_json LIKE '%"status":"deleted"%'`,
    ).all();
    for (const citation of staleCitations) {
      issues.push({
        type: 'stale_citation',
        severity: 'warn',
        page_id: citation.page_id,
        path: citation.path,
        source_uri: citation.source_uri,
        chunk_id: citation.chunk_id ?? undefined,
        message: 'Page cites a stale or deleted source chunk.',
      });
    }

    // 3. Active pages sharing a title (case-insensitive).
    const duplicateTitles = db.query<{ title: string; n: number }, []>(
      `SELECT lower(title) AS title, COUNT(*) AS n
       FROM wiki_pages
       WHERE status = 'active'
       GROUP BY lower(title)
       HAVING COUNT(*) > 1`,
    ).all();
    for (const { title, n } of duplicateTitles) {
      issues.push({
        type: 'duplicate_page',
        severity: 'warn',
        message: `Duplicate active wiki title: ${title} (${n} pages).`,
      });
    }

    // 4. Active pages with neither outgoing nor incoming backlinks.
    const orphanPages = db.query<{ id: string; path: string }, []>(
      `SELECT wp.id, wp.path
       FROM wiki_pages wp
       LEFT JOIN wiki_backlinks wb1 ON wb1.from_page_id = wp.id
       LEFT JOIN wiki_backlinks wb2 ON wb2.to_page_id = wp.id
       WHERE wp.status = 'active'
       AND wp.path NOT IN ('wiki/README.md')
       GROUP BY wp.id
       HAVING COUNT(wb1.to_page_id) = 0 AND COUNT(wb2.from_page_id) = 0`,
    ).all();
    for (const { id, path } of orphanPages) {
      issues.push({
        type: 'orphan_page',
        severity: 'info',
        page_id: id,
        path,
        message: 'Wiki page has no backlinks.',
      });
    }

    // 5. Citation URIs that match no source row and none of the allowed schemes.
    const unresolvedRefs = db.query<{ page_id: string; path: string; source_uri: string }, []>(
      `SELECT wp.id AS page_id, wp.path, c.source_uri
       FROM citations c
       JOIN wiki_pages wp ON wp.id = c.wiki_page_id
       LEFT JOIN sources s ON s.uri = c.source_uri
       WHERE s.id IS NULL AND c.source_uri NOT LIKE 'file://%' AND c.source_uri NOT LIKE 's3://%' AND c.source_uri NOT LIKE 'https://%' AND c.source_uri NOT LIKE 'open-files://%'`,
    ).all();
    for (const ref of unresolvedRefs) {
      issues.push({
        type: 'unresolved_source_ref',
        severity: 'error',
        page_id: ref.page_id,
        path: ref.path,
        source_uri: ref.source_uri,
        message: 'Citation source URI cannot be resolved to a known or allowed source ref.',
      });
    }

    // 6. Pages whose metadata mentions a contradiction.
    const contradictionPages = db.query<{ id: string; path: string }, []>(
      `SELECT id, path FROM wiki_pages WHERE lower(metadata_json) LIKE '%contradiction%'`,
    ).all();
    for (const { id, path } of contradictionPages) {
      issues.push({
        type: 'contradiction_marker',
        severity: 'warn',
        page_id: id,
        path,
        message: 'Page metadata contains a contradiction marker.',
      });
    }

    // 7. Source chunks no page cites yet (the query caps the list at 25).
    const newArticleCandidates = db.query<{ chunk_id: string; source_uri: string | null }, []>(
      `SELECT c.id AS chunk_id, s.uri AS source_uri
       FROM chunks c
       JOIN source_revisions sr ON sr.id = c.source_revision_id
       JOIN sources s ON s.id = sr.source_id
       LEFT JOIN citations cit ON cit.chunk_id = c.id
       WHERE c.kind = 'source'
       GROUP BY c.id
       HAVING COUNT(cit.id) = 0
       LIMIT 25`,
    ).all();
    for (const candidate of newArticleCandidates) {
      issues.push({
        type: 'new_article_candidate',
        severity: 'info',
        chunk_id: candidate.chunk_id,
        source_uri: candidate.source_uri ?? undefined,
        message: 'Source chunk is indexed but not cited by any wiki page yet.',
      });
    }

    const hasError = issues.some((issue) => issue.severity === 'error');
    return {
      ok: !hasError,
      issue_count: issues.length,
      issues,
      counts: {
        active_pages: activePages,
        citations: citationCount,
        backlinks: backlinkCount,
        new_article_candidates: newArticleCandidates.length,
      },
    };
  } finally {
    db.close();
  }
}