@hasna/knowledge 0.2.14 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/search.ts ADDED
@@ -0,0 +1,510 @@
1
+ import type { Database } from 'bun:sqlite';
2
+ import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
3
+ import { searchVectorIndex, type EmbeddingRuntimeOptions } from './embeddings';
4
+ import { sourceProvenance, type GeneratedArtifactProvenance, type KnowledgeProvenance } from './provenance';
5
+ import type { KnowledgeConfig } from './workspace';
6
+
7
/** Discriminates the kinds of entries a hybrid search can return. */
export type SearchResultKind =
  | 'source_chunk'
  | 'wiki_chunk'
  | 'wiki_page'
  | 'knowledge_index';

/** Provenance attached to a search entry: either source provenance or generated-artifact provenance. */
export type SearchProvenance = KnowledgeProvenance | GeneratedArtifactProvenance;
9
+
10
/** Input accepted by `hybridSearch`; embedding runtime settings are inherited from `EmbeddingRuntimeOptions`. */
export interface HybridSearchOptions extends EmbeddingRuntimeOptions {
  /** Path of the knowledge database to migrate, open and query. */
  dbPath: string;
  /** Free-text query; it is trimmed and must not be empty. */
  query: string;
  /** Maximum number of merged results; defaults to 10 and is clamped to 1..100. */
  limit?: number;
  /** Set to `true` to request the semantic (vector) pass explicitly. */
  semantic?: boolean;
  /** Workspace configuration forwarded to the vector index search. */
  config?: KnowledgeConfig;
}
17
+
18
/** Envelope returned by `hybridSearch`. */
export interface HybridSearchResult {
  /** The trimmed query that was executed. */
  query: string;
  /** The effective result limit after clamping. */
  limit: number;
  /** Which retrieval passes were enabled; keyword and catalog always run. */
  mode: {
    keyword: true;
    catalog: true;
    semantic: boolean;
  };
  /** Provider reported by the vector search, or null when it did not run or failed. */
  semantic_provider: string | null;
  /** Model reported by the vector search, or null when it did not run or failed. */
  semantic_model: string | null;
  /** Embedding dimensions reported by the vector search, or null when it did not run or failed. */
  semantic_dimensions: number | null;
  /** Row counts per pass; `merged_results` is the length of `results`. */
  counts: {
    keyword_results: number;
    catalog_results: number;
    semantic_results: number;
    merged_results: number;
  };
  /** Non-fatal problems, e.g. a `semantic_search_failed: ...` message. */
  warnings: string[];
  /** Merged entries, best score first, truncated to `limit`. */
  results: HybridSearchEntry[];
}
38
+
39
/** One merged search hit. */
export interface HybridSearchEntry {
  /** What the hit refers to (chunk, wiki page or knowledge index). */
  kind: SearchResultKind;
  /** Identifier of the chunk, wiki page or index row. */
  id: string;
  /** Display title when one is known. */
  title: string | null;
  /** Chunk text; null for catalog-only hits. */
  text: string | null;
  /** Combined score derived from `scores` plus a citation boost, capped at 1. */
  score: number;
  /** Per-pass scores; a key is absent when that pass did not match. */
  scores: {
    keyword?: number;
    semantic?: number;
    catalog?: number;
  };
  /** Originating source document, when known. */
  source: {
    uri: string | null;
    ref: string | null;
    kind: string | null;
    revision: string | null;
    hash: string | null;
  } | null;
  /** Chunk-level citation; null for catalog hits. */
  citation: {
    chunk_id: string | null;
    start_offset: number | null;
    end_offset: number | null;
  } | null;
  /** Generated artifact (wiki page or index) backing the hit, when any. */
  artifact: {
    uri: string | null;
    path: string | null;
    hash: string | null;
    shard_key: string | null;
  } | null;
  /** Provenance record attached to the hit, when available. */
  provenance: SearchProvenance | null;
  /** Tags naming the passes that produced the hit, e.g. `keyword_match`. */
  reasons: string[];
}
71
+
72
/** Row shape produced by the full-text chunk query in `selectFtsChunks`. */
interface FtsChunkRow {
  // Columns taken from the chunk itself.
  chunk_id: string;
  chunk_kind: string;
  wiki_page_id: string | null;
  text: string;
  token_count: number | null;
  start_offset: number | null;
  end_offset: number | null;
  chunk_metadata_json: string;
  // Columns from the left-joined source revision and source (null when not joined).
  source_revision_id: string | null;
  revision: string | null;
  hash: string | null;
  source_uri: string | null;
  source_kind: string | null;
  source_title: string | null;
  // Columns from the left-joined wiki page (null when not joined).
  wiki_path: string | null;
  wiki_title: string | null;
  wiki_artifact_uri: string | null;
  wiki_content_hash: string | null;
  wiki_status: string | null;
  wiki_metadata_json: string | null;
  /** Value of `bm25(chunks_fts)` for the row; the query orders by it ascending. */
  rank: number;
}
95
+
96
/** Row shape selected from `wiki_pages` by the catalog search. */
interface WikiPageRow {
  id: string;
  path: string;
  title: string;
  artifact_uri: string | null;
  content_hash: string | null;
  status: string;
  /** Raw JSON text; decoded with `parseJsonObject` before use. */
  metadata_json: string;
}
105
+
106
/** Row shape selected from `knowledge_indexes` by the catalog search. */
interface IndexRow {
  id: string;
  kind: string;
  name: string;
  artifact_uri: string | null;
  shard_key: string | null;
  /** Raw JSON text; decoded with `parseJsonObject` before use. */
  metadata_json: string;
}
114
+
115
/**
 * Decodes a JSON string, keeping the result only when it is a plain
 * (non-array, non-null) object.
 *
 * @param value JSON text, possibly missing.
 * @returns The decoded object, or `{}` for empty input, malformed JSON,
 *   or any JSON value that is not an object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> {
  if (value === null || value === undefined || value === '') return {};

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return {};
  }

  const isPlainObject = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  return isPlainObject ? (decoded as Record<string, unknown>) : {};
}
124
+
125
/**
 * Looks up the first key in `keys` whose metadata value is a non-empty string.
 *
 * @returns That string, or null when no key holds one.
 */
function metadataString(metadata: Record<string, unknown>, keys: string[]): string | null {
  const hit = keys
    .map((key) => metadata[key])
    .find((candidate) => typeof candidate === 'string' && candidate.length > 0);
  return typeof hit === 'string' ? hit : null;
}
132
+
133
/**
 * Looks up the first key in `keys` whose metadata value is a finite number.
 *
 * @returns That number, or null when no key holds one (NaN and infinities are skipped).
 */
function metadataNumber(metadata: Record<string, unknown>, keys: string[]): number | null {
  const hit = keys
    .map((key) => metadata[key])
    .find((candidate) => typeof candidate === 'number' && Number.isFinite(candidate));
  return typeof hit === 'number' ? hit : null;
}
140
+
141
/** Removes duplicates while keeping the first occurrence of each value in order. */
function unique<T>(values: T[]): T[] {
  const seen = new Set<T>(values);
  return [...seen];
}
144
+
145
/**
 * Splits a query into distinct search terms.
 *
 * The query is NFKC-normalized and lower-cased, then cut into runs of
 * Unicode letters, digits and underscores. Duplicates are dropped and at
 * most the first 16 terms are kept.
 */
function queryTerms(query: string): string[] {
  const normalized = query.normalize('NFKC').toLowerCase();
  const tokens = normalized.match(/[\p{L}\p{N}_]+/gu) ?? [];
  const distinct = unique(tokens.filter((token) => token.length > 0));
  return distinct.slice(0, 16);
}
152
+
153
/**
 * Builds an FTS5 MATCH expression that prefix-matches any of the terms.
 *
 * Each term is emitted as a quoted FTS5 string followed by `*`
 * (`"term"*`), so a term can never be interpreted as query syntax
 * (for example an uppercase `OR`/`AND`/`NOT`/`NEAR`), and embedded double
 * quotes are doubled as FTS5 requires. Empty terms are skipped because an
 * empty string is not a usable phrase.
 *
 * @param terms Search terms to OR together.
 * @returns The MATCH expression, or null when there is nothing to search for.
 */
function ftsQueryForTerms(terms: string[]): string | null {
  const phrases = terms
    .filter((term) => term.length > 0)
    .map((term) => `"${term.replace(/"/g, '""')}"*`);
  return phrases.length === 0 ? null : phrases.join(' OR ');
}
157
+
158
/** Backslash-escapes the LIKE metacharacters `\`, `%` and `_` so the term matches literally. */
function escapeLikeTerm(term: string): string {
  // `$&` re-inserts the matched character after the added backslash.
  return term.replace(/[\\%_]/g, '\\$&');
}
161
+
162
/**
 * Produces the bound parameters for a catalog LIKE clause: one `%term%`
 * pattern per field, for every term, in term order.
 *
 * @param terms Search terms.
 * @param fieldsPerTerm How many placeholders each term occupies.
 */
function likeParams(terms: string[], fieldsPerTerm: number): string[] {
  const params: string[] = [];
  for (const term of terms) {
    const pattern = `%${escapeLikeTerm(term)}%`;
    params.push(...Array.from({ length: fieldsPerTerm }, () => pattern));
  }
  return params;
}
165
+
166
/**
 * Converts an FTS5 `bm25()` rank and the row's position into a 0..1 keyword score.
 *
 * `bm25()` returns smaller (more negative) numbers for better matches, so
 * relevance is taken as `-rank` and squashed with `r / (1 + r)`; a stronger
 * match therefore yields a higher score. The positional score
 * `1 / (1 + index)` acts as a floor, so the first row always scores 1.
 *
 * @param rank Value of `bm25(chunks_fts)` for the row.
 * @param index Zero-based position of the row in rank order.
 */
function scoreFromRank(rank: number, index: number): number {
  const relevance = Number.isFinite(rank) ? Math.max(0, -rank) : 0;
  const rankScore = relevance / (1 + relevance);
  const orderScore = 1 / (1 + index);
  return roundScore(Math.max(rankScore, orderScore));
}

/**
 * Scores a catalog row by the share of terms that occur in its haystack.
 *
 * @param haystack Lower-cased text assembled from the row's searchable fields.
 * @param terms Lower-cased search terms.
 * @returns 0 when nothing matches, otherwise a value from 0.35 up to 0.85.
 */
function catalogScore(haystack: string, terms: string[]): number {
  if (terms.length === 0) return 0;
  const matched = terms.filter((term) => haystack.includes(term)).length;
  if (matched === 0) return 0;
  return roundScore(Math.min(0.85, 0.35 + (matched / terms.length) * 0.5));
}

/**
 * Maps a similarity in [-1, 1] onto [0, 1], clamping out-of-range values.
 * Non-finite input scores 0 so that a NaN cannot leak into combined scores
 * and break result ordering.
 */
function semanticScore(score: number): number {
  if (!Number.isFinite(score)) return 0;
  return roundScore(Math.max(0, Math.min(1, (score + 1) / 2)));
}

/** Rounds a score to six decimal places. */
function roundScore(score: number): number {
  return Number(score.toFixed(6));
}
186
+
187
/**
 * Blends the per-pass scores into one ranking score.
 *
 * Weights are 0.55 (keyword), 0.4 (semantic) and 0.35 (catalog); entries
 * that carry a chunk citation receive an extra 0.05. The total is capped
 * at 1 and rounded.
 */
function combinedScore(scores: HybridSearchEntry['scores'], citation: HybridSearchEntry['citation']): number {
  const keywordPart = (scores.keyword ?? 0) * 0.55;
  const semanticPart = (scores.semantic ?? 0) * 0.4;
  const catalogPart = (scores.catalog ?? 0) * 0.35;
  const citationBoost = citation?.chunk_id ? 0.05 : 0;
  const total = keywordPart + semanticPart + catalogPart + citationBoost;
  return roundScore(Math.min(1, total));
}
194
+
195
/**
 * Returns the `provenance` value stored in metadata when it is a plain
 * (non-array) object; otherwise null. The object's fields are not validated.
 */
function existingProvenance(metadata: Record<string, unknown>): SearchProvenance | null {
  const candidate = metadata.provenance;
  if (!candidate || typeof candidate !== 'object' || Array.isArray(candidate)) return null;
  return candidate as SearchProvenance;
}
199
+
200
/**
 * Resolves provenance for a chunk row.
 *
 * Provenance already stored in the chunk metadata wins. Otherwise, when the
 * row has a source revision id or a source URI, a source provenance record
 * is built from the joined columns, falling back to same-named metadata
 * fields. Rows with neither yield null.
 */
function provenanceForChunk(row: FtsChunkRow): SearchProvenance | null {
  const metadata = parseJsonObject(row.chunk_metadata_json);

  const stored = existingProvenance(metadata);
  if (stored) return stored;

  const hasSourceLink = Boolean(row.source_revision_id) || Boolean(row.source_uri);
  if (!hasSourceLink) return null;

  const fromMetadata = (key: string): string | null => metadataString(metadata, [key]);
  return sourceProvenance({
    source_ref: fromMetadata('source_ref'),
    source_uri: row.source_uri ?? fromMetadata('source_uri'),
    source_kind: row.source_kind ?? fromMetadata('source_kind'),
    source_revision_id: row.source_revision_id,
    revision: row.revision ?? fromMetadata('revision'),
    hash: row.hash ?? fromMetadata('hash'),
    chunk_id: row.chunk_id,
    start_offset: row.start_offset ?? metadataNumber(metadata, ['start_offset']),
    end_offset: row.end_offset ?? metadataNumber(metadata, ['end_offset']),
    status: fromMetadata('status'),
    resolver: 'open-files-read-only',
  });
}
219
+
220
/**
 * Runs the full-text query against `chunks_fts` and returns matching chunks
 * joined with their source revision, source and wiki page, best rank first.
 *
 * @param db Open knowledge database.
 * @param ftsQuery FTS5 MATCH expression; a null or empty query short-circuits to no rows.
 * @param limit Maximum number of rows.
 */
function selectFtsChunks(db: Database, ftsQuery: string | null, limit: number): FtsChunkRow[] {
  if (ftsQuery === null || ftsQuery === '') return [];

  const sql = `SELECT
      chunks_fts.chunk_id,
      c.kind AS chunk_kind,
      c.wiki_page_id,
      c.text,
      c.token_count,
      c.start_offset,
      c.end_offset,
      c.metadata_json AS chunk_metadata_json,
      c.source_revision_id,
      sr.revision,
      sr.hash,
      s.uri AS source_uri,
      s.kind AS source_kind,
      s.title AS source_title,
      wp.path AS wiki_path,
      wp.title AS wiki_title,
      wp.artifact_uri AS wiki_artifact_uri,
      wp.content_hash AS wiki_content_hash,
      wp.status AS wiki_status,
      wp.metadata_json AS wiki_metadata_json,
      bm25(chunks_fts) AS rank
    FROM chunks_fts
    JOIN chunks c ON c.id = chunks_fts.chunk_id
    LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
    LEFT JOIN sources s ON s.id = sr.source_id
    LEFT JOIN wiki_pages wp ON wp.id = c.wiki_page_id
    WHERE chunks_fts MATCH ?
    ORDER BY rank ASC
    LIMIT ?`;

  const statement = db.query<FtsChunkRow, [string, number]>(sql);
  return statement.all(ftsQuery, limit);
}
255
+
256
/**
 * Builds the WHERE fragment for a catalog search: for every term, a
 * parenthesised group that ORs a case-folded LIKE test over each field;
 * the groups themselves are ORed. With no terms the fragment matches nothing.
 *
 * Placeholders are emitted term-major, matching the order of `likeParams`.
 */
function catalogWhere(fields: string[], terms: string[]): string {
  if (terms.length === 0) return '1 = 0';
  // The group text does not depend on the term, so build it once.
  const fieldTests = fields.map((field) => `lower(COALESCE(${field}, '')) LIKE ? ESCAPE '\\'`);
  const group = `(${fieldTests.join(' OR ')})`;
  return terms.map(() => group).join(' OR ');
}
261
+
262
/**
 * Finds active wiki pages whose path, title, artifact URI or metadata JSON
 * contains any of the terms, most recently updated first.
 */
function selectWikiPages(db: Database, terms: string[], limit: number): WikiPageRow[] {
  const fields = ['path', 'title', 'artifact_uri', 'metadata_json'];
  const where = catalogWhere(fields, terms);
  const params = likeParams(terms, fields.length);

  const statement = db.query<WikiPageRow, [...string[], number]>(
    `SELECT id, path, title, artifact_uri, content_hash, status, metadata_json
     FROM wiki_pages
     WHERE status = 'active' AND (${where})
     ORDER BY updated_at DESC
     LIMIT ?`,
  );
  return statement.all(...params, limit);
}
272
+
273
/**
 * Finds knowledge index rows whose kind, name, shard key, artifact URI or
 * metadata JSON contains any of the terms, most recently updated first.
 */
function selectKnowledgeIndexes(db: Database, terms: string[], limit: number): IndexRow[] {
  const fields = ['kind', 'name', 'shard_key', 'artifact_uri', 'metadata_json'];
  const where = catalogWhere(fields, terms);
  const params = likeParams(terms, fields.length);

  const statement = db.query<IndexRow, [...string[], number]>(
    `SELECT id, kind, name, artifact_uri, shard_key, metadata_json
     FROM knowledge_indexes
     WHERE ${where}
     ORDER BY updated_at DESC
     LIMIT ?`,
  );
  return statement.all(...params, limit);
}
283
+
284
/**
 * Converts a full-text chunk row into a search entry.
 *
 * Rows linked to a wiki page become `wiki_chunk` entries with artifact
 * details; all others become `source_chunk` entries. Source fields prefer
 * the joined columns and fall back to same-named chunk metadata. Citation
 * offsets use the same column-then-metadata fallback as
 * `provenanceForChunk`, so the citation and the provenance of one chunk
 * cannot disagree.
 *
 * @param row Row returned by the full-text query.
 * @param keywordScore Normalised keyword score for the row.
 */
function chunkResult(row: FtsChunkRow, keywordScore: number): HybridSearchEntry {
  const metadata = parseJsonObject(row.chunk_metadata_json);
  const isWiki = Boolean(row.wiki_page_id);
  const sourceRef = metadataString(metadata, ['source_ref']);
  const sourceUri = row.source_uri ?? metadataString(metadata, ['source_uri']);

  const scores: HybridSearchEntry['scores'] = { keyword: keywordScore };
  const citation: HybridSearchEntry['citation'] = {
    chunk_id: row.chunk_id,
    start_offset: row.start_offset ?? metadataNumber(metadata, ['start_offset']),
    end_offset: row.end_offset ?? metadataNumber(metadata, ['end_offset']),
  };
  // A source block is only reported when there is something to point at.
  const source: HybridSearchEntry['source'] = sourceUri || sourceRef
    ? {
        uri: sourceUri,
        ref: sourceRef,
        kind: row.source_kind ?? metadataString(metadata, ['source_kind']),
        revision: row.revision ?? metadataString(metadata, ['revision']),
        hash: row.hash ?? metadataString(metadata, ['hash']),
      }
    : null;
  const artifact: HybridSearchEntry['artifact'] = isWiki
    ? {
        uri: row.wiki_artifact_uri,
        path: row.wiki_path,
        hash: row.wiki_content_hash,
        shard_key: row.wiki_path,
      }
    : null;

  return {
    kind: isWiki ? 'wiki_chunk' : 'source_chunk',
    id: row.chunk_id,
    title: isWiki ? row.wiki_title : row.source_title,
    text: row.text,
    score: combinedScore(scores, citation),
    scores,
    source,
    citation,
    artifact,
    provenance: provenanceForChunk(row),
    reasons: ['keyword_match'],
  };
}
321
+
322
/**
 * Converts a wiki page catalog row into a search entry.
 *
 * The catalog score is computed over the lower-cased concatenation of the
 * page path, title, artifact URI and raw metadata JSON. The page path is
 * also reported as the artifact shard key.
 */
function wikiPageResult(row: WikiPageRow, terms: string[]): HybridSearchEntry {
  const metadata = parseJsonObject(row.metadata_json);
  const haystack = [row.path, row.title, row.artifact_uri ?? '', row.metadata_json].join(' ').toLowerCase();
  const scores: HybridSearchEntry['scores'] = { catalog: catalogScore(haystack, terms) };

  return {
    kind: 'wiki_page',
    id: row.id,
    title: row.title,
    text: null,
    score: combinedScore(scores, null),
    scores,
    source: null,
    citation: null,
    artifact: {
      uri: row.artifact_uri,
      path: row.path,
      hash: row.content_hash,
      shard_key: row.path,
    },
    provenance: existingProvenance(metadata),
    reasons: ['wiki_catalog_match'],
  };
}
346
+
347
/**
 * Converts a knowledge index catalog row into a search entry.
 *
 * The catalog score is computed over the lower-cased concatenation of the
 * row's kind, name, shard key, artifact URI and raw metadata JSON. The
 * artifact path and hash are read from the `artifact_key` and
 * `content_hash` metadata fields.
 */
function indexResult(row: IndexRow, terms: string[]): HybridSearchEntry {
  const metadata = parseJsonObject(row.metadata_json);
  const haystack = [row.kind, row.name, row.shard_key ?? '', row.artifact_uri ?? '', row.metadata_json]
    .join(' ')
    .toLowerCase();
  const scores: HybridSearchEntry['scores'] = { catalog: catalogScore(haystack, terms) };

  return {
    kind: 'knowledge_index',
    id: row.id,
    title: row.name,
    text: null,
    score: combinedScore(scores, null),
    scores,
    source: null,
    citation: null,
    artifact: {
      uri: row.artifact_uri,
      path: metadataString(metadata, ['artifact_key']),
      hash: metadataString(metadata, ['content_hash']),
      shard_key: row.shard_key,
    },
    provenance: existingProvenance(metadata),
    reasons: ['index_catalog_match'],
  };
}
371
+
372
/**
 * Adds an entry to the merged result map, folding it into an existing entry
 * for the same item when there is one.
 *
 * Chunk entries are keyed by chunk id alone, regardless of whether they are
 * labelled `source_chunk` or `wiki_chunk`: the semantic pass labels every
 * hit `source_chunk`, so keying on the label would list a wiki chunk twice
 * when both the keyword and the semantic pass find it. Other kinds are
 * keyed by kind and id.
 *
 * On merge, each per-pass score keeps its maximum, reasons are unioned,
 * missing descriptive fields are filled from the incoming entry, and the
 * combined score is recomputed.
 *
 * @param results Map being accumulated; mutated in place.
 * @param entry Entry to add or fold in.
 */
function mergeResult(results: Map<string, HybridSearchEntry>, entry: HybridSearchEntry): void {
  const isChunk = entry.kind === 'source_chunk' || entry.kind === 'wiki_chunk';
  const key = isChunk ? `chunk:${entry.id}` : `${entry.kind}:${entry.id}`;

  const existing = results.get(key);
  if (!existing) {
    results.set(key, entry);
    return;
  }

  // `wiki_chunk` is the more specific label; `source_chunk` may just be a default.
  if (entry.kind === 'wiki_chunk') existing.kind = 'wiki_chunk';

  // A pass that never matched stays absent instead of being recorded as 0.
  const best = (a: number | undefined, b: number | undefined): number | undefined =>
    Math.max(a ?? 0, b ?? 0) || undefined;
  existing.scores = {
    keyword: best(existing.scores.keyword, entry.scores.keyword),
    semantic: best(existing.scores.semantic, entry.scores.semantic),
    catalog: best(existing.scores.catalog, entry.scores.catalog),
  };
  existing.reasons = unique([...existing.reasons, ...entry.reasons]);
  existing.text = existing.text ?? entry.text;
  existing.title = existing.title ?? entry.title;
  existing.source = existing.source ?? entry.source;
  existing.citation = existing.citation ?? entry.citation;
  existing.artifact = existing.artifact ?? entry.artifact;
  existing.provenance = existing.provenance ?? entry.provenance;
  existing.score = combinedScore(existing.scores, existing.citation);
}
393
+
394
/**
 * Returns the entries ordered by descending score.
 *
 * Ties are broken by kind (source chunks, wiki chunks, wiki pages, then
 * knowledge indexes) and finally by id. The input array is left untouched;
 * a sorted copy is returned.
 */
function sortResults(results: HybridSearchEntry[]): HybridSearchEntry[] {
  const kindOrder: Record<SearchResultKind, number> = {
    source_chunk: 0,
    wiki_chunk: 1,
    wiki_page: 2,
    knowledge_index: 3,
  };
  // Copy first: Array.prototype.sort reorders the array it is called on.
  return [...results].sort((a, b) => {
    if (b.score !== a.score) return b.score - a.score;
    return kindOrder[a.kind] - kindOrder[b.kind] || a.id.localeCompare(b.id);
  });
}
406
+
407
/**
 * Searches the knowledge database with keyword, catalog and (optionally)
 * semantic retrieval, and merges the hits into one ranked list.
 *
 * Steps:
 * 1. Migrate and open the database at `options.dbPath`.
 * 2. Keyword pass: full-text search over chunks.
 * 3. Catalog pass: LIKE search over wiki pages and knowledge indexes.
 * 4. Semantic pass (when `semantic` is true, `fake` is true, or a
 *    `modelRef` is given): vector index search. A failure here is recorded
 *    in `warnings` and does not fail the search.
 * 5. Merge, sort by score and truncate to the limit.
 *
 * @param options Query, database path and retrieval settings. `limit`
 *   defaults to 10, is truncated to an integer and clamped to 1..100; a NaN
 *   limit falls back to the default.
 * @returns The ranked results with per-pass counts and warnings.
 * @throws Error when the trimmed query is empty.
 */
export async function hybridSearch(options: HybridSearchOptions): Promise<HybridSearchResult> {
  const query = options.query.trim();
  if (!query) throw new Error('Search query is required.');

  // The limit is bound to SQL LIMIT and used for slicing, so it must be a real integer.
  const requestedLimit = options.limit ?? 10;
  const limit = Number.isNaN(requestedLimit)
    ? 10
    : Math.max(1, Math.min(Math.trunc(requestedLimit), 100));

  const terms = queryTerms(query);
  const ftsQuery = ftsQueryForTerms(terms);
  const semanticEnabled = options.semantic === true || options.fake === true || Boolean(options.modelRef);
  const warnings: string[] = [];
  let semanticProvider: string | null = null;
  let semanticModel: string | null = null;
  let semanticDimensions: number | null = null;
  let keywordCount = 0;
  let catalogCount = 0;
  let semanticCount = 0;
  const merged = new Map<string, HybridSearchEntry>();

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    // Over-fetch so that merging and re-ranking still leave `limit` good candidates.
    const ftsRows = selectFtsChunks(db, ftsQuery, Math.max(limit * 3, 20));
    keywordCount = ftsRows.length;
    ftsRows.forEach((row, index) => mergeResult(merged, chunkResult(row, scoreFromRank(row.rank, index))));

    const wikiRows = selectWikiPages(db, terms, Math.max(limit, 10));
    const indexRows = selectKnowledgeIndexes(db, terms, Math.max(limit, 10));
    catalogCount = wikiRows.length + indexRows.length;
    wikiRows.forEach((row) => mergeResult(merged, wikiPageResult(row, terms)));
    indexRows.forEach((row) => mergeResult(merged, indexResult(row, terms)));
  } finally {
    // Close the handle before the vector search runs against the same path.
    db.close();
  }

  if (semanticEnabled) {
    try {
      const semantic = await searchVectorIndex({
        dbPath: options.dbPath,
        query,
        limit: Math.max(limit * 3, 20),
        config: options.config,
        env: options.env,
        modelRef: options.modelRef,
        dimensions: options.dimensions,
        fake: options.fake,
        batchSize: options.batchSize,
        maxParallelCalls: options.maxParallelCalls,
      });
      semanticProvider = semantic.provider;
      semanticModel = semantic.model;
      semanticDimensions = semantic.dimensions;
      semanticCount = semantic.results.length;
      for (const row of semantic.results) {
        const result: HybridSearchEntry = {
          kind: 'source_chunk',
          id: row.chunk_id,
          title: null,
          text: row.text,
          score: 0,
          scores: { semantic: semanticScore(row.score) },
          source: {
            uri: row.source_uri,
            ref: row.source_ref,
            kind: row.provenance?.source_kind ?? null,
            revision: row.revision,
            hash: row.hash,
          },
          citation: {
            chunk_id: row.chunk_id,
            start_offset: row.provenance?.start_offset ?? null,
            end_offset: row.provenance?.end_offset ?? null,
          },
          artifact: null,
          provenance: row.provenance,
          reasons: ['semantic_match'],
        };
        result.score = combinedScore(result.scores, result.citation);
        mergeResult(merged, result);
      }
    } catch (error) {
      // Best effort: keyword and catalog results are still returned.
      warnings.push(`semantic_search_failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  const results = sortResults(Array.from(merged.values())).slice(0, limit);
  return {
    query,
    limit,
    mode: {
      keyword: true,
      catalog: true,
      semantic: semanticEnabled,
    },
    semantic_provider: semanticProvider,
    semantic_model: semanticModel,
    semantic_dimensions: semanticDimensions,
    counts: {
      keyword_results: keywordCount,
      catalog_results: catalogCount,
      semantic_results: semanticCount,
      merged_results: results.length,
    },
    warnings,
    results,
  };
}
package/src/service.ts CHANGED
@@ -12,6 +12,7 @@ import { ingestOpenFilesManifest } from './manifest-ingest';
12
12
  import { ingestSourceRef } from './source-ingest';
13
13
  import { resolveOpenFilesSource } from './source-resolver';
14
14
  import { providerStatus, listModelRegistry, type ProviderStatusResult, type ModelRegistryEntry } from './providers';
15
+ import { hybridSearch, type HybridSearchOptions } from './search';
15
16
  import { resolveSafetyPolicy } from './safety';
16
17
  import {
17
18
  recordStorageObjects,
@@ -214,6 +215,15 @@ export class KnowledgeService {
214
215
  config: this.config(),
215
216
  });
216
217
  }
218
+
219
/**
 * Runs a hybrid search against this service's workspace.
 *
 * The database path and configuration are taken from the service; every
 * other option is passed through to `hybridSearch`.
 */
async search(options: Omit<HybridSearchOptions, 'dbPath' | 'config'>) {
  const { knowledgeDbPath } = this.ensureWorkspace();
  const config = this.config();
  return hybridSearch({ ...options, dbPath: knowledgeDbPath, config });
}
217
227
  }
218
228
 
219
229
  export function createKnowledgeService(options: KnowledgeServiceOptions = {}): KnowledgeService {
@@ -35,6 +35,11 @@ function stableId(prefix: string, value: string): string {
35
35
  return `${prefix}_${createHash('sha256').update(value).digest('hex').slice(0, 20)}`;
36
36
  }
37
37
 
38
/**
 * Estimates a token count from the number of whitespace-separated words,
 * at 1.25 tokens per word rounded up. The result is never below 1, even
 * for empty or whitespace-only text.
 */
function estimateTokenCount(text: string): number {
  const wordCount = text.match(/\S+/g)?.length ?? 0;
  const estimate = Math.ceil(wordCount * 1.25);
  return estimate < 1 ? 1 : estimate;
}
42
+
38
43
  export function agentSchemaTemplate(): string {
39
44
  return `# Knowledge Agent Schema v1
40
45
 
@@ -151,6 +156,39 @@ function provenanceFor(artifact: CatalogArtifact): GeneratedArtifactProvenance {
151
156
  });
152
157
  }
153
158
 
159
/**
 * Replaces the stored chunk of a wiki page with a single chunk holding the
 * whole page body, and indexes it for full-text search.
 *
 * Existing chunks of the page are removed from `chunks_fts` and `chunks`
 * first. The new chunk id is derived from the page id and the artifact hash
 * (or its URI when there is no hash).
 *
 * @param db Open knowledge database.
 * @param pageId Id of the wiki page the chunk belongs to.
 * @param title Title stored in the full-text index.
 * @param artifact Artifact backing the page; supplies key, URI, hash and provenance.
 * @param body Page text stored as the chunk text.
 * @param now Timestamp written to `created_at`.
 */
function recordWikiChunk(db: Database, pageId: string, title: string, artifact: CatalogArtifact, body: string, now: string): void {
  const provenance = provenanceFor(artifact);
  const chunkId = stableId('chk', `${pageId}\u0000${artifact.hash ?? artifact.uri}`);

  // Drop the page's previous chunks, full-text rows first.
  const staleChunks = db.query<{ id: string }, [string]>('SELECT id FROM chunks WHERE wiki_page_id = ?').all(pageId);
  staleChunks.forEach((stale) => {
    db.run('DELETE FROM chunks_fts WHERE chunk_id = ?', [stale.id]);
  });
  db.run('DELETE FROM chunks WHERE wiki_page_id = ?', [pageId]);

  const metadataJson = JSON.stringify({
    artifact_key: artifact.key,
    artifact_uri: artifact.uri,
    content_hash: artifact.hash ?? null,
    provenance,
  });
  const tokenCount = estimateTokenCount(body);
  // The single chunk has ordinal 0 and spans offsets 0..body.length.
  db.run(
    `INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    [chunkId, pageId, 'wiki', 0, body, tokenCount, 0, body.length, metadataJson, now],
  );

  db.run(
    'INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)',
    [chunkId, body, title, artifact.uri],
  );
}
191
+
154
192
  export function recordWikiLayoutCatalog(db: Database, artifacts: CatalogArtifact[], now = new Date()): void {
155
193
  const timestamp = now.toISOString();
156
194
  const rootIndex = artifacts.find((artifact) => artifact.key.endsWith('indexes/root.md'));
@@ -182,6 +220,7 @@ export function recordWikiLayoutCatalog(db: Database, artifacts: CatalogArtifact
182
220
  }
183
221
 
184
222
  if (wikiReadme) {
223
+ const wikiPageId = stableId('wiki', 'wiki/README.md');
185
224
  db.run(
186
225
  `INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
187
226
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
@@ -193,7 +232,7 @@ export function recordWikiLayoutCatalog(db: Database, artifacts: CatalogArtifact
193
232
  metadata_json = excluded.metadata_json,
194
233
  updated_at = excluded.updated_at`,
195
234
  [
196
- stableId('wiki', 'wiki/README.md'),
235
+ wikiPageId,
197
236
  'wiki/README.md',
198
237
  'Wiki',
199
238
  wikiReadme.uri,
@@ -207,5 +246,6 @@ export function recordWikiLayoutCatalog(db: Database, artifacts: CatalogArtifact
207
246
  timestamp,
208
247
  ],
209
248
  );
249
+ recordWikiChunk(db, wikiPageId, 'Wiki', wikiReadme, wikiReadmeTemplate(), timestamp);
210
250
  }
211
251
  }