@hasna/knowledge 0.2.13 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
13660
13660
  // package.json
13661
13661
  var package_default = {
13662
13662
  name: "@hasna/knowledge",
13663
- version: "0.2.13",
13663
+ version: "0.2.15",
13664
13664
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13665
13665
  type: "module",
13666
13666
  bin: {
@@ -13790,6 +13790,12 @@ function defaultKnowledgeConfig() {
13790
13790
  default_model: "deepseek-chat"
13791
13791
  }
13792
13792
  },
13793
+ embeddings: {
13794
+ default_model: "openai:text-embedding-3-small",
13795
+ dimensions: 1536,
13796
+ batch_size: 64,
13797
+ max_parallel_calls: 4
13798
+ },
13793
13799
  safety: {
13794
13800
  network: {
13795
13801
  web_search_enabled: false,
@@ -14128,10 +14134,8 @@ function createArtifactStore(config2, workspace) {
14128
14134
  return new LocalArtifactStore(workspace.artifactsDir);
14129
14135
  }
14130
14136
 
14131
- // src/outbox-consume.ts
14132
- import { createHash as createHash2, randomUUID as randomUUID3 } from "crypto";
14133
- import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
14134
- import { basename } from "path";
14137
+ // src/embeddings.ts
14138
+ import { createHash } from "crypto";
14135
14139
 
14136
14140
  // src/knowledge-db.ts
14137
14141
  import { Database } from "bun:sqlite";
@@ -14349,6 +14353,38 @@ CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
14349
14353
  INSERT OR IGNORE INTO schema_versions(version, applied_at)
14350
14354
  VALUES (3, datetime('now'));
14351
14355
  `;
14356
+ var MIGRATION_4 = `
14357
+ CREATE TABLE IF NOT EXISTS vector_index_entries (
14358
+ id TEXT PRIMARY KEY,
14359
+ chunk_id TEXT NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
14360
+ source_revision_id TEXT REFERENCES source_revisions(id) ON DELETE CASCADE,
14361
+ provider TEXT NOT NULL,
14362
+ model TEXT NOT NULL,
14363
+ dimensions INTEGER NOT NULL,
14364
+ vector_json TEXT NOT NULL,
14365
+ vector_norm REAL NOT NULL,
14366
+ source_uri TEXT,
14367
+ source_ref TEXT,
14368
+ revision TEXT,
14369
+ hash TEXT,
14370
+ start_offset INTEGER,
14371
+ end_offset INTEGER,
14372
+ token_count INTEGER,
14373
+ status TEXT NOT NULL DEFAULT 'active',
14374
+ metadata_json TEXT NOT NULL DEFAULT '{}',
14375
+ created_at TEXT NOT NULL,
14376
+ updated_at TEXT NOT NULL,
14377
+ UNIQUE(chunk_id, provider, model)
14378
+ );
14379
+
14380
+ CREATE INDEX IF NOT EXISTS idx_vector_index_provider_model ON vector_index_entries(provider, model);
14381
+ CREATE INDEX IF NOT EXISTS idx_vector_index_source_revision ON vector_index_entries(source_revision_id);
14382
+ CREATE INDEX IF NOT EXISTS idx_vector_index_source_uri ON vector_index_entries(source_uri);
14383
+ CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(status);
14384
+
14385
+ INSERT OR IGNORE INTO schema_versions(version, applied_at)
14386
+ VALUES (4, datetime('now'));
14387
+ `;
14352
14388
  function openKnowledgeDb(path) {
14353
14389
  ensureParentDir(path);
14354
14390
  const db = new Database(path);
@@ -14364,6 +14400,8 @@ function migrateKnowledgeDb(path) {
14364
14400
  db.exec(MIGRATION_2);
14365
14401
  if (getSchemaVersion(db) < 3)
14366
14402
  db.exec(MIGRATION_3);
14403
+ if (getSchemaVersion(db) < 4)
14404
+ db.exec(MIGRATION_4);
14367
14405
  return { path, schema_version: getSchemaVersion(db) };
14368
14406
  } finally {
14369
14407
  db.close();
@@ -14393,15 +14431,530 @@ function getKnowledgeDbStats(path) {
14393
14431
  redaction_findings: count(db, "redaction_findings"),
14394
14432
  audit_events: count(db, "audit_events"),
14395
14433
  approval_gates: count(db, "approval_gates"),
14396
- storage_objects: count(db, "storage_objects")
14434
+ storage_objects: count(db, "storage_objects"),
14435
+ embeddings: count(db, "chunk_embeddings"),
14436
+ vector_entries: count(db, "vector_index_entries")
14437
+ };
14438
+ } finally {
14439
+ db.close();
14440
+ }
14441
+ }
14442
+
14443
+ // src/providers.ts
14444
+ var DEFAULT_PROVIDER_SETTINGS = {
14445
+ openai: {
14446
+ api_key_env: "OPENAI_API_KEY",
14447
+ default_model: "gpt-5.2"
14448
+ },
14449
+ anthropic: {
14450
+ api_key_env: "ANTHROPIC_API_KEY",
14451
+ default_model: "claude-sonnet-4-6"
14452
+ },
14453
+ deepseek: {
14454
+ api_key_env: "DEEPSEEK_API_KEY",
14455
+ default_model: "deepseek-chat"
14456
+ }
14457
+ };
14458
+ var PROVIDER_CAPABILITIES = {
14459
+ openai: {
14460
+ text_generation: true,
14461
+ structured_output: true,
14462
+ tool_usage: true,
14463
+ tool_streaming: true,
14464
+ image_input: true,
14465
+ native_web_search: true,
14466
+ reasoning: true,
14467
+ embeddings: true
14468
+ },
14469
+ anthropic: {
14470
+ text_generation: true,
14471
+ structured_output: true,
14472
+ tool_usage: true,
14473
+ tool_streaming: true,
14474
+ image_input: true,
14475
+ native_web_search: false,
14476
+ reasoning: true,
14477
+ embeddings: false
14478
+ },
14479
+ deepseek: {
14480
+ text_generation: true,
14481
+ structured_output: true,
14482
+ tool_usage: true,
14483
+ tool_streaming: true,
14484
+ image_input: false,
14485
+ native_web_search: false,
14486
+ reasoning: true,
14487
+ embeddings: false
14488
+ }
14489
+ };
14490
+ var BUILTIN_ALIASES = {
14491
+ default: "openai:gpt-5.2",
14492
+ fast: "openai:gpt-5-mini",
14493
+ reasoning: "anthropic:claude-opus-4-6",
14494
+ sonnet: "anthropic:claude-sonnet-4-6",
14495
+ deepseek: "deepseek:deepseek-chat",
14496
+ "deepseek-reasoning": "deepseek:deepseek-reasoner"
14497
+ };
14498
+ function providerConfig(config2) {
14499
+ return config2.providers ?? {};
14500
+ }
14501
+ function providerSettings(config2, provider) {
14502
+ const configured = providerConfig(config2)[provider] ?? {};
14503
+ return {
14504
+ ...DEFAULT_PROVIDER_SETTINGS[provider],
14505
+ ...configured
14506
+ };
14507
+ }
14508
+ function modelAliases(config2) {
14509
+ const configured = providerConfig(config2);
14510
+ return {
14511
+ ...BUILTIN_ALIASES,
14512
+ ...configured.default_model ? { default: configured.default_model } : {},
14513
+ ...configured.aliases ?? {}
14514
+ };
14515
+ }
14516
+ function parseModelRef(modelRef) {
14517
+ const [provider, ...rest] = modelRef.split(":");
14518
+ const model = rest.join(":");
14519
+ if (provider !== "openai" && provider !== "anthropic" && provider !== "deepseek") {
14520
+ throw new Error(`Unsupported AI provider: ${provider}`);
14521
+ }
14522
+ if (!model)
14523
+ throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
14524
+ return { provider, model };
14525
+ }
14526
+ function resolveModelRef(aliasOrRef, config2) {
14527
+ const aliases = modelAliases(config2);
14528
+ return aliases[aliasOrRef] ?? aliasOrRef;
14529
+ }
14530
+ function listModelRegistry(config2) {
14531
+ const aliases = modelAliases(config2);
14532
+ return Object.entries(aliases).map(([alias, modelRef]) => {
14533
+ const parsed = parseModelRef(modelRef);
14534
+ return {
14535
+ alias,
14536
+ model_ref: modelRef,
14537
+ provider: parsed.provider,
14538
+ model: parsed.model,
14539
+ default: alias === "default",
14540
+ capabilities: PROVIDER_CAPABILITIES[parsed.provider]
14541
+ };
14542
+ });
14543
+ }
14544
+ function providerCredentialStatus(config2, env = process.env) {
14545
+ return Object.keys(DEFAULT_PROVIDER_SETTINGS).map((provider) => {
14546
+ const settings = providerSettings(config2, provider);
14547
+ const configured = Boolean(env[settings.api_key_env]);
14548
+ return {
14549
+ provider,
14550
+ api_key_env: settings.api_key_env,
14551
+ configured,
14552
+ source: configured ? "env" : "missing",
14553
+ base_url: settings.base_url ?? null,
14554
+ default_model: settings.default_model
14555
+ };
14556
+ });
14557
+ }
14558
+ function providerStatus(config2, env = process.env) {
14559
+ return {
14560
+ default_model: resolveModelRef("default", config2),
14561
+ providers: providerCredentialStatus(config2, env),
14562
+ models: listModelRegistry(config2)
14563
+ };
14564
+ }
14565
+ function assertProviderCredentials(provider, config2, env = process.env) {
14566
+ const status = providerCredentialStatus(config2, env).find((entry) => entry.provider === provider);
14567
+ if (!status)
14568
+ throw new Error(`Unsupported AI provider: ${provider}`);
14569
+ if (!status.configured)
14570
+ throw new Error(`Missing ${status.api_key_env} for ${provider}. Set the env var to use this provider.`);
14571
+ return status;
14572
+ }
14573
+
14574
+ // src/provenance.ts
14575
+ function isStaleStatus(status) {
14576
+ return ["deleted", "stale", "invalidated", "reindex_required"].includes((status ?? "").toLowerCase());
14577
+ }
14578
+ function sourceProvenance(input) {
14579
+ const status = input.status ?? null;
14580
+ return {
14581
+ source_owner: "open-files",
14582
+ source_ref: input.source_ref ?? null,
14583
+ source_uri: input.source_uri ?? null,
14584
+ source_kind: input.source_kind ?? null,
14585
+ source_revision_id: input.source_revision_id ?? null,
14586
+ revision: input.revision ?? null,
14587
+ hash: input.hash ?? null,
14588
+ chunk_id: input.chunk_id ?? null,
14589
+ start_offset: input.start_offset ?? null,
14590
+ end_offset: input.end_offset ?? null,
14591
+ status,
14592
+ read_only: true,
14593
+ citation_required: true,
14594
+ resolver: input.resolver ?? null,
14595
+ stale: isStaleStatus(status)
14596
+ };
14597
+ }
14598
+ function generatedArtifactProvenance(input) {
14599
+ return {
14600
+ source_owner: "open-files",
14601
+ generated_from: input.generated_from,
14602
+ artifact_key: input.artifact_key,
14603
+ source_refs: input.source_refs ?? [],
14604
+ read_only_sources: true,
14605
+ citation_required: input.citation_required ?? true,
14606
+ raw_source_bytes_stored_in_open_knowledge: false
14607
+ };
14608
+ }
14609
+ function withProvenance(metadata, provenance) {
14610
+ return {
14611
+ ...metadata,
14612
+ provenance
14613
+ };
14614
+ }
14615
+
14616
+ // src/embeddings.ts
14617
+ var DEFAULT_EMBEDDING_MODEL_REF = "openai:text-embedding-3-small";
14618
+ var DEFAULT_EMBEDDING_DIMENSIONS = 1536;
14619
+ function embeddingConfig(config2) {
14620
+ return config2?.embeddings ?? {};
14621
+ }
14622
+ function stableId(prefix, value) {
14623
+ return `${prefix}_${createHash("sha256").update(value).digest("hex").slice(0, 20)}`;
14624
+ }
14625
+ function parseJsonObject(value) {
14626
+ if (!value)
14627
+ return {};
14628
+ try {
14629
+ const parsed = JSON.parse(value);
14630
+ return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
14631
+ } catch {
14632
+ return {};
14633
+ }
14634
+ }
14635
+ function metadataString(metadata, keys) {
14636
+ for (const key of keys) {
14637
+ const value = metadata[key];
14638
+ if (typeof value === "string" && value.length > 0)
14639
+ return value;
14640
+ }
14641
+ return null;
14642
+ }
14643
+ function metadataNumber(metadata, keys) {
14644
+ for (const key of keys) {
14645
+ const value = metadata[key];
14646
+ if (typeof value === "number" && Number.isFinite(value))
14647
+ return value;
14648
+ }
14649
+ return null;
14650
+ }
14651
+ function vectorNorm(vector) {
14652
+ return Math.sqrt(vector.reduce((sum, value) => sum + value * value, 0));
14653
+ }
14654
+ function cosineSimilarity(a, b, bNorm = vectorNorm(b)) {
14655
+ const aNorm = vectorNorm(a);
14656
+ if (aNorm === 0 || bNorm === 0)
14657
+ return 0;
14658
+ const length = Math.min(a.length, b.length);
14659
+ let dot = 0;
14660
+ for (let i = 0;i < length; i += 1)
14661
+ dot += a[i] * b[i];
14662
+ return dot / (aNorm * bNorm);
14663
+ }
14664
+ function deterministicVector(text, dimensions) {
14665
+ const bytes = createHash("sha256").update(text).digest();
14666
+ return Array.from({ length: dimensions }, (_, index) => {
14667
+ const value = bytes[index % bytes.length] / 255;
14668
+ return Number((value * 2 - 1).toFixed(6));
14669
+ });
14670
+ }
14671
+ async function openAiEmbeddingModel(model, config2, env = process.env) {
14672
+ assertProviderCredentials("openai", config2, env);
14673
+ const settings = providerSettings(config2, "openai");
14674
+ const { createOpenAI } = await import("@ai-sdk/openai");
14675
+ const openai = createOpenAI({
14676
+ apiKey: env[settings.api_key_env],
14677
+ baseURL: settings.base_url
14678
+ });
14679
+ if (openai.embeddingModel)
14680
+ return openai.embeddingModel(model);
14681
+ if (openai.textEmbedding)
14682
+ return openai.textEmbedding(model);
14683
+ if (openai.textEmbeddingModel)
14684
+ return openai.textEmbeddingModel(model);
14685
+ throw new Error("OpenAI provider does not expose an embedding model factory.");
14686
+ }
14687
+ function resolveEmbeddingModelRef(modelRef, config2) {
14688
+ if (!modelRef || modelRef === "default" || modelRef === "embedding") {
14689
+ return embeddingConfig(config2).default_model ?? DEFAULT_EMBEDDING_MODEL_REF;
14690
+ }
14691
+ return modelRef;
14692
+ }
14693
+ async function embedTexts(texts, options = {}) {
14694
+ const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
14695
+ const parsed = parseModelRef(modelRef);
14696
+ if (parsed.provider !== "openai") {
14697
+ throw new Error(`Embedding provider ${parsed.provider} is not supported yet. Use openai:text-embedding-3-small.`);
14698
+ }
14699
+ const dimensions = options.dimensions ?? embeddingConfig(options.config).dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS;
14700
+ if (options.fake) {
14701
+ return {
14702
+ provider: parsed.provider,
14703
+ model: parsed.model,
14704
+ dimensions,
14705
+ vectors: texts.map((text) => deterministicVector(text, dimensions)),
14706
+ usage: { input_tokens: texts.reduce((sum, text) => sum + Math.max(1, Math.ceil(text.split(/\s+/).filter(Boolean).length * 1.25)), 0) }
14707
+ };
14708
+ }
14709
+ const { embedMany } = await import("ai");
14710
+ const model = await openAiEmbeddingModel(parsed.model, options.config, options.env);
14711
+ const result = await embedMany({
14712
+ model,
14713
+ values: texts,
14714
+ maxParallelCalls: options.maxParallelCalls ?? embeddingConfig(options.config).max_parallel_calls,
14715
+ providerOptions: {
14716
+ openai: {
14717
+ dimensions
14718
+ }
14719
+ }
14720
+ });
14721
+ const vectors = result.embeddings;
14722
+ return {
14723
+ provider: parsed.provider,
14724
+ model: parsed.model,
14725
+ dimensions: vectors[0]?.length ?? dimensions,
14726
+ vectors,
14727
+ usage: { input_tokens: result.usage?.tokens ?? 0 }
14728
+ };
14729
+ }
14730
+ function selectCandidateChunks(db, options) {
14731
+ const baseQuery = `SELECT
14732
+ c.id,
14733
+ c.text,
14734
+ c.token_count,
14735
+ c.start_offset,
14736
+ c.end_offset,
14737
+ c.metadata_json,
14738
+ c.source_revision_id,
14739
+ sr.revision,
14740
+ sr.hash,
14741
+ s.uri AS source_uri,
14742
+ s.kind AS source_kind
14743
+ FROM chunks c
14744
+ LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
14745
+ LEFT JOIN sources s ON s.id = sr.source_id
14746
+ LEFT JOIN vector_index_entries v
14747
+ ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
14748
+ WHERE v.id IS NULL`;
14749
+ const suffix = `
14750
+ ORDER BY c.created_at ASC, c.ordinal ASC
14751
+ LIMIT ?`;
14752
+ if (options.sourceRevisionId) {
14753
+ return db.query(`${baseQuery} AND c.source_revision_id = ?${suffix}`).all(options.provider, options.model, options.sourceRevisionId, options.limit);
14754
+ }
14755
+ return db.query(`${baseQuery}${suffix}`).all(options.provider, options.model, options.limit);
14756
+ }
14757
+ function provenanceForChunk(row) {
14758
+ const metadata = parseJsonObject(row.metadata_json);
14759
+ const existing = metadata.provenance;
14760
+ if (existing && typeof existing === "object" && !Array.isArray(existing))
14761
+ return existing;
14762
+ return sourceProvenance({
14763
+ source_ref: metadataString(metadata, ["source_ref"]),
14764
+ source_uri: row.source_uri ?? metadataString(metadata, ["source_uri"]),
14765
+ source_kind: row.source_kind ?? metadataString(metadata, ["source_kind"]),
14766
+ source_revision_id: row.source_revision_id,
14767
+ revision: row.revision ?? metadataString(metadata, ["revision"]),
14768
+ hash: row.hash ?? metadataString(metadata, ["hash"]),
14769
+ chunk_id: row.id,
14770
+ start_offset: row.start_offset ?? metadataNumber(metadata, ["start_offset"]),
14771
+ end_offset: row.end_offset ?? metadataNumber(metadata, ["end_offset"]),
14772
+ status: metadataString(metadata, ["status"]),
14773
+ resolver: "open-files-read-only"
14774
+ });
14775
+ }
14776
+ function upsertVectors(db, rows, embedding, now) {
14777
+ const insertEmbedding = db.prepare(`
14778
+ INSERT INTO chunk_embeddings (id, chunk_id, provider, model, dimensions, vector_json, created_at)
14779
+ VALUES (?, ?, ?, ?, ?, ?, ?)
14780
+ ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
14781
+ dimensions = excluded.dimensions,
14782
+ vector_json = excluded.vector_json,
14783
+ created_at = excluded.created_at
14784
+ `);
14785
+ const insertVector = db.prepare(`
14786
+ INSERT INTO vector_index_entries (
14787
+ id, chunk_id, source_revision_id, provider, model, dimensions, vector_json, vector_norm,
14788
+ source_uri, source_ref, revision, hash, start_offset, end_offset, token_count, status,
14789
+ metadata_json, created_at, updated_at
14790
+ )
14791
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
14792
+ ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
14793
+ source_revision_id = excluded.source_revision_id,
14794
+ dimensions = excluded.dimensions,
14795
+ vector_json = excluded.vector_json,
14796
+ vector_norm = excluded.vector_norm,
14797
+ source_uri = excluded.source_uri,
14798
+ source_ref = excluded.source_ref,
14799
+ revision = excluded.revision,
14800
+ hash = excluded.hash,
14801
+ start_offset = excluded.start_offset,
14802
+ end_offset = excluded.end_offset,
14803
+ token_count = excluded.token_count,
14804
+ status = excluded.status,
14805
+ metadata_json = excluded.metadata_json,
14806
+ updated_at = excluded.updated_at
14807
+ `);
14808
+ const write = db.transaction(() => {
14809
+ for (let index = 0;index < rows.length; index += 1) {
14810
+ const row = rows[index];
14811
+ const vector = embedding.vectors[index];
14812
+ if (!vector)
14813
+ continue;
14814
+ const metadata = parseJsonObject(row.metadata_json);
14815
+ const provenance = provenanceForChunk(row);
14816
+ const sourceRef = provenance.source_ref ?? metadataString(metadata, ["source_ref"]);
14817
+ const sourceUri = provenance.source_uri ?? row.source_uri ?? metadataString(metadata, ["source_uri"]);
14818
+ const revision = provenance.revision ?? row.revision ?? metadataString(metadata, ["revision"]);
14819
+ const hash2 = provenance.hash ?? row.hash ?? metadataString(metadata, ["hash"]);
14820
+ const status = provenance.status ?? metadataString(metadata, ["status"]) ?? "active";
14821
+ const vectorJson = JSON.stringify(vector);
14822
+ insertEmbedding.run(stableId("emb", `${row.id}\x00${embedding.provider}\x00${embedding.model}`), row.id, embedding.provider, embedding.model, embedding.dimensions, vectorJson, now);
14823
+ insertVector.run(stableId("vec", `${row.id}\x00${embedding.provider}\x00${embedding.model}`), row.id, row.source_revision_id, embedding.provider, embedding.model, embedding.dimensions, vectorJson, vectorNorm(vector), sourceUri, sourceRef, revision, hash2, provenance.start_offset, provenance.end_offset, row.token_count, status, JSON.stringify({
14824
+ ...metadata,
14825
+ provenance,
14826
+ embedded_at: now
14827
+ }), now, now);
14828
+ }
14829
+ });
14830
+ write();
14831
+ return rows.length;
14832
+ }
14833
+ async function indexKnowledgeEmbeddings(options) {
14834
+ const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
14835
+ const parsed = parseModelRef(modelRef);
14836
+ if (parsed.provider !== "openai")
14837
+ throw new Error(`Embedding provider ${parsed.provider} is not supported yet.`);
14838
+ const now = (options.now ?? new Date).toISOString();
14839
+ const limit = Math.max(1, Math.min(options.limit ?? 100, 1000));
14840
+ migrateKnowledgeDb(options.dbPath);
14841
+ const readDb = openKnowledgeDb(options.dbPath);
14842
+ let rows;
14843
+ try {
14844
+ rows = selectCandidateChunks(readDb, {
14845
+ provider: parsed.provider,
14846
+ model: parsed.model,
14847
+ limit,
14848
+ sourceRevisionId: options.sourceRevisionId
14849
+ });
14850
+ } finally {
14851
+ readDb.close();
14852
+ }
14853
+ if (rows.length === 0) {
14854
+ return {
14855
+ provider: parsed.provider,
14856
+ model: parsed.model,
14857
+ dimensions: options.dimensions ?? embeddingConfig(options.config).dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS,
14858
+ chunks_seen: 0,
14859
+ chunks_embedded: 0,
14860
+ embeddings_upserted: 0,
14861
+ vector_entries_upserted: 0,
14862
+ usage: { input_tokens: 0 }
14863
+ };
14864
+ }
14865
+ const embedding = await embedTexts(rows.map((row) => row.text), options);
14866
+ const writeDb = openKnowledgeDb(options.dbPath);
14867
+ try {
14868
+ const upserted = upsertVectors(writeDb, rows, embedding, now);
14869
+ return {
14870
+ provider: embedding.provider,
14871
+ model: embedding.model,
14872
+ dimensions: embedding.dimensions,
14873
+ chunks_seen: rows.length,
14874
+ chunks_embedded: rows.length,
14875
+ embeddings_upserted: upserted,
14876
+ vector_entries_upserted: upserted,
14877
+ usage: embedding.usage
14878
+ };
14879
+ } finally {
14880
+ writeDb.close();
14881
+ }
14882
+ }
14883
+ function embeddingIndexStatus(dbPath) {
14884
+ migrateKnowledgeDb(dbPath);
14885
+ const db = openKnowledgeDb(dbPath);
14886
+ try {
14887
+ const totalEmbeddings = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings").get()?.n ?? 0;
14888
+ const totalVectorEntries = db.query("SELECT COUNT(*) AS n FROM vector_index_entries").get()?.n ?? 0;
14889
+ const indexes = db.query(`SELECT provider, model, dimensions, COUNT(*) AS entries, MAX(updated_at) AS updated_at
14890
+ FROM vector_index_entries
14891
+ GROUP BY provider, model, dimensions
14892
+ ORDER BY provider, model`).all();
14893
+ return {
14894
+ total_embeddings: totalEmbeddings,
14895
+ total_vector_entries: totalVectorEntries,
14896
+ indexes
14897
+ };
14898
+ } finally {
14899
+ db.close();
14900
+ }
14901
+ }
14902
+ async function searchVectorIndex(options) {
14903
+ const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
14904
+ const parsed = parseModelRef(modelRef);
14905
+ const limit = Math.max(1, Math.min(options.limit ?? 10, 100));
14906
+ const embedded = await embedTexts([options.query], options);
14907
+ const queryVector = embedded.vectors[0] ?? [];
14908
+ migrateKnowledgeDb(options.dbPath);
14909
+ const db = openKnowledgeDb(options.dbPath);
14910
+ try {
14911
+ const rows = db.query(`SELECT
14912
+ v.chunk_id,
14913
+ c.text,
14914
+ v.vector_json,
14915
+ v.vector_norm,
14916
+ v.source_uri,
14917
+ v.source_ref,
14918
+ v.revision,
14919
+ v.hash,
14920
+ v.metadata_json
14921
+ FROM vector_index_entries v
14922
+ JOIN chunks c ON c.id = v.chunk_id
14923
+ WHERE v.provider = ? AND v.model = ? AND v.status = 'active'`).all(parsed.provider, parsed.model);
14924
+ const scored = rows.map((row) => {
14925
+ const vector = JSON.parse(row.vector_json);
14926
+ const metadata = parseJsonObject(row.metadata_json);
14927
+ const provenance = metadata.provenance && typeof metadata.provenance === "object" && !Array.isArray(metadata.provenance) ? metadata.provenance : null;
14928
+ return {
14929
+ chunk_id: row.chunk_id,
14930
+ score: cosineSimilarity(queryVector, vector, row.vector_norm),
14931
+ text: row.text,
14932
+ source_uri: row.source_uri,
14933
+ source_ref: row.source_ref,
14934
+ revision: row.revision,
14935
+ hash: row.hash,
14936
+ provenance
14937
+ };
14938
+ }).sort((a, b) => b.score - a.score).slice(0, limit);
14939
+ return {
14940
+ provider: parsed.provider,
14941
+ model: parsed.model,
14942
+ dimensions: embedded.dimensions,
14943
+ query: options.query,
14944
+ results: scored
14397
14945
  };
14398
14946
  } finally {
14399
14947
  db.close();
14400
14948
  }
14401
14949
  }
14402
14950
 
14951
+ // src/outbox-consume.ts
14952
+ import { createHash as createHash3, randomUUID as randomUUID3 } from "crypto";
14953
+ import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
14954
+ import { basename } from "path";
14955
+
14403
14956
  // src/safety.ts
14404
- import { createHash, randomUUID as randomUUID2 } from "crypto";
14957
+ import { createHash as createHash2, randomUUID as randomUUID2 } from "crypto";
14405
14958
  import { relative as relative2, resolve as resolve2, sep as sep2 } from "path";
14406
14959
  function envEnabled(name) {
14407
14960
  const value = process.env[name];
@@ -14496,7 +15049,7 @@ function redactSecrets(text, policy) {
14496
15049
  return { text: output, findings };
14497
15050
  }
14498
15051
  function auditId(input) {
14499
- return `audit_${createHash("sha256").update(`${input.event_type}\x00${input.action}\x00${input.target_uri ?? ""}\x00${input.created_at ?? ""}\x00${JSON.stringify(input.metadata ?? {})}\x00${randomUUID2()}`).digest("hex").slice(0, 24)}`;
15052
+ return `audit_${createHash2("sha256").update(`${input.event_type}\x00${input.action}\x00${input.target_uri ?? ""}\x00${input.created_at ?? ""}\x00${JSON.stringify(input.metadata ?? {})}\x00${randomUUID2()}`).digest("hex").slice(0, 24)}`;
14500
15053
  }
14501
15054
  function recordAuditEvent(db, input) {
14502
15055
  const createdAt = input.created_at ?? new Date().toISOString();
@@ -14531,8 +15084,8 @@ function recordRedactionFindings(db, input) {
14531
15084
  }
14532
15085
 
14533
15086
  // src/outbox-consume.ts
14534
- function stableId(prefix, value) {
14535
- return `${prefix}_${createHash2("sha256").update(value).digest("hex").slice(0, 20)}`;
15087
+ function stableId2(prefix, value) {
15088
+ return `${prefix}_${createHash3("sha256").update(value).digest("hex").slice(0, 20)}`;
14536
15089
  }
14537
15090
  function asObject(value) {
14538
15091
  return value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
@@ -14686,7 +15239,7 @@ function mergeJson(existing, patch) {
14686
15239
  return JSON.stringify({ ...base, ...patch });
14687
15240
  }
14688
15241
  function ensureSource(db, event, now) {
14689
- const id = stableId("src", event.sourceUri);
15242
+ const id = stableId2("src", event.sourceUri);
14690
15243
  db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
14691
15244
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
14692
15245
  ON CONFLICT(uri) DO UPDATE SET
@@ -14727,7 +15280,7 @@ function ensureSource(db, event, now) {
14727
15280
  function ensureRevision(db, sourceId, event, now) {
14728
15281
  if (!event.revision)
14729
15282
  return null;
14730
- const id = stableId("rev", `${sourceId}\x00${event.revision}`);
15283
+ const id = stableId2("rev", `${sourceId}\x00${event.revision}`);
14731
15284
  const metadata = {
14732
15285
  source_ref: event.sourceRef,
14733
15286
  source_uri: event.sourceUri,
@@ -14755,16 +15308,20 @@ function revisionIdsForEvent(db, sourceId, event) {
14755
15308
  function invalidateRevision(db, revisionId) {
14756
15309
  const chunks = db.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(revisionId);
14757
15310
  let embeddingsDeleted = 0;
15311
+ let vectorEntriesDeleted = 0;
14758
15312
  for (const chunk of chunks) {
14759
15313
  const row = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(chunk.id);
14760
15314
  embeddingsDeleted += row?.n ?? 0;
15315
+ const vectorRow = db.query("SELECT COUNT(*) AS n FROM vector_index_entries WHERE chunk_id = ?").get(chunk.id);
15316
+ vectorEntriesDeleted += vectorRow?.n ?? 0;
15317
+ db.run("DELETE FROM vector_index_entries WHERE chunk_id = ?", [chunk.id]);
14761
15318
  db.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?", [chunk.id]);
14762
15319
  db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [chunk.id]);
14763
15320
  }
14764
15321
  db.run("DELETE FROM chunks WHERE source_revision_id = ?", [revisionId]);
14765
15322
  const revision = db.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(revisionId);
14766
15323
  db.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?", [mergeJson(revision?.metadata_json, { reindex_required: true, invalidated_at: new Date().toISOString() }), revisionId]);
14767
- return { chunksDeleted: chunks.length, embeddingsDeleted };
15324
+ return { chunksDeleted: chunks.length, embeddingsDeleted, vectorEntriesDeleted };
14768
15325
  }
14769
15326
  function isDeleteEvent(eventType2, status) {
14770
15327
  return status === "deleted" || ["delete", "deleted", "remove", "removed"].includes(eventType2);
@@ -14802,6 +15359,7 @@ async function consumeOpenFilesOutbox(options) {
14802
15359
  const revisionsTouched = new Set;
14803
15360
  let chunksDeleted = 0;
14804
15361
  let embeddingsDeleted = 0;
15362
+ let vectorEntriesDeleted = 0;
14805
15363
  let staleRevisions = 0;
14806
15364
  let deletedSources = 0;
14807
15365
  let movedSources = 0;
@@ -14827,6 +15385,7 @@ async function consumeOpenFilesOutbox(options) {
14827
15385
  const invalidation = invalidateRevision(db, revisionId);
14828
15386
  chunksDeleted += invalidation.chunksDeleted;
14829
15387
  embeddingsDeleted += invalidation.embeddingsDeleted;
15388
+ vectorEntriesDeleted += invalidation.vectorEntriesDeleted;
14830
15389
  staleRevisions += 1;
14831
15390
  }
14832
15391
  if (isDeleteEvent(event.eventType, event.status))
@@ -14837,7 +15396,7 @@ async function consumeOpenFilesOutbox(options) {
14837
15396
  permissionUpdates += 1;
14838
15397
  db.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
14839
15398
  VALUES (?, ?, ?, ?, ?, ?)`, [
14840
- stableId("evt", `${runId}\x00${index}\x00${event.sourceRef}\x00${event.eventType}`),
15399
+ stableId2("evt", `${runId}\x00${index}\x00${event.sourceRef}\x00${event.eventType}`),
14841
15400
  runId,
14842
15401
  "info",
14843
15402
  event.eventType,
@@ -14854,7 +15413,7 @@ async function consumeOpenFilesOutbox(options) {
14854
15413
  });
14855
15414
  db.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
14856
15415
  VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`, [
14857
- stableId("usage", runId),
15416
+ stableId2("usage", runId),
14858
15417
  runId,
14859
15418
  "local",
14860
15419
  "open-files-outbox",
@@ -14865,87 +15424,45 @@ async function consumeOpenFilesOutbox(options) {
14865
15424
  event_type: "write",
14866
15425
  action: "knowledge_outbox_invalidation",
14867
15426
  target_uri: options.dbPath,
14868
- decision: "allow",
14869
- metadata: {
14870
- run_id: runId,
14871
- events: events.length,
14872
- sources: sourcesTouched.size,
14873
- revisions: revisionsTouched.size,
14874
- chunks_deleted: chunksDeleted,
14875
- embeddings_deleted: embeddingsDeleted
14876
- },
14877
- created_at: now
14878
- });
14879
- return {
14880
- path: options.input,
14881
- db_path: options.dbPath,
14882
- run_id: runId,
14883
- events_seen: events.length,
14884
- sources_touched: sourcesTouched.size,
14885
- revisions_touched: revisionsTouched.size,
14886
- chunks_deleted: chunksDeleted,
14887
- embeddings_deleted: embeddingsDeleted,
14888
- stale_revisions: staleRevisions,
14889
- deleted_sources: deletedSources,
14890
- moved_sources: movedSources,
14891
- permission_updates: permissionUpdates
14892
- };
14893
- })();
14894
- } finally {
14895
- db.close();
14896
- }
14897
- }
14898
-
14899
- // src/manifest-ingest.ts
14900
- import { createHash as createHash3 } from "crypto";
14901
- import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
14902
- import { basename as basename2 } from "path";
14903
-
14904
- // src/provenance.ts
14905
- function isStaleStatus(status) {
14906
- return ["deleted", "stale", "invalidated", "reindex_required"].includes((status ?? "").toLowerCase());
14907
- }
14908
- function sourceProvenance(input) {
14909
- const status = input.status ?? null;
14910
- return {
14911
- source_owner: "open-files",
14912
- source_ref: input.source_ref ?? null,
14913
- source_uri: input.source_uri ?? null,
14914
- source_kind: input.source_kind ?? null,
14915
- source_revision_id: input.source_revision_id ?? null,
14916
- revision: input.revision ?? null,
14917
- hash: input.hash ?? null,
14918
- chunk_id: input.chunk_id ?? null,
14919
- start_offset: input.start_offset ?? null,
14920
- end_offset: input.end_offset ?? null,
14921
- status,
14922
- read_only: true,
14923
- citation_required: true,
14924
- resolver: input.resolver ?? null,
14925
- stale: isStaleStatus(status)
14926
- };
14927
- }
14928
- function generatedArtifactProvenance(input) {
14929
- return {
14930
- source_owner: "open-files",
14931
- generated_from: input.generated_from,
14932
- artifact_key: input.artifact_key,
14933
- source_refs: input.source_refs ?? [],
14934
- read_only_sources: true,
14935
- citation_required: input.citation_required ?? true,
14936
- raw_source_bytes_stored_in_open_knowledge: false
14937
- };
14938
- }
14939
- function withProvenance(metadata, provenance) {
14940
- return {
14941
- ...metadata,
14942
- provenance
14943
- };
15427
+ decision: "allow",
15428
+ metadata: {
15429
+ run_id: runId,
15430
+ events: events.length,
15431
+ sources: sourcesTouched.size,
15432
+ revisions: revisionsTouched.size,
15433
+ chunks_deleted: chunksDeleted,
15434
+ embeddings_deleted: embeddingsDeleted,
15435
+ vector_entries_deleted: vectorEntriesDeleted
15436
+ },
15437
+ created_at: now
15438
+ });
15439
+ return {
15440
+ path: options.input,
15441
+ db_path: options.dbPath,
15442
+ run_id: runId,
15443
+ events_seen: events.length,
15444
+ sources_touched: sourcesTouched.size,
15445
+ revisions_touched: revisionsTouched.size,
15446
+ chunks_deleted: chunksDeleted,
15447
+ embeddings_deleted: embeddingsDeleted,
15448
+ vector_entries_deleted: vectorEntriesDeleted,
15449
+ stale_revisions: staleRevisions,
15450
+ deleted_sources: deletedSources,
15451
+ moved_sources: movedSources,
15452
+ permission_updates: permissionUpdates
15453
+ };
15454
+ })();
15455
+ } finally {
15456
+ db.close();
15457
+ }
14944
15458
  }
14945
15459
 
14946
15460
  // src/manifest-ingest.ts
14947
- function stableId2(prefix, value) {
14948
- return `${prefix}_${createHash3("sha256").update(value).digest("hex").slice(0, 20)}`;
15461
+ import { createHash as createHash4 } from "crypto";
15462
+ import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
15463
+ import { basename as basename2 } from "path";
15464
+ function stableId3(prefix, value) {
15465
+ return `${prefix}_${createHash4("sha256").update(value).digest("hex").slice(0, 20)}`;
14949
15466
  }
14950
15467
  function asObject2(value) {
14951
15468
  return value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
@@ -15165,7 +15682,7 @@ function deleteChunksForRevision(db, sourceRevisionId) {
15165
15682
  return rows.length;
15166
15683
  }
15167
15684
  function upsertSource(db, item, now) {
15168
- const sourceId = stableId2("src", item.sourceUri);
15685
+ const sourceId = stableId3("src", item.sourceUri);
15169
15686
  db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
15170
15687
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
15171
15688
  ON CONFLICT(uri) DO UPDATE SET
@@ -15189,7 +15706,7 @@ function upsertSource(db, item, now) {
15189
15706
  return row.id;
15190
15707
  }
15191
15708
  function upsertRevision(db, sourceId, item, now) {
15192
- const revisionId = stableId2("rev", `${sourceId}\x00${item.revision}`);
15709
+ const revisionId = stableId3("rev", `${sourceId}\x00${item.revision}`);
15193
15710
  db.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
15194
15711
  VALUES (?, ?, ?, ?, ?, ?, ?)
15195
15712
  ON CONFLICT(source_id, revision) DO UPDATE SET
@@ -15231,7 +15748,7 @@ function insertChunks(db, sourceRevisionId, item, now, maxChars, overlapChars, s
15231
15748
  }
15232
15749
  const chunks = chunkText(redacted.text, maxChars, overlapChars);
15233
15750
  for (const chunk of chunks) {
15234
- const chunkId = stableId2("chk", `${sourceRevisionId}\x00${chunk.ordinal}\x00${chunk.text}`);
15751
+ const chunkId = stableId3("chk", `${sourceRevisionId}\x00${chunk.ordinal}\x00${chunk.text}`);
15235
15752
  const provenance = sourceProvenance({
15236
15753
  source_ref: item.sourceRef,
15237
15754
  source_uri: item.sourceUri,
@@ -15359,12 +15876,12 @@ async function ingestOpenFilesManifestItems(options) {
15359
15876
  }
15360
15877
 
15361
15878
  // src/source-ingest.ts
15362
- import { createHash as createHash4 } from "crypto";
15879
+ import { createHash as createHash5 } from "crypto";
15363
15880
  import { existsSync as existsSync6, readFileSync as readFileSync6 } from "fs";
15364
15881
  import { basename as basename3 } from "path";
15365
15882
 
15366
15883
  // src/source-resolver.ts
15367
- function parseJsonObject(value) {
15884
+ function parseJsonObject2(value) {
15368
15885
  if (!value)
15369
15886
  return {};
15370
15887
  try {
@@ -15374,7 +15891,7 @@ function parseJsonObject(value) {
15374
15891
  return {};
15375
15892
  }
15376
15893
  }
15377
- function metadataString(metadata, keys) {
15894
+ function metadataString2(metadata, keys) {
15378
15895
  for (const key of keys) {
15379
15896
  const value = metadata[key];
15380
15897
  if (typeof value === "string" && value.length > 0)
@@ -15382,7 +15899,7 @@ function metadataString(metadata, keys) {
15382
15899
  }
15383
15900
  return null;
15384
15901
  }
15385
- function metadataNumber(metadata, keys) {
15902
+ function metadataNumber2(metadata, keys) {
15386
15903
  for (const key of keys) {
15387
15904
  const value = metadata[key];
15388
15905
  if (typeof value === "number" && Number.isFinite(value))
@@ -15507,8 +16024,8 @@ async function resolveOpenFilesSource(options) {
15507
16024
  citations: []
15508
16025
  };
15509
16026
  }
15510
- const sourceMetadata = parseJsonObject(source.metadata_json);
15511
- const permissions = parseJsonObject(source.acl_json);
16027
+ const sourceMetadata = parseJsonObject2(source.metadata_json);
16028
+ const permissions = parseJsonObject2(source.acl_json);
15512
16029
  try {
15513
16030
  assertPurposeAllowed(permissions, purpose);
15514
16031
  } catch (error48) {
@@ -15528,22 +16045,22 @@ async function resolveOpenFilesSource(options) {
15528
16045
  throw error48;
15529
16046
  }
15530
16047
  const revision = selectRevision(db, source.id, requestedRevision);
15531
- const revisionMetadata = parseJsonObject(revision?.metadata_json);
16048
+ const revisionMetadata = parseJsonObject2(revision?.metadata_json);
15532
16049
  const totalChunks = countChunks(db, revision?.id ?? null);
15533
16050
  const rows = selectChunks(db, revision?.id ?? null, limit);
15534
16051
  const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);
15535
16052
  const chunks = rows.map((row) => {
15536
- const metadata = parseJsonObject(row.metadata_json);
16053
+ const metadata = parseJsonObject2(row.metadata_json);
15537
16054
  const evidence = {
15538
16055
  resolver: "open-files-read-only",
15539
16056
  mode: "local_catalog",
15540
16057
  purpose,
15541
16058
  read_only: true,
15542
- source_ref: metadataString(metadata, ["source_ref"]) ?? effectiveSourceRef,
16059
+ source_ref: metadataString2(metadata, ["source_ref"]) ?? effectiveSourceRef,
15543
16060
  source_uri: source.uri,
15544
16061
  source_revision_id: revision?.id ?? null,
15545
16062
  revision: revision?.revision ?? null,
15546
- hash: revision?.hash ?? metadataString(metadata, ["hash"]),
16063
+ hash: revision?.hash ?? metadataString2(metadata, ["hash"]),
15547
16064
  chunk_id: row.id,
15548
16065
  start_offset: row.start_offset,
15549
16066
  end_offset: row.end_offset,
@@ -15559,7 +16076,7 @@ async function resolveOpenFilesSource(options) {
15559
16076
  chunk_id: row.id,
15560
16077
  start_offset: row.start_offset,
15561
16078
  end_offset: row.end_offset,
15562
- status: metadataString(metadata, ["status"]),
16079
+ status: metadataString2(metadata, ["status"]),
15563
16080
  resolver: evidence.resolver
15564
16081
  });
15565
16082
  return {
@@ -15600,8 +16117,8 @@ async function resolveOpenFilesSource(options) {
15600
16117
  },
15601
16118
  created_at: resolvedAt
15602
16119
  });
15603
- const mime = metadataString(sourceMetadata, ["mime", "content_type"]) ?? metadataString(revisionMetadata, ["mime", "content_type"]);
15604
- const size = metadataNumber(sourceMetadata, ["size", "size_bytes"]) ?? metadataNumber(revisionMetadata, ["size", "size_bytes"]);
16120
+ const mime = metadataString2(sourceMetadata, ["mime", "content_type"]) ?? metadataString2(revisionMetadata, ["mime", "content_type"]);
16121
+ const size = metadataNumber2(sourceMetadata, ["size", "size_bytes"]) ?? metadataNumber2(revisionMetadata, ["size", "size_bytes"]);
15605
16122
  return {
15606
16123
  source_ref: effectiveSourceRef,
15607
16124
  source_uri: source.uri,
@@ -15634,12 +16151,12 @@ async function resolveOpenFilesSource(options) {
15634
16151
  content: {
15635
16152
  mime,
15636
16153
  size,
15637
- hash: revision?.hash ?? metadataString(sourceMetadata, ["hash", "checksum", "sha256"]),
16154
+ hash: revision?.hash ?? metadataString2(sourceMetadata, ["hash", "checksum", "sha256"]),
15638
16155
  text_available: totalChunks > 0,
15639
16156
  chunks_total: totalChunks,
15640
16157
  chunks_returned: chunks.length,
15641
16158
  char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
15642
- extracted_text_ref: revision?.extracted_text_uri ?? metadataString(revisionMetadata, ["extracted_text_ref", "extracted_text_uri"]),
16159
+ extracted_text_ref: revision?.extracted_text_uri ?? metadataString2(revisionMetadata, ["extracted_text_ref", "extracted_text_uri"]),
15643
16160
  bytes_available: false,
15644
16161
  bytes_exposed: false
15645
16162
  },
@@ -15654,7 +16171,7 @@ async function resolveOpenFilesSource(options) {
15654
16171
 
15655
16172
  // src/source-ingest.ts
15656
16173
  function sha256Text(text) {
15657
- return `sha256:${createHash4("sha256").update(text).digest("hex")}`;
16174
+ return `sha256:${createHash5("sha256").update(text).digest("hex")}`;
15658
16175
  }
15659
16176
  function stripHtml(html) {
15660
16177
  return html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/&nbsp;/g, " ").replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/\s+\n/g, `
@@ -15876,131 +16393,381 @@ async function ingestSourceRef(options) {
15876
16393
  };
15877
16394
  }
15878
16395
 
15879
- // src/providers.ts
15880
- var DEFAULT_PROVIDER_SETTINGS = {
15881
- openai: {
15882
- api_key_env: "OPENAI_API_KEY",
15883
- default_model: "gpt-5.2"
15884
- },
15885
- anthropic: {
15886
- api_key_env: "ANTHROPIC_API_KEY",
15887
- default_model: "claude-sonnet-4-6"
15888
- },
15889
- deepseek: {
15890
- api_key_env: "DEEPSEEK_API_KEY",
15891
- default_model: "deepseek-chat"
16396
+ // src/search.ts
16397
+ function parseJsonObject3(value) {
16398
+ if (!value)
16399
+ return {};
16400
+ try {
16401
+ const parsed = JSON.parse(value);
16402
+ return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
16403
+ } catch {
16404
+ return {};
15892
16405
  }
15893
- };
15894
- var PROVIDER_CAPABILITIES = {
15895
- openai: {
15896
- text_generation: true,
15897
- structured_output: true,
15898
- tool_usage: true,
15899
- tool_streaming: true,
15900
- image_input: true,
15901
- native_web_search: true,
15902
- reasoning: true,
15903
- embeddings: true
15904
- },
15905
- anthropic: {
15906
- text_generation: true,
15907
- structured_output: true,
15908
- tool_usage: true,
15909
- tool_streaming: true,
15910
- image_input: true,
15911
- native_web_search: false,
15912
- reasoning: true,
15913
- embeddings: false
15914
- },
15915
- deepseek: {
15916
- text_generation: true,
15917
- structured_output: true,
15918
- tool_usage: true,
15919
- tool_streaming: true,
15920
- image_input: false,
15921
- native_web_search: false,
15922
- reasoning: true,
15923
- embeddings: false
16406
+ }
16407
+ function metadataString3(metadata, keys) {
16408
+ for (const key of keys) {
16409
+ const value = metadata[key];
16410
+ if (typeof value === "string" && value.length > 0)
16411
+ return value;
15924
16412
  }
15925
- };
15926
- var BUILTIN_ALIASES = {
15927
- default: "openai:gpt-5.2",
15928
- fast: "openai:gpt-5-mini",
15929
- reasoning: "anthropic:claude-opus-4-6",
15930
- sonnet: "anthropic:claude-sonnet-4-6",
15931
- deepseek: "deepseek:deepseek-chat",
15932
- "deepseek-reasoning": "deepseek:deepseek-reasoner"
15933
- };
15934
- function providerConfig(config2) {
15935
- return config2.providers ?? {};
16413
+ return null;
15936
16414
  }
15937
- function providerSettings(config2, provider) {
15938
- const configured = providerConfig(config2)[provider] ?? {};
15939
- return {
15940
- ...DEFAULT_PROVIDER_SETTINGS[provider],
15941
- ...configured
15942
- };
16415
+ function metadataNumber3(metadata, keys) {
16416
+ for (const key of keys) {
16417
+ const value = metadata[key];
16418
+ if (typeof value === "number" && Number.isFinite(value))
16419
+ return value;
16420
+ }
16421
+ return null;
15943
16422
  }
15944
- function modelAliases(config2) {
15945
- const configured = providerConfig(config2);
15946
- return {
15947
- ...BUILTIN_ALIASES,
15948
- ...configured.default_model ? { default: configured.default_model } : {},
15949
- ...configured.aliases ?? {}
15950
- };
16423
+ function unique(values) {
16424
+ return Array.from(new Set(values));
15951
16425
  }
15952
- function parseModelRef(modelRef) {
15953
- const [provider, ...rest] = modelRef.split(":");
15954
- const model = rest.join(":");
15955
- if (provider !== "openai" && provider !== "anthropic" && provider !== "deepseek") {
15956
- throw new Error(`Unsupported AI provider: ${provider}`);
15957
- }
15958
- if (!model)
15959
- throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
15960
- return { provider, model };
16426
+ function queryTerms(query) {
16427
+ const terms = query.normalize("NFKC").toLowerCase().match(/[\p{L}\p{N}_]+/gu) ?? [];
16428
+ return unique(terms.filter((term) => term.length > 0)).slice(0, 16);
15961
16429
  }
15962
- function resolveModelRef(aliasOrRef, config2) {
15963
- const aliases = modelAliases(config2);
15964
- return aliases[aliasOrRef] ?? aliasOrRef;
16430
+ function ftsQueryForTerms(terms) {
16431
+ if (terms.length === 0)
16432
+ return null;
16433
+ return terms.map((term) => `${term}*`).join(" OR ");
15965
16434
  }
15966
- function listModelRegistry(config2) {
15967
- const aliases = modelAliases(config2);
15968
- return Object.entries(aliases).map(([alias, modelRef]) => {
15969
- const parsed = parseModelRef(modelRef);
15970
- return {
15971
- alias,
15972
- model_ref: modelRef,
15973
- provider: parsed.provider,
15974
- model: parsed.model,
15975
- default: alias === "default",
15976
- capabilities: PROVIDER_CAPABILITIES[parsed.provider]
15977
- };
16435
+ function escapeLikeTerm(term) {
16436
+ return term.replace(/[\\%_]/g, (char) => `\\${char}`);
16437
+ }
16438
+ function likeParams(terms, fieldsPerTerm) {
16439
+ return terms.flatMap((term) => Array.from({ length: fieldsPerTerm }, () => `%${escapeLikeTerm(term)}%`));
16440
+ }
16441
+ function scoreFromRank(rank, index) {
16442
+ const rankScore = Number.isFinite(rank) ? 1 / (1 + Math.abs(rank)) : 0;
16443
+ const orderScore = 1 / (1 + index);
16444
+ return roundScore(Math.max(rankScore, orderScore));
16445
+ }
16446
+ function catalogScore(haystack, terms) {
16447
+ if (terms.length === 0)
16448
+ return 0;
16449
+ const matched = terms.filter((term) => haystack.includes(term)).length;
16450
+ if (matched === 0)
16451
+ return 0;
16452
+ return roundScore(Math.min(0.85, 0.35 + matched / terms.length * 0.5));
16453
+ }
16454
+ function semanticScore(score) {
16455
+ return roundScore(Math.max(0, Math.min(1, (score + 1) / 2)));
16456
+ }
16457
+ function roundScore(score) {
16458
+ return Number(score.toFixed(6));
16459
+ }
16460
+ function combinedScore(scores, citation) {
16461
+ const keyword = scores.keyword ?? 0;
16462
+ const semantic = scores.semantic ?? 0;
16463
+ const catalog = scores.catalog ?? 0;
16464
+ const citationBoost = citation?.chunk_id ? 0.05 : 0;
16465
+ return roundScore(Math.min(1, keyword * 0.55 + semantic * 0.4 + catalog * 0.35 + citationBoost));
16466
+ }
16467
+ function existingProvenance(metadata) {
16468
+ const provenance = metadata.provenance;
16469
+ return provenance && typeof provenance === "object" && !Array.isArray(provenance) ? provenance : null;
16470
+ }
16471
+ function provenanceForChunk2(row) {
16472
+ const metadata = parseJsonObject3(row.chunk_metadata_json);
16473
+ const existing = existingProvenance(metadata);
16474
+ if (existing)
16475
+ return existing;
16476
+ if (!row.source_revision_id && !row.source_uri)
16477
+ return null;
16478
+ return sourceProvenance({
16479
+ source_ref: metadataString3(metadata, ["source_ref"]),
16480
+ source_uri: row.source_uri ?? metadataString3(metadata, ["source_uri"]),
16481
+ source_kind: row.source_kind ?? metadataString3(metadata, ["source_kind"]),
16482
+ source_revision_id: row.source_revision_id,
16483
+ revision: row.revision ?? metadataString3(metadata, ["revision"]),
16484
+ hash: row.hash ?? metadataString3(metadata, ["hash"]),
16485
+ chunk_id: row.chunk_id,
16486
+ start_offset: row.start_offset ?? metadataNumber3(metadata, ["start_offset"]),
16487
+ end_offset: row.end_offset ?? metadataNumber3(metadata, ["end_offset"]),
16488
+ status: metadataString3(metadata, ["status"]),
16489
+ resolver: "open-files-read-only"
15978
16490
  });
15979
16491
  }
15980
- function providerCredentialStatus(config2, env = process.env) {
15981
- return Object.keys(DEFAULT_PROVIDER_SETTINGS).map((provider) => {
15982
- const settings = providerSettings(config2, provider);
15983
- const configured = Boolean(env[settings.api_key_env]);
15984
- return {
15985
- provider,
15986
- api_key_env: settings.api_key_env,
15987
- configured,
15988
- source: configured ? "env" : "missing",
15989
- base_url: settings.base_url ?? null,
15990
- default_model: settings.default_model
15991
- };
16492
+ function selectFtsChunks(db, ftsQuery, limit) {
16493
+ if (!ftsQuery)
16494
+ return [];
16495
+ return db.query(`SELECT
16496
+ chunks_fts.chunk_id,
16497
+ c.kind AS chunk_kind,
16498
+ c.wiki_page_id,
16499
+ c.text,
16500
+ c.token_count,
16501
+ c.start_offset,
16502
+ c.end_offset,
16503
+ c.metadata_json AS chunk_metadata_json,
16504
+ c.source_revision_id,
16505
+ sr.revision,
16506
+ sr.hash,
16507
+ s.uri AS source_uri,
16508
+ s.kind AS source_kind,
16509
+ s.title AS source_title,
16510
+ wp.path AS wiki_path,
16511
+ wp.title AS wiki_title,
16512
+ wp.artifact_uri AS wiki_artifact_uri,
16513
+ wp.content_hash AS wiki_content_hash,
16514
+ wp.status AS wiki_status,
16515
+ wp.metadata_json AS wiki_metadata_json,
16516
+ bm25(chunks_fts) AS rank
16517
+ FROM chunks_fts
16518
+ JOIN chunks c ON c.id = chunks_fts.chunk_id
16519
+ LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
16520
+ LEFT JOIN sources s ON s.id = sr.source_id
16521
+ LEFT JOIN wiki_pages wp ON wp.id = c.wiki_page_id
16522
+ WHERE chunks_fts MATCH ?
16523
+ ORDER BY rank ASC
16524
+ LIMIT ?`).all(ftsQuery, limit);
16525
+ }
16526
+ function catalogWhere(fields, terms) {
16527
+ if (terms.length === 0)
16528
+ return "1 = 0";
16529
+ const clauses = terms.map(() => `(${fields.map((field) => `lower(COALESCE(${field}, '')) LIKE ? ESCAPE '\\'`).join(" OR ")})`);
16530
+ return clauses.join(" OR ");
16531
+ }
16532
+ function selectWikiPages(db, terms, limit) {
16533
+ const fields = ["path", "title", "artifact_uri", "metadata_json"];
16534
+ return db.query(`SELECT id, path, title, artifact_uri, content_hash, status, metadata_json
16535
+ FROM wiki_pages
16536
+ WHERE status = 'active' AND (${catalogWhere(fields, terms)})
16537
+ ORDER BY updated_at DESC
16538
+ LIMIT ?`).all(...likeParams(terms, fields.length), limit);
16539
+ }
16540
+ function selectKnowledgeIndexes(db, terms, limit) {
16541
+ const fields = ["kind", "name", "shard_key", "artifact_uri", "metadata_json"];
16542
+ return db.query(`SELECT id, kind, name, artifact_uri, shard_key, metadata_json
16543
+ FROM knowledge_indexes
16544
+ WHERE ${catalogWhere(fields, terms)}
16545
+ ORDER BY updated_at DESC
16546
+ LIMIT ?`).all(...likeParams(terms, fields.length), limit);
16547
+ }
16548
+ function chunkResult(row, keywordScore) {
16549
+ const metadata = parseJsonObject3(row.chunk_metadata_json);
16550
+ const provenance = provenanceForChunk2(row);
16551
+ const sourceRef = metadataString3(metadata, ["source_ref"]);
16552
+ const sourceUri = row.source_uri ?? metadataString3(metadata, ["source_uri"]);
16553
+ const isWiki = Boolean(row.wiki_page_id);
16554
+ const result = {
16555
+ kind: isWiki ? "wiki_chunk" : "source_chunk",
16556
+ id: row.chunk_id,
16557
+ title: isWiki ? row.wiki_title : row.source_title,
16558
+ text: row.text,
16559
+ score: 0,
16560
+ scores: { keyword: keywordScore },
16561
+ source: sourceUri || sourceRef ? {
16562
+ uri: sourceUri,
16563
+ ref: sourceRef,
16564
+ kind: row.source_kind ?? metadataString3(metadata, ["source_kind"]),
16565
+ revision: row.revision ?? metadataString3(metadata, ["revision"]),
16566
+ hash: row.hash ?? metadataString3(metadata, ["hash"])
16567
+ } : null,
16568
+ citation: {
16569
+ chunk_id: row.chunk_id,
16570
+ start_offset: row.start_offset,
16571
+ end_offset: row.end_offset
16572
+ },
16573
+ artifact: isWiki ? {
16574
+ uri: row.wiki_artifact_uri,
16575
+ path: row.wiki_path,
16576
+ hash: row.wiki_content_hash,
16577
+ shard_key: row.wiki_path
16578
+ } : null,
16579
+ provenance,
16580
+ reasons: ["keyword_match"]
16581
+ };
16582
+ result.score = combinedScore(result.scores, result.citation);
16583
+ return result;
16584
+ }
16585
+ function wikiPageResult(row, terms) {
16586
+ const metadata = parseJsonObject3(row.metadata_json);
16587
+ const score = catalogScore(`${row.path} ${row.title} ${row.artifact_uri ?? ""} ${row.metadata_json}`.toLowerCase(), terms);
16588
+ const result = {
16589
+ kind: "wiki_page",
16590
+ id: row.id,
16591
+ title: row.title,
16592
+ text: null,
16593
+ score: 0,
16594
+ scores: { catalog: score },
16595
+ source: null,
16596
+ citation: null,
16597
+ artifact: {
16598
+ uri: row.artifact_uri,
16599
+ path: row.path,
16600
+ hash: row.content_hash,
16601
+ shard_key: row.path
16602
+ },
16603
+ provenance: existingProvenance(metadata),
16604
+ reasons: ["wiki_catalog_match"]
16605
+ };
16606
+ result.score = combinedScore(result.scores, result.citation);
16607
+ return result;
16608
+ }
16609
+ function indexResult(row, terms) {
16610
+ const metadata = parseJsonObject3(row.metadata_json);
16611
+ const score = catalogScore(`${row.kind} ${row.name} ${row.shard_key ?? ""} ${row.artifact_uri ?? ""} ${row.metadata_json}`.toLowerCase(), terms);
16612
+ const result = {
16613
+ kind: "knowledge_index",
16614
+ id: row.id,
16615
+ title: row.name,
16616
+ text: null,
16617
+ score: 0,
16618
+ scores: { catalog: score },
16619
+ source: null,
16620
+ citation: null,
16621
+ artifact: {
16622
+ uri: row.artifact_uri,
16623
+ path: metadataString3(metadata, ["artifact_key"]),
16624
+ hash: metadataString3(metadata, ["content_hash"]),
16625
+ shard_key: row.shard_key
16626
+ },
16627
+ provenance: existingProvenance(metadata),
16628
+ reasons: ["index_catalog_match"]
16629
+ };
16630
+ result.score = combinedScore(result.scores, result.citation);
16631
+ return result;
16632
+ }
16633
+ function mergeResult(results, entry) {
16634
+ const key = `${entry.kind}:${entry.id}`;
16635
+ const existing = results.get(key);
16636
+ if (!existing) {
16637
+ results.set(key, entry);
16638
+ return;
16639
+ }
16640
+ existing.scores = {
16641
+ keyword: Math.max(existing.scores.keyword ?? 0, entry.scores.keyword ?? 0) || undefined,
16642
+ semantic: Math.max(existing.scores.semantic ?? 0, entry.scores.semantic ?? 0) || undefined,
16643
+ catalog: Math.max(existing.scores.catalog ?? 0, entry.scores.catalog ?? 0) || undefined
16644
+ };
16645
+ existing.reasons = unique([...existing.reasons, ...entry.reasons]);
16646
+ existing.text = existing.text ?? entry.text;
16647
+ existing.title = existing.title ?? entry.title;
16648
+ existing.source = existing.source ?? entry.source;
16649
+ existing.citation = existing.citation ?? entry.citation;
16650
+ existing.artifact = existing.artifact ?? entry.artifact;
16651
+ existing.provenance = existing.provenance ?? entry.provenance;
16652
+ existing.score = combinedScore(existing.scores, existing.citation);
16653
+ }
16654
+ function sortResults(results) {
16655
+ const kindOrder = {
16656
+ source_chunk: 0,
16657
+ wiki_chunk: 1,
16658
+ wiki_page: 2,
16659
+ knowledge_index: 3
16660
+ };
16661
+ return results.sort((a, b) => {
16662
+ if (b.score !== a.score)
16663
+ return b.score - a.score;
16664
+ return kindOrder[a.kind] - kindOrder[b.kind] || a.id.localeCompare(b.id);
15992
16665
  });
15993
16666
  }
15994
- function providerStatus(config2, env = process.env) {
16667
+ async function hybridSearch(options) {
16668
+ const query = options.query.trim();
16669
+ if (!query)
16670
+ throw new Error("Search query is required.");
16671
+ const limit = Math.max(1, Math.min(options.limit ?? 10, 100));
16672
+ const terms = queryTerms(query);
16673
+ const ftsQuery = ftsQueryForTerms(terms);
16674
+ const semanticEnabled = options.semantic === true || options.fake === true || Boolean(options.modelRef);
16675
+ const warnings = [];
16676
+ let semanticProvider = null;
16677
+ let semanticModel = null;
16678
+ let semanticDimensions = null;
16679
+ let keywordCount = 0;
16680
+ let catalogCount = 0;
16681
+ let semanticCount = 0;
16682
+ const merged = new Map;
16683
+ migrateKnowledgeDb(options.dbPath);
16684
+ const db = openKnowledgeDb(options.dbPath);
16685
+ try {
16686
+ const ftsRows = selectFtsChunks(db, ftsQuery, Math.max(limit * 3, 20));
16687
+ keywordCount = ftsRows.length;
16688
+ ftsRows.forEach((row, index) => mergeResult(merged, chunkResult(row, scoreFromRank(row.rank, index))));
16689
+ const wikiRows = selectWikiPages(db, terms, Math.max(limit, 10));
16690
+ const indexRows = selectKnowledgeIndexes(db, terms, Math.max(limit, 10));
16691
+ catalogCount = wikiRows.length + indexRows.length;
16692
+ wikiRows.forEach((row) => mergeResult(merged, wikiPageResult(row, terms)));
16693
+ indexRows.forEach((row) => mergeResult(merged, indexResult(row, terms)));
16694
+ } finally {
16695
+ db.close();
16696
+ }
16697
+ if (semanticEnabled) {
16698
+ try {
16699
+ const semantic = await searchVectorIndex({
16700
+ dbPath: options.dbPath,
16701
+ query,
16702
+ limit: Math.max(limit * 3, 20),
16703
+ config: options.config,
16704
+ env: options.env,
16705
+ modelRef: options.modelRef,
16706
+ dimensions: options.dimensions,
16707
+ fake: options.fake,
16708
+ batchSize: options.batchSize,
16709
+ maxParallelCalls: options.maxParallelCalls
16710
+ });
16711
+ semanticProvider = semantic.provider;
16712
+ semanticModel = semantic.model;
16713
+ semanticDimensions = semantic.dimensions;
16714
+ semanticCount = semantic.results.length;
16715
+ for (const row of semantic.results) {
16716
+ const result = {
16717
+ kind: "source_chunk",
16718
+ id: row.chunk_id,
16719
+ title: null,
16720
+ text: row.text,
16721
+ score: 0,
16722
+ scores: { semantic: semanticScore(row.score) },
16723
+ source: {
16724
+ uri: row.source_uri,
16725
+ ref: row.source_ref,
16726
+ kind: row.provenance?.source_kind ?? null,
16727
+ revision: row.revision,
16728
+ hash: row.hash
16729
+ },
16730
+ citation: {
16731
+ chunk_id: row.chunk_id,
16732
+ start_offset: row.provenance?.start_offset ?? null,
16733
+ end_offset: row.provenance?.end_offset ?? null
16734
+ },
16735
+ artifact: null,
16736
+ provenance: row.provenance,
16737
+ reasons: ["semantic_match"]
16738
+ };
16739
+ result.score = combinedScore(result.scores, result.citation);
16740
+ mergeResult(merged, result);
16741
+ }
16742
+ } catch (error48) {
16743
+ warnings.push(`semantic_search_failed: ${error48 instanceof Error ? error48.message : String(error48)}`);
16744
+ }
16745
+ }
16746
+ const results = sortResults(Array.from(merged.values())).slice(0, limit);
15995
16747
  return {
15996
- default_model: resolveModelRef("default", config2),
15997
- providers: providerCredentialStatus(config2, env),
15998
- models: listModelRegistry(config2)
16748
+ query,
16749
+ limit,
16750
+ mode: {
16751
+ keyword: true,
16752
+ catalog: true,
16753
+ semantic: semanticEnabled
16754
+ },
16755
+ semantic_provider: semanticProvider,
16756
+ semantic_model: semanticModel,
16757
+ semantic_dimensions: semanticDimensions,
16758
+ counts: {
16759
+ keyword_results: keywordCount,
16760
+ catalog_results: catalogCount,
16761
+ semantic_results: semanticCount,
16762
+ merged_results: results.length
16763
+ },
16764
+ warnings,
16765
+ results
15999
16766
  };
16000
16767
  }
16001
16768
 
16002
16769
  // src/storage-contract.ts
16003
- import { createHash as createHash5, randomUUID as randomUUID4 } from "crypto";
16770
+ import { createHash as createHash6, randomUUID as randomUUID4 } from "crypto";
16004
16771
  var GENERATED_ARTIFACTS = [
16005
16772
  {
16006
16773
  kind: "schema",
@@ -16036,7 +16803,7 @@ var GENERATED_ARTIFACTS = [
16036
16803
  function hashArtifactBody(body) {
16037
16804
  const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
16038
16805
  return {
16039
- hash: `sha256:${createHash5("sha256").update(bytes).digest("hex")}`,
16806
+ hash: `sha256:${createHash6("sha256").update(bytes).digest("hex")}`,
16040
16807
  size_bytes: bytes.byteLength
16041
16808
  };
16042
16809
  }
@@ -16171,15 +16938,19 @@ function recordStorageObjects(db, objects, now = new Date) {
16171
16938
  }
16172
16939
 
16173
16940
  // src/wiki-layout.ts
16174
- import { createHash as createHash6 } from "crypto";
16941
+ import { createHash as createHash7 } from "crypto";
16175
16942
  function todayParts(now) {
16176
16943
  const year = String(now.getUTCFullYear());
16177
16944
  const month = String(now.getUTCMonth() + 1).padStart(2, "0");
16178
16945
  const day = String(now.getUTCDate()).padStart(2, "0");
16179
16946
  return { year, month, day };
16180
16947
  }
16181
- function stableId3(prefix, value) {
16182
- return `${prefix}_${createHash6("sha256").update(value).digest("hex").slice(0, 20)}`;
16948
+ function stableId4(prefix, value) {
16949
+ return `${prefix}_${createHash7("sha256").update(value).digest("hex").slice(0, 20)}`;
16950
+ }
16951
+ function estimateTokenCount2(text) {
16952
+ const words = text.trim().split(/\s+/).filter(Boolean).length;
16953
+ return Math.max(1, Math.ceil(words * 1.25));
16183
16954
  }
16184
16955
  function agentSchemaTemplate() {
16185
16956
  return `# Knowledge Agent Schema v1
@@ -16291,6 +17062,33 @@ function provenanceFor(artifact) {
16291
17062
  artifact_key: artifact.key
16292
17063
  });
16293
17064
  }
17065
+ function recordWikiChunk(db, pageId, title, artifact, body, now) {
17066
+ const provenance = provenanceFor(artifact);
17067
+ const chunkId = stableId4("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);
17068
+ const existing = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
17069
+ for (const row of existing)
17070
+ db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [row.id]);
17071
+ db.run("DELETE FROM chunks WHERE wiki_page_id = ?", [pageId]);
17072
+ db.run(`INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
17073
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
17074
+ chunkId,
17075
+ pageId,
17076
+ "wiki",
17077
+ 0,
17078
+ body,
17079
+ estimateTokenCount2(body),
17080
+ 0,
17081
+ body.length,
17082
+ JSON.stringify({
17083
+ artifact_key: artifact.key,
17084
+ artifact_uri: artifact.uri,
17085
+ content_hash: artifact.hash ?? null,
17086
+ provenance
17087
+ }),
17088
+ now
17089
+ ]);
17090
+ db.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)", [chunkId, body, title, artifact.uri]);
17091
+ }
16294
17092
  function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16295
17093
  const timestamp = now.toISOString();
16296
17094
  const rootIndex = artifacts.find((artifact) => artifact.key.endsWith("indexes/root.md"));
@@ -16302,7 +17100,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16302
17100
  artifact_uri = excluded.artifact_uri,
16303
17101
  metadata_json = excluded.metadata_json,
16304
17102
  updated_at = excluded.updated_at`, [
16305
- stableId3("idx", "root:indexes/root.md"),
17103
+ stableId4("idx", "root:indexes/root.md"),
16306
17104
  "root",
16307
17105
  "root",
16308
17106
  rootIndex.uri,
@@ -16317,6 +17115,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16317
17115
  ]);
16318
17116
  }
16319
17117
  if (wikiReadme) {
17118
+ const wikiPageId = stableId4("wiki", "wiki/README.md");
16320
17119
  db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
16321
17120
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
16322
17121
  ON CONFLICT(path) DO UPDATE SET
@@ -16326,7 +17125,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16326
17125
  status = excluded.status,
16327
17126
  metadata_json = excluded.metadata_json,
16328
17127
  updated_at = excluded.updated_at`, [
16329
- stableId3("wiki", "wiki/README.md"),
17128
+ wikiPageId,
16330
17129
  "wiki/README.md",
16331
17130
  "Wiki",
16332
17131
  wikiReadme.uri,
@@ -16339,6 +17138,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16339
17138
  timestamp,
16340
17139
  timestamp
16341
17140
  ]);
17141
+ recordWikiChunk(db, wikiPageId, "Wiki", wikiReadme, wikiReadmeTemplate(), timestamp);
16342
17142
  }
16343
17143
  }
16344
17144
 
@@ -16467,6 +17267,34 @@ class KnowledgeService {
16467
17267
  modelRegistry() {
16468
17268
  return listModelRegistry(this.config());
16469
17269
  }
17270
+ embeddingStatus() {
17271
+ const workspace = this.ensureWorkspace();
17272
+ return embeddingIndexStatus(workspace.knowledgeDbPath);
17273
+ }
17274
+ async indexEmbeddings(options = {}) {
17275
+ const workspace = this.ensureWorkspace();
17276
+ return indexKnowledgeEmbeddings({
17277
+ ...options,
17278
+ dbPath: workspace.knowledgeDbPath,
17279
+ config: this.config()
17280
+ });
17281
+ }
17282
+ async semanticSearch(options) {
17283
+ const workspace = this.ensureWorkspace();
17284
+ return searchVectorIndex({
17285
+ ...options,
17286
+ dbPath: workspace.knowledgeDbPath,
17287
+ config: this.config()
17288
+ });
17289
+ }
17290
+ async search(options) {
17291
+ const workspace = this.ensureWorkspace();
17292
+ return hybridSearch({
17293
+ ...options,
17294
+ dbPath: workspace.knowledgeDbPath,
17295
+ config: this.config()
17296
+ });
17297
+ }
16470
17298
  }
16471
17299
  function createKnowledgeService(options = {}) {
16472
17300
  return new KnowledgeService(options);
@@ -16581,6 +17409,57 @@ function buildServer() {
16581
17409
  const service = createKnowledgeService({ scope });
16582
17410
  return jsonText({ ok: true, models: service.modelRegistry() });
16583
17411
  });
17412
+ registerTool(server, "ok_embeddings_status", "Embedding index status", "Inspect local embedding/vector index counts by provider and model", {
17413
+ scope: scopeField
17414
+ }, async ({ scope }) => {
17415
+ const service = createKnowledgeService({ scope });
17416
+ return jsonText({ ok: true, ...service.embeddingStatus() });
17417
+ });
17418
+ registerTool(server, "ok_embeddings_index", "Index embeddings", "Embed unindexed knowledge chunks into the local vector index", {
17419
+ scope: scopeField,
17420
+ limit: exports_external.number().optional().describe("Maximum chunks to embed"),
17421
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
17422
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
17423
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
17424
+ }, async ({ scope, limit, model, dimensions, fake }) => {
17425
+ const service = createKnowledgeService({ scope });
17426
+ try {
17427
+ return jsonText({ ok: true, ...await service.indexEmbeddings({ limit, modelRef: model, dimensions, fake }) });
17428
+ } catch (error48) {
17429
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
17430
+ }
17431
+ });
17432
+ registerTool(server, "ok_semantic_search", "Semantic search", "Search the local vector index and return cited chunks with provenance", {
17433
+ scope: scopeField,
17434
+ query: exports_external.string().describe("Semantic query"),
17435
+ limit: exports_external.number().optional().describe("Maximum results"),
17436
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
17437
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
17438
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
17439
+ }, async ({ scope, query, limit, model, dimensions, fake }) => {
17440
+ const service = createKnowledgeService({ scope });
17441
+ try {
17442
+ return jsonText({ ok: true, ...await service.semanticSearch({ query, limit, modelRef: model, dimensions, fake }) });
17443
+ } catch (error48) {
17444
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
17445
+ }
17446
+ });
17447
+ registerTool(server, "ok_search", "Hybrid knowledge search", "Search source chunks, generated wiki pages, sharded indexes, and optional semantic vectors", {
17448
+ scope: scopeField,
17449
+ query: exports_external.string().describe("Search query"),
17450
+ limit: exports_external.number().optional().describe("Maximum results"),
17451
+ semantic: exports_external.boolean().optional().describe("Include vector semantic results"),
17452
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
17453
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
17454
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
17455
+ }, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
17456
+ const service = createKnowledgeService({ scope });
17457
+ try {
17458
+ return jsonText({ ok: true, ...await service.search({ query, limit, semantic, modelRef: model, dimensions, fake }) });
17459
+ } catch (error48) {
17460
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
17461
+ }
17462
+ });
16584
17463
  registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
16585
17464
  title: exports_external.string().describe("Item title"),
16586
17465
  content: exports_external.string().describe("Item content/body"),