@hasna/knowledge 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
13660
13660
  // package.json
13661
13661
  var package_default = {
13662
13662
  name: "@hasna/knowledge",
13663
- version: "0.2.13",
13663
+ version: "0.2.14",
13664
13664
  description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
13665
13665
  type: "module",
13666
13666
  bin: {
@@ -13790,6 +13790,12 @@ function defaultKnowledgeConfig() {
13790
13790
  default_model: "deepseek-chat"
13791
13791
  }
13792
13792
  },
13793
+ embeddings: {
13794
+ default_model: "openai:text-embedding-3-small",
13795
+ dimensions: 1536,
13796
+ batch_size: 64,
13797
+ max_parallel_calls: 4
13798
+ },
13793
13799
  safety: {
13794
13800
  network: {
13795
13801
  web_search_enabled: false,
@@ -14128,10 +14134,8 @@ function createArtifactStore(config2, workspace) {
14128
14134
  return new LocalArtifactStore(workspace.artifactsDir);
14129
14135
  }
14130
14136
 
14131
- // src/outbox-consume.ts
14132
- import { createHash as createHash2, randomUUID as randomUUID3 } from "crypto";
14133
- import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
14134
- import { basename } from "path";
14137
+ // src/embeddings.ts
14138
+ import { createHash } from "crypto";
14135
14139
 
14136
14140
  // src/knowledge-db.ts
14137
14141
  import { Database } from "bun:sqlite";
@@ -14349,6 +14353,38 @@ CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
14349
14353
  INSERT OR IGNORE INTO schema_versions(version, applied_at)
14350
14354
  VALUES (3, datetime('now'));
14351
14355
  `;
14356
+ var MIGRATION_4 = `
14357
+ CREATE TABLE IF NOT EXISTS vector_index_entries (
14358
+ id TEXT PRIMARY KEY,
14359
+ chunk_id TEXT NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
14360
+ source_revision_id TEXT REFERENCES source_revisions(id) ON DELETE CASCADE,
14361
+ provider TEXT NOT NULL,
14362
+ model TEXT NOT NULL,
14363
+ dimensions INTEGER NOT NULL,
14364
+ vector_json TEXT NOT NULL,
14365
+ vector_norm REAL NOT NULL,
14366
+ source_uri TEXT,
14367
+ source_ref TEXT,
14368
+ revision TEXT,
14369
+ hash TEXT,
14370
+ start_offset INTEGER,
14371
+ end_offset INTEGER,
14372
+ token_count INTEGER,
14373
+ status TEXT NOT NULL DEFAULT 'active',
14374
+ metadata_json TEXT NOT NULL DEFAULT '{}',
14375
+ created_at TEXT NOT NULL,
14376
+ updated_at TEXT NOT NULL,
14377
+ UNIQUE(chunk_id, provider, model)
14378
+ );
14379
+
14380
+ CREATE INDEX IF NOT EXISTS idx_vector_index_provider_model ON vector_index_entries(provider, model);
14381
+ CREATE INDEX IF NOT EXISTS idx_vector_index_source_revision ON vector_index_entries(source_revision_id);
14382
+ CREATE INDEX IF NOT EXISTS idx_vector_index_source_uri ON vector_index_entries(source_uri);
14383
+ CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(status);
14384
+
14385
+ INSERT OR IGNORE INTO schema_versions(version, applied_at)
14386
+ VALUES (4, datetime('now'));
14387
+ `;
14352
14388
  function openKnowledgeDb(path) {
14353
14389
  ensureParentDir(path);
14354
14390
  const db = new Database(path);
@@ -14364,6 +14400,8 @@ function migrateKnowledgeDb(path) {
14364
14400
  db.exec(MIGRATION_2);
14365
14401
  if (getSchemaVersion(db) < 3)
14366
14402
  db.exec(MIGRATION_3);
14403
+ if (getSchemaVersion(db) < 4)
14404
+ db.exec(MIGRATION_4);
14367
14405
  return { path, schema_version: getSchemaVersion(db) };
14368
14406
  } finally {
14369
14407
  db.close();
@@ -14393,15 +14431,530 @@ function getKnowledgeDbStats(path) {
14393
14431
  redaction_findings: count(db, "redaction_findings"),
14394
14432
  audit_events: count(db, "audit_events"),
14395
14433
  approval_gates: count(db, "approval_gates"),
14396
- storage_objects: count(db, "storage_objects")
14434
+ storage_objects: count(db, "storage_objects"),
14435
+ embeddings: count(db, "chunk_embeddings"),
14436
+ vector_entries: count(db, "vector_index_entries")
14437
+ };
14438
+ } finally {
14439
+ db.close();
14440
+ }
14441
+ }
14442
+
14443
+ // src/providers.ts
14444
+ var DEFAULT_PROVIDER_SETTINGS = {
14445
+ openai: {
14446
+ api_key_env: "OPENAI_API_KEY",
14447
+ default_model: "gpt-5.2"
14448
+ },
14449
+ anthropic: {
14450
+ api_key_env: "ANTHROPIC_API_KEY",
14451
+ default_model: "claude-sonnet-4-6"
14452
+ },
14453
+ deepseek: {
14454
+ api_key_env: "DEEPSEEK_API_KEY",
14455
+ default_model: "deepseek-chat"
14456
+ }
14457
+ };
14458
+ var PROVIDER_CAPABILITIES = {
14459
+ openai: {
14460
+ text_generation: true,
14461
+ structured_output: true,
14462
+ tool_usage: true,
14463
+ tool_streaming: true,
14464
+ image_input: true,
14465
+ native_web_search: true,
14466
+ reasoning: true,
14467
+ embeddings: true
14468
+ },
14469
+ anthropic: {
14470
+ text_generation: true,
14471
+ structured_output: true,
14472
+ tool_usage: true,
14473
+ tool_streaming: true,
14474
+ image_input: true,
14475
+ native_web_search: false,
14476
+ reasoning: true,
14477
+ embeddings: false
14478
+ },
14479
+ deepseek: {
14480
+ text_generation: true,
14481
+ structured_output: true,
14482
+ tool_usage: true,
14483
+ tool_streaming: true,
14484
+ image_input: false,
14485
+ native_web_search: false,
14486
+ reasoning: true,
14487
+ embeddings: false
14488
+ }
14489
+ };
14490
+ var BUILTIN_ALIASES = {
14491
+ default: "openai:gpt-5.2",
14492
+ fast: "openai:gpt-5-mini",
14493
+ reasoning: "anthropic:claude-opus-4-6",
14494
+ sonnet: "anthropic:claude-sonnet-4-6",
14495
+ deepseek: "deepseek:deepseek-chat",
14496
+ "deepseek-reasoning": "deepseek:deepseek-reasoner"
14497
+ };
14498
+ function providerConfig(config2) {
14499
+ return config2.providers ?? {};
14500
+ }
14501
+ function providerSettings(config2, provider) {
14502
+ const configured = providerConfig(config2)[provider] ?? {};
14503
+ return {
14504
+ ...DEFAULT_PROVIDER_SETTINGS[provider],
14505
+ ...configured
14506
+ };
14507
+ }
14508
+ function modelAliases(config2) {
14509
+ const configured = providerConfig(config2);
14510
+ return {
14511
+ ...BUILTIN_ALIASES,
14512
+ ...configured.default_model ? { default: configured.default_model } : {},
14513
+ ...configured.aliases ?? {}
14514
+ };
14515
+ }
14516
+ function parseModelRef(modelRef) {
14517
+ const [provider, ...rest] = modelRef.split(":");
14518
+ const model = rest.join(":");
14519
+ if (provider !== "openai" && provider !== "anthropic" && provider !== "deepseek") {
14520
+ throw new Error(`Unsupported AI provider: ${provider}`);
14521
+ }
14522
+ if (!model)
14523
+ throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
14524
+ return { provider, model };
14525
+ }
14526
+ function resolveModelRef(aliasOrRef, config2) {
14527
+ const aliases = modelAliases(config2);
14528
+ return aliases[aliasOrRef] ?? aliasOrRef;
14529
+ }
14530
+ function listModelRegistry(config2) {
14531
+ const aliases = modelAliases(config2);
14532
+ return Object.entries(aliases).map(([alias, modelRef]) => {
14533
+ const parsed = parseModelRef(modelRef);
14534
+ return {
14535
+ alias,
14536
+ model_ref: modelRef,
14537
+ provider: parsed.provider,
14538
+ model: parsed.model,
14539
+ default: alias === "default",
14540
+ capabilities: PROVIDER_CAPABILITIES[parsed.provider]
14541
+ };
14542
+ });
14543
+ }
14544
+ function providerCredentialStatus(config2, env = process.env) {
14545
+ return Object.keys(DEFAULT_PROVIDER_SETTINGS).map((provider) => {
14546
+ const settings = providerSettings(config2, provider);
14547
+ const configured = Boolean(env[settings.api_key_env]);
14548
+ return {
14549
+ provider,
14550
+ api_key_env: settings.api_key_env,
14551
+ configured,
14552
+ source: configured ? "env" : "missing",
14553
+ base_url: settings.base_url ?? null,
14554
+ default_model: settings.default_model
14555
+ };
14556
+ });
14557
+ }
14558
+ function providerStatus(config2, env = process.env) {
14559
+ return {
14560
+ default_model: resolveModelRef("default", config2),
14561
+ providers: providerCredentialStatus(config2, env),
14562
+ models: listModelRegistry(config2)
14563
+ };
14564
+ }
14565
+ function assertProviderCredentials(provider, config2, env = process.env) {
14566
+ const status = providerCredentialStatus(config2, env).find((entry) => entry.provider === provider);
14567
+ if (!status)
14568
+ throw new Error(`Unsupported AI provider: ${provider}`);
14569
+ if (!status.configured)
14570
+ throw new Error(`Missing ${status.api_key_env} for ${provider}. Set the env var to use this provider.`);
14571
+ return status;
14572
+ }
14573
+
14574
+ // src/provenance.ts
14575
+ function isStaleStatus(status) {
14576
+ return ["deleted", "stale", "invalidated", "reindex_required"].includes((status ?? "").toLowerCase());
14577
+ }
14578
+ function sourceProvenance(input) {
14579
+ const status = input.status ?? null;
14580
+ return {
14581
+ source_owner: "open-files",
14582
+ source_ref: input.source_ref ?? null,
14583
+ source_uri: input.source_uri ?? null,
14584
+ source_kind: input.source_kind ?? null,
14585
+ source_revision_id: input.source_revision_id ?? null,
14586
+ revision: input.revision ?? null,
14587
+ hash: input.hash ?? null,
14588
+ chunk_id: input.chunk_id ?? null,
14589
+ start_offset: input.start_offset ?? null,
14590
+ end_offset: input.end_offset ?? null,
14591
+ status,
14592
+ read_only: true,
14593
+ citation_required: true,
14594
+ resolver: input.resolver ?? null,
14595
+ stale: isStaleStatus(status)
14596
+ };
14597
+ }
14598
+ function generatedArtifactProvenance(input) {
14599
+ return {
14600
+ source_owner: "open-files",
14601
+ generated_from: input.generated_from,
14602
+ artifact_key: input.artifact_key,
14603
+ source_refs: input.source_refs ?? [],
14604
+ read_only_sources: true,
14605
+ citation_required: input.citation_required ?? true,
14606
+ raw_source_bytes_stored_in_open_knowledge: false
14607
+ };
14608
+ }
14609
+ function withProvenance(metadata, provenance) {
14610
+ return {
14611
+ ...metadata,
14612
+ provenance
14613
+ };
14614
+ }
14615
+
14616
+ // src/embeddings.ts
14617
+ var DEFAULT_EMBEDDING_MODEL_REF = "openai:text-embedding-3-small";
14618
+ var DEFAULT_EMBEDDING_DIMENSIONS = 1536;
14619
+ function embeddingConfig(config2) {
14620
+ return config2?.embeddings ?? {};
14621
+ }
14622
+ function stableId(prefix, value) {
14623
+ return `${prefix}_${createHash("sha256").update(value).digest("hex").slice(0, 20)}`;
14624
+ }
14625
+ function parseJsonObject(value) {
14626
+ if (!value)
14627
+ return {};
14628
+ try {
14629
+ const parsed = JSON.parse(value);
14630
+ return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
14631
+ } catch {
14632
+ return {};
14633
+ }
14634
+ }
14635
+ function metadataString(metadata, keys) {
14636
+ for (const key of keys) {
14637
+ const value = metadata[key];
14638
+ if (typeof value === "string" && value.length > 0)
14639
+ return value;
14640
+ }
14641
+ return null;
14642
+ }
14643
+ function metadataNumber(metadata, keys) {
14644
+ for (const key of keys) {
14645
+ const value = metadata[key];
14646
+ if (typeof value === "number" && Number.isFinite(value))
14647
+ return value;
14648
+ }
14649
+ return null;
14650
+ }
14651
+ function vectorNorm(vector) {
14652
+ return Math.sqrt(vector.reduce((sum, value) => sum + value * value, 0));
14653
+ }
14654
+ function cosineSimilarity(a, b, bNorm = vectorNorm(b)) {
14655
+ const aNorm = vectorNorm(a);
14656
+ if (aNorm === 0 || bNorm === 0)
14657
+ return 0;
14658
+ const length = Math.min(a.length, b.length);
14659
+ let dot = 0;
14660
+ for (let i = 0;i < length; i += 1)
14661
+ dot += a[i] * b[i];
14662
+ return dot / (aNorm * bNorm);
14663
+ }
14664
+ function deterministicVector(text, dimensions) {
14665
+ const bytes = createHash("sha256").update(text).digest();
14666
+ return Array.from({ length: dimensions }, (_, index) => {
14667
+ const value = bytes[index % bytes.length] / 255;
14668
+ return Number((value * 2 - 1).toFixed(6));
14669
+ });
14670
+ }
14671
+ async function openAiEmbeddingModel(model, config2, env = process.env) {
14672
+ assertProviderCredentials("openai", config2, env);
14673
+ const settings = providerSettings(config2, "openai");
14674
+ const { createOpenAI } = await import("@ai-sdk/openai");
14675
+ const openai = createOpenAI({
14676
+ apiKey: env[settings.api_key_env],
14677
+ baseURL: settings.base_url
14678
+ });
14679
+ if (openai.embeddingModel)
14680
+ return openai.embeddingModel(model);
14681
+ if (openai.textEmbedding)
14682
+ return openai.textEmbedding(model);
14683
+ if (openai.textEmbeddingModel)
14684
+ return openai.textEmbeddingModel(model);
14685
+ throw new Error("OpenAI provider does not expose an embedding model factory.");
14686
+ }
14687
+ function resolveEmbeddingModelRef(modelRef, config2) {
14688
+ if (!modelRef || modelRef === "default" || modelRef === "embedding") {
14689
+ return embeddingConfig(config2).default_model ?? DEFAULT_EMBEDDING_MODEL_REF;
14690
+ }
14691
+ return modelRef;
14692
+ }
14693
+ async function embedTexts(texts, options = {}) {
14694
+ const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
14695
+ const parsed = parseModelRef(modelRef);
14696
+ if (parsed.provider !== "openai") {
14697
+ throw new Error(`Embedding provider ${parsed.provider} is not supported yet. Use openai:text-embedding-3-small.`);
14698
+ }
14699
+ const dimensions = options.dimensions ?? embeddingConfig(options.config).dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS;
14700
+ if (options.fake) {
14701
+ return {
14702
+ provider: parsed.provider,
14703
+ model: parsed.model,
14704
+ dimensions,
14705
+ vectors: texts.map((text) => deterministicVector(text, dimensions)),
14706
+ usage: { input_tokens: texts.reduce((sum, text) => sum + Math.max(1, Math.ceil(text.split(/\s+/).filter(Boolean).length * 1.25)), 0) }
14707
+ };
14708
+ }
14709
+ const { embedMany } = await import("ai");
14710
+ const model = await openAiEmbeddingModel(parsed.model, options.config, options.env);
14711
+ const result = await embedMany({
14712
+ model,
14713
+ values: texts,
14714
+ maxParallelCalls: options.maxParallelCalls ?? embeddingConfig(options.config).max_parallel_calls,
14715
+ providerOptions: {
14716
+ openai: {
14717
+ dimensions
14718
+ }
14719
+ }
14720
+ });
14721
+ const vectors = result.embeddings;
14722
+ return {
14723
+ provider: parsed.provider,
14724
+ model: parsed.model,
14725
+ dimensions: vectors[0]?.length ?? dimensions,
14726
+ vectors,
14727
+ usage: { input_tokens: result.usage?.tokens ?? 0 }
14728
+ };
14729
+ }
14730
+ function selectCandidateChunks(db, options) {
14731
+ const baseQuery = `SELECT
14732
+ c.id,
14733
+ c.text,
14734
+ c.token_count,
14735
+ c.start_offset,
14736
+ c.end_offset,
14737
+ c.metadata_json,
14738
+ c.source_revision_id,
14739
+ sr.revision,
14740
+ sr.hash,
14741
+ s.uri AS source_uri,
14742
+ s.kind AS source_kind
14743
+ FROM chunks c
14744
+ LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
14745
+ LEFT JOIN sources s ON s.id = sr.source_id
14746
+ LEFT JOIN vector_index_entries v
14747
+ ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
14748
+ WHERE v.id IS NULL`;
14749
+ const suffix = `
14750
+ ORDER BY c.created_at ASC, c.ordinal ASC
14751
+ LIMIT ?`;
14752
+ if (options.sourceRevisionId) {
14753
+ return db.query(`${baseQuery} AND c.source_revision_id = ?${suffix}`).all(options.provider, options.model, options.sourceRevisionId, options.limit);
14754
+ }
14755
+ return db.query(`${baseQuery}${suffix}`).all(options.provider, options.model, options.limit);
14756
+ }
14757
+ function provenanceForChunk(row) {
14758
+ const metadata = parseJsonObject(row.metadata_json);
14759
+ const existing = metadata.provenance;
14760
+ if (existing && typeof existing === "object" && !Array.isArray(existing))
14761
+ return existing;
14762
+ return sourceProvenance({
14763
+ source_ref: metadataString(metadata, ["source_ref"]),
14764
+ source_uri: row.source_uri ?? metadataString(metadata, ["source_uri"]),
14765
+ source_kind: row.source_kind ?? metadataString(metadata, ["source_kind"]),
14766
+ source_revision_id: row.source_revision_id,
14767
+ revision: row.revision ?? metadataString(metadata, ["revision"]),
14768
+ hash: row.hash ?? metadataString(metadata, ["hash"]),
14769
+ chunk_id: row.id,
14770
+ start_offset: row.start_offset ?? metadataNumber(metadata, ["start_offset"]),
14771
+ end_offset: row.end_offset ?? metadataNumber(metadata, ["end_offset"]),
14772
+ status: metadataString(metadata, ["status"]),
14773
+ resolver: "open-files-read-only"
14774
+ });
14775
+ }
14776
+ function upsertVectors(db, rows, embedding, now) {
14777
+ const insertEmbedding = db.prepare(`
14778
+ INSERT INTO chunk_embeddings (id, chunk_id, provider, model, dimensions, vector_json, created_at)
14779
+ VALUES (?, ?, ?, ?, ?, ?, ?)
14780
+ ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
14781
+ dimensions = excluded.dimensions,
14782
+ vector_json = excluded.vector_json,
14783
+ created_at = excluded.created_at
14784
+ `);
14785
+ const insertVector = db.prepare(`
14786
+ INSERT INTO vector_index_entries (
14787
+ id, chunk_id, source_revision_id, provider, model, dimensions, vector_json, vector_norm,
14788
+ source_uri, source_ref, revision, hash, start_offset, end_offset, token_count, status,
14789
+ metadata_json, created_at, updated_at
14790
+ )
14791
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
14792
+ ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
14793
+ source_revision_id = excluded.source_revision_id,
14794
+ dimensions = excluded.dimensions,
14795
+ vector_json = excluded.vector_json,
14796
+ vector_norm = excluded.vector_norm,
14797
+ source_uri = excluded.source_uri,
14798
+ source_ref = excluded.source_ref,
14799
+ revision = excluded.revision,
14800
+ hash = excluded.hash,
14801
+ start_offset = excluded.start_offset,
14802
+ end_offset = excluded.end_offset,
14803
+ token_count = excluded.token_count,
14804
+ status = excluded.status,
14805
+ metadata_json = excluded.metadata_json,
14806
+ updated_at = excluded.updated_at
14807
+ `);
14808
+ const write = db.transaction(() => {
14809
+ for (let index = 0;index < rows.length; index += 1) {
14810
+ const row = rows[index];
14811
+ const vector = embedding.vectors[index];
14812
+ if (!vector)
14813
+ continue;
14814
+ const metadata = parseJsonObject(row.metadata_json);
14815
+ const provenance = provenanceForChunk(row);
14816
+ const sourceRef = provenance.source_ref ?? metadataString(metadata, ["source_ref"]);
14817
+ const sourceUri = provenance.source_uri ?? row.source_uri ?? metadataString(metadata, ["source_uri"]);
14818
+ const revision = provenance.revision ?? row.revision ?? metadataString(metadata, ["revision"]);
14819
+ const hash2 = provenance.hash ?? row.hash ?? metadataString(metadata, ["hash"]);
14820
+ const status = provenance.status ?? metadataString(metadata, ["status"]) ?? "active";
14821
+ const vectorJson = JSON.stringify(vector);
14822
+ insertEmbedding.run(stableId("emb", `${row.id}\x00${embedding.provider}\x00${embedding.model}`), row.id, embedding.provider, embedding.model, embedding.dimensions, vectorJson, now);
14823
+ insertVector.run(stableId("vec", `${row.id}\x00${embedding.provider}\x00${embedding.model}`), row.id, row.source_revision_id, embedding.provider, embedding.model, embedding.dimensions, vectorJson, vectorNorm(vector), sourceUri, sourceRef, revision, hash2, provenance.start_offset, provenance.end_offset, row.token_count, status, JSON.stringify({
14824
+ ...metadata,
14825
+ provenance,
14826
+ embedded_at: now
14827
+ }), now, now);
14828
+ }
14829
+ });
14830
+ write();
14831
+ return rows.length;
14832
+ }
14833
+ async function indexKnowledgeEmbeddings(options) {
14834
+ const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
14835
+ const parsed = parseModelRef(modelRef);
14836
+ if (parsed.provider !== "openai")
14837
+ throw new Error(`Embedding provider ${parsed.provider} is not supported yet.`);
14838
+ const now = (options.now ?? new Date).toISOString();
14839
+ const limit = Math.max(1, Math.min(options.limit ?? 100, 1000));
14840
+ migrateKnowledgeDb(options.dbPath);
14841
+ const readDb = openKnowledgeDb(options.dbPath);
14842
+ let rows;
14843
+ try {
14844
+ rows = selectCandidateChunks(readDb, {
14845
+ provider: parsed.provider,
14846
+ model: parsed.model,
14847
+ limit,
14848
+ sourceRevisionId: options.sourceRevisionId
14849
+ });
14850
+ } finally {
14851
+ readDb.close();
14852
+ }
14853
+ if (rows.length === 0) {
14854
+ return {
14855
+ provider: parsed.provider,
14856
+ model: parsed.model,
14857
+ dimensions: options.dimensions ?? embeddingConfig(options.config).dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS,
14858
+ chunks_seen: 0,
14859
+ chunks_embedded: 0,
14860
+ embeddings_upserted: 0,
14861
+ vector_entries_upserted: 0,
14862
+ usage: { input_tokens: 0 }
14863
+ };
14864
+ }
14865
+ const embedding = await embedTexts(rows.map((row) => row.text), options);
14866
+ const writeDb = openKnowledgeDb(options.dbPath);
14867
+ try {
14868
+ const upserted = upsertVectors(writeDb, rows, embedding, now);
14869
+ return {
14870
+ provider: embedding.provider,
14871
+ model: embedding.model,
14872
+ dimensions: embedding.dimensions,
14873
+ chunks_seen: rows.length,
14874
+ chunks_embedded: rows.length,
14875
+ embeddings_upserted: upserted,
14876
+ vector_entries_upserted: upserted,
14877
+ usage: embedding.usage
14878
+ };
14879
+ } finally {
14880
+ writeDb.close();
14881
+ }
14882
+ }
14883
+ function embeddingIndexStatus(dbPath) {
14884
+ migrateKnowledgeDb(dbPath);
14885
+ const db = openKnowledgeDb(dbPath);
14886
+ try {
14887
+ const totalEmbeddings = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings").get()?.n ?? 0;
14888
+ const totalVectorEntries = db.query("SELECT COUNT(*) AS n FROM vector_index_entries").get()?.n ?? 0;
14889
+ const indexes = db.query(`SELECT provider, model, dimensions, COUNT(*) AS entries, MAX(updated_at) AS updated_at
14890
+ FROM vector_index_entries
14891
+ GROUP BY provider, model, dimensions
14892
+ ORDER BY provider, model`).all();
14893
+ return {
14894
+ total_embeddings: totalEmbeddings,
14895
+ total_vector_entries: totalVectorEntries,
14896
+ indexes
14397
14897
  };
14398
14898
  } finally {
14399
14899
  db.close();
14400
14900
  }
14401
14901
  }
14902
+ async function searchVectorIndex(options) {
14903
+ const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
14904
+ const parsed = parseModelRef(modelRef);
14905
+ const limit = Math.max(1, Math.min(options.limit ?? 10, 100));
14906
+ const embedded = await embedTexts([options.query], options);
14907
+ const queryVector = embedded.vectors[0] ?? [];
14908
+ migrateKnowledgeDb(options.dbPath);
14909
+ const db = openKnowledgeDb(options.dbPath);
14910
+ try {
14911
+ const rows = db.query(`SELECT
14912
+ v.chunk_id,
14913
+ c.text,
14914
+ v.vector_json,
14915
+ v.vector_norm,
14916
+ v.source_uri,
14917
+ v.source_ref,
14918
+ v.revision,
14919
+ v.hash,
14920
+ v.metadata_json
14921
+ FROM vector_index_entries v
14922
+ JOIN chunks c ON c.id = v.chunk_id
14923
+ WHERE v.provider = ? AND v.model = ? AND v.status = 'active'`).all(parsed.provider, parsed.model);
14924
+ const scored = rows.map((row) => {
14925
+ const vector = JSON.parse(row.vector_json);
14926
+ const metadata = parseJsonObject(row.metadata_json);
14927
+ const provenance = metadata.provenance && typeof metadata.provenance === "object" && !Array.isArray(metadata.provenance) ? metadata.provenance : null;
14928
+ return {
14929
+ chunk_id: row.chunk_id,
14930
+ score: cosineSimilarity(queryVector, vector, row.vector_norm),
14931
+ text: row.text,
14932
+ source_uri: row.source_uri,
14933
+ source_ref: row.source_ref,
14934
+ revision: row.revision,
14935
+ hash: row.hash,
14936
+ provenance
14937
+ };
14938
+ }).sort((a, b) => b.score - a.score).slice(0, limit);
14939
+ return {
14940
+ provider: parsed.provider,
14941
+ model: parsed.model,
14942
+ dimensions: embedded.dimensions,
14943
+ query: options.query,
14944
+ results: scored
14945
+ };
14946
+ } finally {
14947
+ db.close();
14948
+ }
14949
+ }
14950
+
14951
+ // src/outbox-consume.ts
14952
+ import { createHash as createHash3, randomUUID as randomUUID3 } from "crypto";
14953
+ import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
14954
+ import { basename } from "path";
14402
14955
 
14403
14956
  // src/safety.ts
14404
- import { createHash, randomUUID as randomUUID2 } from "crypto";
14957
+ import { createHash as createHash2, randomUUID as randomUUID2 } from "crypto";
14405
14958
  import { relative as relative2, resolve as resolve2, sep as sep2 } from "path";
14406
14959
  function envEnabled(name) {
14407
14960
  const value = process.env[name];
@@ -14496,7 +15049,7 @@ function redactSecrets(text, policy) {
14496
15049
  return { text: output, findings };
14497
15050
  }
14498
15051
  function auditId(input) {
14499
- return `audit_${createHash("sha256").update(`${input.event_type}\x00${input.action}\x00${input.target_uri ?? ""}\x00${input.created_at ?? ""}\x00${JSON.stringify(input.metadata ?? {})}\x00${randomUUID2()}`).digest("hex").slice(0, 24)}`;
15052
+ return `audit_${createHash2("sha256").update(`${input.event_type}\x00${input.action}\x00${input.target_uri ?? ""}\x00${input.created_at ?? ""}\x00${JSON.stringify(input.metadata ?? {})}\x00${randomUUID2()}`).digest("hex").slice(0, 24)}`;
14500
15053
  }
14501
15054
  function recordAuditEvent(db, input) {
14502
15055
  const createdAt = input.created_at ?? new Date().toISOString();
@@ -14531,8 +15084,8 @@ function recordRedactionFindings(db, input) {
14531
15084
  }
14532
15085
 
14533
15086
  // src/outbox-consume.ts
14534
- function stableId(prefix, value) {
14535
- return `${prefix}_${createHash2("sha256").update(value).digest("hex").slice(0, 20)}`;
15087
+ function stableId2(prefix, value) {
15088
+ return `${prefix}_${createHash3("sha256").update(value).digest("hex").slice(0, 20)}`;
14536
15089
  }
14537
15090
  function asObject(value) {
14538
15091
  return value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
@@ -14686,7 +15239,7 @@ function mergeJson(existing, patch) {
14686
15239
  return JSON.stringify({ ...base, ...patch });
14687
15240
  }
14688
15241
  function ensureSource(db, event, now) {
14689
- const id = stableId("src", event.sourceUri);
15242
+ const id = stableId2("src", event.sourceUri);
14690
15243
  db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
14691
15244
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
14692
15245
  ON CONFLICT(uri) DO UPDATE SET
@@ -14727,7 +15280,7 @@ function ensureSource(db, event, now) {
14727
15280
  function ensureRevision(db, sourceId, event, now) {
14728
15281
  if (!event.revision)
14729
15282
  return null;
14730
- const id = stableId("rev", `${sourceId}\x00${event.revision}`);
15283
+ const id = stableId2("rev", `${sourceId}\x00${event.revision}`);
14731
15284
  const metadata = {
14732
15285
  source_ref: event.sourceRef,
14733
15286
  source_uri: event.sourceUri,
@@ -14755,16 +15308,20 @@ function revisionIdsForEvent(db, sourceId, event) {
14755
15308
  function invalidateRevision(db, revisionId) {
14756
15309
  const chunks = db.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(revisionId);
14757
15310
  let embeddingsDeleted = 0;
15311
+ let vectorEntriesDeleted = 0;
14758
15312
  for (const chunk of chunks) {
14759
15313
  const row = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(chunk.id);
14760
15314
  embeddingsDeleted += row?.n ?? 0;
15315
+ const vectorRow = db.query("SELECT COUNT(*) AS n FROM vector_index_entries WHERE chunk_id = ?").get(chunk.id);
15316
+ vectorEntriesDeleted += vectorRow?.n ?? 0;
15317
+ db.run("DELETE FROM vector_index_entries WHERE chunk_id = ?", [chunk.id]);
14761
15318
  db.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?", [chunk.id]);
14762
15319
  db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [chunk.id]);
14763
15320
  }
14764
15321
  db.run("DELETE FROM chunks WHERE source_revision_id = ?", [revisionId]);
14765
15322
  const revision = db.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(revisionId);
14766
15323
  db.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?", [mergeJson(revision?.metadata_json, { reindex_required: true, invalidated_at: new Date().toISOString() }), revisionId]);
14767
- return { chunksDeleted: chunks.length, embeddingsDeleted };
15324
+ return { chunksDeleted: chunks.length, embeddingsDeleted, vectorEntriesDeleted };
14768
15325
  }
14769
15326
  function isDeleteEvent(eventType2, status) {
14770
15327
  return status === "deleted" || ["delete", "deleted", "remove", "removed"].includes(eventType2);
@@ -14802,6 +15359,7 @@ async function consumeOpenFilesOutbox(options) {
14802
15359
  const revisionsTouched = new Set;
14803
15360
  let chunksDeleted = 0;
14804
15361
  let embeddingsDeleted = 0;
15362
+ let vectorEntriesDeleted = 0;
14805
15363
  let staleRevisions = 0;
14806
15364
  let deletedSources = 0;
14807
15365
  let movedSources = 0;
@@ -14827,6 +15385,7 @@ async function consumeOpenFilesOutbox(options) {
14827
15385
  const invalidation = invalidateRevision(db, revisionId);
14828
15386
  chunksDeleted += invalidation.chunksDeleted;
14829
15387
  embeddingsDeleted += invalidation.embeddingsDeleted;
15388
+ vectorEntriesDeleted += invalidation.vectorEntriesDeleted;
14830
15389
  staleRevisions += 1;
14831
15390
  }
14832
15391
  if (isDeleteEvent(event.eventType, event.status))
@@ -14837,7 +15396,7 @@ async function consumeOpenFilesOutbox(options) {
14837
15396
  permissionUpdates += 1;
14838
15397
  db.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
14839
15398
  VALUES (?, ?, ?, ?, ?, ?)`, [
14840
- stableId("evt", `${runId}\x00${index}\x00${event.sourceRef}\x00${event.eventType}`),
15399
+ stableId2("evt", `${runId}\x00${index}\x00${event.sourceRef}\x00${event.eventType}`),
14841
15400
  runId,
14842
15401
  "info",
14843
15402
  event.eventType,
@@ -14854,7 +15413,7 @@ async function consumeOpenFilesOutbox(options) {
14854
15413
  });
14855
15414
  db.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
14856
15415
  VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`, [
14857
- stableId("usage", runId),
15416
+ stableId2("usage", runId),
14858
15417
  runId,
14859
15418
  "local",
14860
15419
  "open-files-outbox",
@@ -14872,7 +15431,8 @@ async function consumeOpenFilesOutbox(options) {
14872
15431
  sources: sourcesTouched.size,
14873
15432
  revisions: revisionsTouched.size,
14874
15433
  chunks_deleted: chunksDeleted,
14875
- embeddings_deleted: embeddingsDeleted
15434
+ embeddings_deleted: embeddingsDeleted,
15435
+ vector_entries_deleted: vectorEntriesDeleted
14876
15436
  },
14877
15437
  created_at: now
14878
15438
  });
@@ -14885,6 +15445,7 @@ async function consumeOpenFilesOutbox(options) {
14885
15445
  revisions_touched: revisionsTouched.size,
14886
15446
  chunks_deleted: chunksDeleted,
14887
15447
  embeddings_deleted: embeddingsDeleted,
15448
+ vector_entries_deleted: vectorEntriesDeleted,
14888
15449
  stale_revisions: staleRevisions,
14889
15450
  deleted_sources: deletedSources,
14890
15451
  moved_sources: movedSources,
@@ -14897,55 +15458,11 @@ async function consumeOpenFilesOutbox(options) {
14897
15458
  }
14898
15459
 
14899
15460
  // src/manifest-ingest.ts
14900
- import { createHash as createHash3 } from "crypto";
15461
+ import { createHash as createHash4 } from "crypto";
14901
15462
  import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
14902
15463
  import { basename as basename2 } from "path";
14903
-
14904
- // src/provenance.ts
14905
- function isStaleStatus(status) {
14906
- return ["deleted", "stale", "invalidated", "reindex_required"].includes((status ?? "").toLowerCase());
14907
- }
14908
- function sourceProvenance(input) {
14909
- const status = input.status ?? null;
14910
- return {
14911
- source_owner: "open-files",
14912
- source_ref: input.source_ref ?? null,
14913
- source_uri: input.source_uri ?? null,
14914
- source_kind: input.source_kind ?? null,
14915
- source_revision_id: input.source_revision_id ?? null,
14916
- revision: input.revision ?? null,
14917
- hash: input.hash ?? null,
14918
- chunk_id: input.chunk_id ?? null,
14919
- start_offset: input.start_offset ?? null,
14920
- end_offset: input.end_offset ?? null,
14921
- status,
14922
- read_only: true,
14923
- citation_required: true,
14924
- resolver: input.resolver ?? null,
14925
- stale: isStaleStatus(status)
14926
- };
14927
- }
14928
- function generatedArtifactProvenance(input) {
14929
- return {
14930
- source_owner: "open-files",
14931
- generated_from: input.generated_from,
14932
- artifact_key: input.artifact_key,
14933
- source_refs: input.source_refs ?? [],
14934
- read_only_sources: true,
14935
- citation_required: input.citation_required ?? true,
14936
- raw_source_bytes_stored_in_open_knowledge: false
14937
- };
14938
- }
14939
- function withProvenance(metadata, provenance) {
14940
- return {
14941
- ...metadata,
14942
- provenance
14943
- };
14944
- }
14945
-
14946
- // src/manifest-ingest.ts
14947
- function stableId2(prefix, value) {
14948
- return `${prefix}_${createHash3("sha256").update(value).digest("hex").slice(0, 20)}`;
15464
+ function stableId3(prefix, value) {
15465
+ return `${prefix}_${createHash4("sha256").update(value).digest("hex").slice(0, 20)}`;
14949
15466
  }
14950
15467
  function asObject2(value) {
14951
15468
  return value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
@@ -15165,7 +15682,7 @@ function deleteChunksForRevision(db, sourceRevisionId) {
15165
15682
  return rows.length;
15166
15683
  }
15167
15684
  function upsertSource(db, item, now) {
15168
- const sourceId = stableId2("src", item.sourceUri);
15685
+ const sourceId = stableId3("src", item.sourceUri);
15169
15686
  db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
15170
15687
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
15171
15688
  ON CONFLICT(uri) DO UPDATE SET
@@ -15189,7 +15706,7 @@ function upsertSource(db, item, now) {
15189
15706
  return row.id;
15190
15707
  }
15191
15708
  function upsertRevision(db, sourceId, item, now) {
15192
- const revisionId = stableId2("rev", `${sourceId}\x00${item.revision}`);
15709
+ const revisionId = stableId3("rev", `${sourceId}\x00${item.revision}`);
15193
15710
  db.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
15194
15711
  VALUES (?, ?, ?, ?, ?, ?, ?)
15195
15712
  ON CONFLICT(source_id, revision) DO UPDATE SET
@@ -15231,7 +15748,7 @@ function insertChunks(db, sourceRevisionId, item, now, maxChars, overlapChars, s
15231
15748
  }
15232
15749
  const chunks = chunkText(redacted.text, maxChars, overlapChars);
15233
15750
  for (const chunk of chunks) {
15234
- const chunkId = stableId2("chk", `${sourceRevisionId}\x00${chunk.ordinal}\x00${chunk.text}`);
15751
+ const chunkId = stableId3("chk", `${sourceRevisionId}\x00${chunk.ordinal}\x00${chunk.text}`);
15235
15752
  const provenance = sourceProvenance({
15236
15753
  source_ref: item.sourceRef,
15237
15754
  source_uri: item.sourceUri,
@@ -15359,12 +15876,12 @@ async function ingestOpenFilesManifestItems(options) {
15359
15876
  }
15360
15877
 
15361
15878
  // src/source-ingest.ts
15362
- import { createHash as createHash4 } from "crypto";
15879
+ import { createHash as createHash5 } from "crypto";
15363
15880
  import { existsSync as existsSync6, readFileSync as readFileSync6 } from "fs";
15364
15881
  import { basename as basename3 } from "path";
15365
15882
 
15366
15883
  // src/source-resolver.ts
15367
- function parseJsonObject(value) {
15884
+ function parseJsonObject2(value) {
15368
15885
  if (!value)
15369
15886
  return {};
15370
15887
  try {
@@ -15374,7 +15891,7 @@ function parseJsonObject(value) {
15374
15891
  return {};
15375
15892
  }
15376
15893
  }
15377
- function metadataString(metadata, keys) {
15894
+ function metadataString2(metadata, keys) {
15378
15895
  for (const key of keys) {
15379
15896
  const value = metadata[key];
15380
15897
  if (typeof value === "string" && value.length > 0)
@@ -15382,7 +15899,7 @@ function metadataString(metadata, keys) {
15382
15899
  }
15383
15900
  return null;
15384
15901
  }
15385
- function metadataNumber(metadata, keys) {
15902
+ function metadataNumber2(metadata, keys) {
15386
15903
  for (const key of keys) {
15387
15904
  const value = metadata[key];
15388
15905
  if (typeof value === "number" && Number.isFinite(value))
@@ -15507,8 +16024,8 @@ async function resolveOpenFilesSource(options) {
15507
16024
  citations: []
15508
16025
  };
15509
16026
  }
15510
- const sourceMetadata = parseJsonObject(source.metadata_json);
15511
- const permissions = parseJsonObject(source.acl_json);
16027
+ const sourceMetadata = parseJsonObject2(source.metadata_json);
16028
+ const permissions = parseJsonObject2(source.acl_json);
15512
16029
  try {
15513
16030
  assertPurposeAllowed(permissions, purpose);
15514
16031
  } catch (error48) {
@@ -15528,22 +16045,22 @@ async function resolveOpenFilesSource(options) {
15528
16045
  throw error48;
15529
16046
  }
15530
16047
  const revision = selectRevision(db, source.id, requestedRevision);
15531
- const revisionMetadata = parseJsonObject(revision?.metadata_json);
16048
+ const revisionMetadata = parseJsonObject2(revision?.metadata_json);
15532
16049
  const totalChunks = countChunks(db, revision?.id ?? null);
15533
16050
  const rows = selectChunks(db, revision?.id ?? null, limit);
15534
16051
  const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);
15535
16052
  const chunks = rows.map((row) => {
15536
- const metadata = parseJsonObject(row.metadata_json);
16053
+ const metadata = parseJsonObject2(row.metadata_json);
15537
16054
  const evidence = {
15538
16055
  resolver: "open-files-read-only",
15539
16056
  mode: "local_catalog",
15540
16057
  purpose,
15541
16058
  read_only: true,
15542
- source_ref: metadataString(metadata, ["source_ref"]) ?? effectiveSourceRef,
16059
+ source_ref: metadataString2(metadata, ["source_ref"]) ?? effectiveSourceRef,
15543
16060
  source_uri: source.uri,
15544
16061
  source_revision_id: revision?.id ?? null,
15545
16062
  revision: revision?.revision ?? null,
15546
- hash: revision?.hash ?? metadataString(metadata, ["hash"]),
16063
+ hash: revision?.hash ?? metadataString2(metadata, ["hash"]),
15547
16064
  chunk_id: row.id,
15548
16065
  start_offset: row.start_offset,
15549
16066
  end_offset: row.end_offset,
@@ -15559,7 +16076,7 @@ async function resolveOpenFilesSource(options) {
15559
16076
  chunk_id: row.id,
15560
16077
  start_offset: row.start_offset,
15561
16078
  end_offset: row.end_offset,
15562
- status: metadataString(metadata, ["status"]),
16079
+ status: metadataString2(metadata, ["status"]),
15563
16080
  resolver: evidence.resolver
15564
16081
  });
15565
16082
  return {
@@ -15600,8 +16117,8 @@ async function resolveOpenFilesSource(options) {
15600
16117
  },
15601
16118
  created_at: resolvedAt
15602
16119
  });
15603
- const mime = metadataString(sourceMetadata, ["mime", "content_type"]) ?? metadataString(revisionMetadata, ["mime", "content_type"]);
15604
- const size = metadataNumber(sourceMetadata, ["size", "size_bytes"]) ?? metadataNumber(revisionMetadata, ["size", "size_bytes"]);
16120
+ const mime = metadataString2(sourceMetadata, ["mime", "content_type"]) ?? metadataString2(revisionMetadata, ["mime", "content_type"]);
16121
+ const size = metadataNumber2(sourceMetadata, ["size", "size_bytes"]) ?? metadataNumber2(revisionMetadata, ["size", "size_bytes"]);
15605
16122
  return {
15606
16123
  source_ref: effectiveSourceRef,
15607
16124
  source_uri: source.uri,
@@ -15634,12 +16151,12 @@ async function resolveOpenFilesSource(options) {
15634
16151
  content: {
15635
16152
  mime,
15636
16153
  size,
15637
- hash: revision?.hash ?? metadataString(sourceMetadata, ["hash", "checksum", "sha256"]),
16154
+ hash: revision?.hash ?? metadataString2(sourceMetadata, ["hash", "checksum", "sha256"]),
15638
16155
  text_available: totalChunks > 0,
15639
16156
  chunks_total: totalChunks,
15640
16157
  chunks_returned: chunks.length,
15641
16158
  char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
15642
- extracted_text_ref: revision?.extracted_text_uri ?? metadataString(revisionMetadata, ["extracted_text_ref", "extracted_text_uri"]),
16159
+ extracted_text_ref: revision?.extracted_text_uri ?? metadataString2(revisionMetadata, ["extracted_text_ref", "extracted_text_uri"]),
15643
16160
  bytes_available: false,
15644
16161
  bytes_exposed: false
15645
16162
  },
@@ -15654,7 +16171,7 @@ async function resolveOpenFilesSource(options) {
15654
16171
 
15655
16172
  // src/source-ingest.ts
15656
16173
  function sha256Text(text) {
15657
- return `sha256:${createHash4("sha256").update(text).digest("hex")}`;
16174
+ return `sha256:${createHash5("sha256").update(text).digest("hex")}`;
15658
16175
  }
15659
16176
  function stripHtml(html) {
15660
16177
  return html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/&nbsp;/g, " ").replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/\s+\n/g, `
@@ -15876,131 +16393,8 @@ async function ingestSourceRef(options) {
15876
16393
  };
15877
16394
  }
15878
16395
 
15879
- // src/providers.ts
15880
- var DEFAULT_PROVIDER_SETTINGS = {
15881
- openai: {
15882
- api_key_env: "OPENAI_API_KEY",
15883
- default_model: "gpt-5.2"
15884
- },
15885
- anthropic: {
15886
- api_key_env: "ANTHROPIC_API_KEY",
15887
- default_model: "claude-sonnet-4-6"
15888
- },
15889
- deepseek: {
15890
- api_key_env: "DEEPSEEK_API_KEY",
15891
- default_model: "deepseek-chat"
15892
- }
15893
- };
15894
- var PROVIDER_CAPABILITIES = {
15895
- openai: {
15896
- text_generation: true,
15897
- structured_output: true,
15898
- tool_usage: true,
15899
- tool_streaming: true,
15900
- image_input: true,
15901
- native_web_search: true,
15902
- reasoning: true,
15903
- embeddings: true
15904
- },
15905
- anthropic: {
15906
- text_generation: true,
15907
- structured_output: true,
15908
- tool_usage: true,
15909
- tool_streaming: true,
15910
- image_input: true,
15911
- native_web_search: false,
15912
- reasoning: true,
15913
- embeddings: false
15914
- },
15915
- deepseek: {
15916
- text_generation: true,
15917
- structured_output: true,
15918
- tool_usage: true,
15919
- tool_streaming: true,
15920
- image_input: false,
15921
- native_web_search: false,
15922
- reasoning: true,
15923
- embeddings: false
15924
- }
15925
- };
15926
- var BUILTIN_ALIASES = {
15927
- default: "openai:gpt-5.2",
15928
- fast: "openai:gpt-5-mini",
15929
- reasoning: "anthropic:claude-opus-4-6",
15930
- sonnet: "anthropic:claude-sonnet-4-6",
15931
- deepseek: "deepseek:deepseek-chat",
15932
- "deepseek-reasoning": "deepseek:deepseek-reasoner"
15933
- };
15934
- function providerConfig(config2) {
15935
- return config2.providers ?? {};
15936
- }
15937
- function providerSettings(config2, provider) {
15938
- const configured = providerConfig(config2)[provider] ?? {};
15939
- return {
15940
- ...DEFAULT_PROVIDER_SETTINGS[provider],
15941
- ...configured
15942
- };
15943
- }
15944
- function modelAliases(config2) {
15945
- const configured = providerConfig(config2);
15946
- return {
15947
- ...BUILTIN_ALIASES,
15948
- ...configured.default_model ? { default: configured.default_model } : {},
15949
- ...configured.aliases ?? {}
15950
- };
15951
- }
15952
- function parseModelRef(modelRef) {
15953
- const [provider, ...rest] = modelRef.split(":");
15954
- const model = rest.join(":");
15955
- if (provider !== "openai" && provider !== "anthropic" && provider !== "deepseek") {
15956
- throw new Error(`Unsupported AI provider: ${provider}`);
15957
- }
15958
- if (!model)
15959
- throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
15960
- return { provider, model };
15961
- }
15962
- function resolveModelRef(aliasOrRef, config2) {
15963
- const aliases = modelAliases(config2);
15964
- return aliases[aliasOrRef] ?? aliasOrRef;
15965
- }
15966
- function listModelRegistry(config2) {
15967
- const aliases = modelAliases(config2);
15968
- return Object.entries(aliases).map(([alias, modelRef]) => {
15969
- const parsed = parseModelRef(modelRef);
15970
- return {
15971
- alias,
15972
- model_ref: modelRef,
15973
- provider: parsed.provider,
15974
- model: parsed.model,
15975
- default: alias === "default",
15976
- capabilities: PROVIDER_CAPABILITIES[parsed.provider]
15977
- };
15978
- });
15979
- }
15980
- function providerCredentialStatus(config2, env = process.env) {
15981
- return Object.keys(DEFAULT_PROVIDER_SETTINGS).map((provider) => {
15982
- const settings = providerSettings(config2, provider);
15983
- const configured = Boolean(env[settings.api_key_env]);
15984
- return {
15985
- provider,
15986
- api_key_env: settings.api_key_env,
15987
- configured,
15988
- source: configured ? "env" : "missing",
15989
- base_url: settings.base_url ?? null,
15990
- default_model: settings.default_model
15991
- };
15992
- });
15993
- }
15994
- function providerStatus(config2, env = process.env) {
15995
- return {
15996
- default_model: resolveModelRef("default", config2),
15997
- providers: providerCredentialStatus(config2, env),
15998
- models: listModelRegistry(config2)
15999
- };
16000
- }
16001
-
16002
16396
  // src/storage-contract.ts
16003
- import { createHash as createHash5, randomUUID as randomUUID4 } from "crypto";
16397
+ import { createHash as createHash6, randomUUID as randomUUID4 } from "crypto";
16004
16398
  var GENERATED_ARTIFACTS = [
16005
16399
  {
16006
16400
  kind: "schema",
@@ -16036,7 +16430,7 @@ var GENERATED_ARTIFACTS = [
16036
16430
  function hashArtifactBody(body) {
16037
16431
  const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
16038
16432
  return {
16039
- hash: `sha256:${createHash5("sha256").update(bytes).digest("hex")}`,
16433
+ hash: `sha256:${createHash6("sha256").update(bytes).digest("hex")}`,
16040
16434
  size_bytes: bytes.byteLength
16041
16435
  };
16042
16436
  }
@@ -16171,15 +16565,15 @@ function recordStorageObjects(db, objects, now = new Date) {
16171
16565
  }
16172
16566
 
16173
16567
  // src/wiki-layout.ts
16174
- import { createHash as createHash6 } from "crypto";
16568
+ import { createHash as createHash7 } from "crypto";
16175
16569
  function todayParts(now) {
16176
16570
  const year = String(now.getUTCFullYear());
16177
16571
  const month = String(now.getUTCMonth() + 1).padStart(2, "0");
16178
16572
  const day = String(now.getUTCDate()).padStart(2, "0");
16179
16573
  return { year, month, day };
16180
16574
  }
16181
- function stableId3(prefix, value) {
16182
- return `${prefix}_${createHash6("sha256").update(value).digest("hex").slice(0, 20)}`;
16575
+ function stableId4(prefix, value) {
16576
+ return `${prefix}_${createHash7("sha256").update(value).digest("hex").slice(0, 20)}`;
16183
16577
  }
16184
16578
  function agentSchemaTemplate() {
16185
16579
  return `# Knowledge Agent Schema v1
@@ -16302,7 +16696,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16302
16696
  artifact_uri = excluded.artifact_uri,
16303
16697
  metadata_json = excluded.metadata_json,
16304
16698
  updated_at = excluded.updated_at`, [
16305
- stableId3("idx", "root:indexes/root.md"),
16699
+ stableId4("idx", "root:indexes/root.md"),
16306
16700
  "root",
16307
16701
  "root",
16308
16702
  rootIndex.uri,
@@ -16326,7 +16720,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
16326
16720
  status = excluded.status,
16327
16721
  metadata_json = excluded.metadata_json,
16328
16722
  updated_at = excluded.updated_at`, [
16329
- stableId3("wiki", "wiki/README.md"),
16723
+ stableId4("wiki", "wiki/README.md"),
16330
16724
  "wiki/README.md",
16331
16725
  "Wiki",
16332
16726
  wikiReadme.uri,
@@ -16467,6 +16861,26 @@ class KnowledgeService {
16467
16861
  modelRegistry() {
16468
16862
  return listModelRegistry(this.config());
16469
16863
  }
16864
+ embeddingStatus() {
16865
+ const workspace = this.ensureWorkspace();
16866
+ return embeddingIndexStatus(workspace.knowledgeDbPath);
16867
+ }
16868
+ async indexEmbeddings(options = {}) {
16869
+ const workspace = this.ensureWorkspace();
16870
+ return indexKnowledgeEmbeddings({
16871
+ ...options,
16872
+ dbPath: workspace.knowledgeDbPath,
16873
+ config: this.config()
16874
+ });
16875
+ }
16876
+ async semanticSearch(options) {
16877
+ const workspace = this.ensureWorkspace();
16878
+ return searchVectorIndex({
16879
+ ...options,
16880
+ dbPath: workspace.knowledgeDbPath,
16881
+ config: this.config()
16882
+ });
16883
+ }
16470
16884
  }
16471
16885
  function createKnowledgeService(options = {}) {
16472
16886
  return new KnowledgeService(options);
@@ -16581,6 +16995,41 @@ function buildServer() {
16581
16995
  const service = createKnowledgeService({ scope });
16582
16996
  return jsonText({ ok: true, models: service.modelRegistry() });
16583
16997
  });
16998
+ registerTool(server, "ok_embeddings_status", "Embedding index status", "Inspect local embedding/vector index counts by provider and model", {
16999
+ scope: scopeField
17000
+ }, async ({ scope }) => {
17001
+ const service = createKnowledgeService({ scope });
17002
+ return jsonText({ ok: true, ...service.embeddingStatus() });
17003
+ });
17004
+ registerTool(server, "ok_embeddings_index", "Index embeddings", "Embed unindexed knowledge chunks into the local vector index", {
17005
+ scope: scopeField,
17006
+ limit: exports_external.number().optional().describe("Maximum chunks to embed"),
17007
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
17008
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
17009
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
17010
+ }, async ({ scope, limit, model, dimensions, fake }) => {
17011
+ const service = createKnowledgeService({ scope });
17012
+ try {
17013
+ return jsonText({ ok: true, ...await service.indexEmbeddings({ limit, modelRef: model, dimensions, fake }) });
17014
+ } catch (error48) {
17015
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
17016
+ }
17017
+ });
17018
+ registerTool(server, "ok_semantic_search", "Semantic search", "Search the local vector index and return cited chunks with provenance", {
17019
+ scope: scopeField,
17020
+ query: exports_external.string().describe("Semantic query"),
17021
+ limit: exports_external.number().optional().describe("Maximum results"),
17022
+ model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
17023
+ dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
17024
+ fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
17025
+ }, async ({ scope, query, limit, model, dimensions, fake }) => {
17026
+ const service = createKnowledgeService({ scope });
17027
+ try {
17028
+ return jsonText({ ok: true, ...await service.semanticSearch({ query, limit, modelRef: model, dimensions, fake }) });
17029
+ } catch (error48) {
17030
+ return errorText(error48 instanceof Error ? error48.message : String(error48));
17031
+ }
17032
+ });
16584
17033
  registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
16585
17034
  title: exports_external.string().describe("Item title"),
16586
17035
  content: exports_external.string().describe("Item content/body"),