@hasna/knowledge 0.2.12 → 0.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -3
- package/bin/open-knowledge-mcp.js +763 -166
- package/bin/open-knowledge.js +161 -31
- package/docs/architecture/ai-native-knowledge-base.md +24 -0
- package/docs/architecture/hybrid-semantic-search.md +17 -0
- package/package.json +1 -1
- package/src/cli.ts +48 -4
- package/src/embeddings.ts +516 -0
- package/src/knowledge-db.ts +40 -1
- package/src/manifest-ingest.ts +19 -2
- package/src/mcp.js +38 -0
- package/src/outbox-consume.ts +11 -2
- package/src/provenance.ts +93 -0
- package/src/service.ts +32 -1
- package/src/source-resolver.ts +18 -0
- package/src/wiki-layout.ts +91 -0
- package/src/workspace.ts +12 -0
|
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
|
|
|
13660
13660
|
// package.json
|
|
13661
13661
|
var package_default = {
|
|
13662
13662
|
name: "@hasna/knowledge",
|
|
13663
|
-
version: "0.2.
|
|
13663
|
+
version: "0.2.14",
|
|
13664
13664
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13665
13665
|
type: "module",
|
|
13666
13666
|
bin: {
|
|
@@ -13790,6 +13790,12 @@ function defaultKnowledgeConfig() {
|
|
|
13790
13790
|
default_model: "deepseek-chat"
|
|
13791
13791
|
}
|
|
13792
13792
|
},
|
|
13793
|
+
embeddings: {
|
|
13794
|
+
default_model: "openai:text-embedding-3-small",
|
|
13795
|
+
dimensions: 1536,
|
|
13796
|
+
batch_size: 64,
|
|
13797
|
+
max_parallel_calls: 4
|
|
13798
|
+
},
|
|
13793
13799
|
safety: {
|
|
13794
13800
|
network: {
|
|
13795
13801
|
web_search_enabled: false,
|
|
@@ -14128,10 +14134,8 @@ function createArtifactStore(config2, workspace) {
|
|
|
14128
14134
|
return new LocalArtifactStore(workspace.artifactsDir);
|
|
14129
14135
|
}
|
|
14130
14136
|
|
|
14131
|
-
// src/
|
|
14132
|
-
import { createHash
|
|
14133
|
-
import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
|
|
14134
|
-
import { basename } from "path";
|
|
14137
|
+
// src/embeddings.ts
|
|
14138
|
+
import { createHash } from "crypto";
|
|
14135
14139
|
|
|
14136
14140
|
// src/knowledge-db.ts
|
|
14137
14141
|
import { Database } from "bun:sqlite";
|
|
@@ -14349,10 +14353,43 @@ CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
|
|
|
14349
14353
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
14350
14354
|
VALUES (3, datetime('now'));
|
|
14351
14355
|
`;
|
|
14356
|
+
var MIGRATION_4 = `
|
|
14357
|
+
CREATE TABLE IF NOT EXISTS vector_index_entries (
|
|
14358
|
+
id TEXT PRIMARY KEY,
|
|
14359
|
+
chunk_id TEXT NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
|
|
14360
|
+
source_revision_id TEXT REFERENCES source_revisions(id) ON DELETE CASCADE,
|
|
14361
|
+
provider TEXT NOT NULL,
|
|
14362
|
+
model TEXT NOT NULL,
|
|
14363
|
+
dimensions INTEGER NOT NULL,
|
|
14364
|
+
vector_json TEXT NOT NULL,
|
|
14365
|
+
vector_norm REAL NOT NULL,
|
|
14366
|
+
source_uri TEXT,
|
|
14367
|
+
source_ref TEXT,
|
|
14368
|
+
revision TEXT,
|
|
14369
|
+
hash TEXT,
|
|
14370
|
+
start_offset INTEGER,
|
|
14371
|
+
end_offset INTEGER,
|
|
14372
|
+
token_count INTEGER,
|
|
14373
|
+
status TEXT NOT NULL DEFAULT 'active',
|
|
14374
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
14375
|
+
created_at TEXT NOT NULL,
|
|
14376
|
+
updated_at TEXT NOT NULL,
|
|
14377
|
+
UNIQUE(chunk_id, provider, model)
|
|
14378
|
+
);
|
|
14379
|
+
|
|
14380
|
+
CREATE INDEX IF NOT EXISTS idx_vector_index_provider_model ON vector_index_entries(provider, model);
|
|
14381
|
+
CREATE INDEX IF NOT EXISTS idx_vector_index_source_revision ON vector_index_entries(source_revision_id);
|
|
14382
|
+
CREATE INDEX IF NOT EXISTS idx_vector_index_source_uri ON vector_index_entries(source_uri);
|
|
14383
|
+
CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(status);
|
|
14384
|
+
|
|
14385
|
+
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
14386
|
+
VALUES (4, datetime('now'));
|
|
14387
|
+
`;
|
|
14352
14388
|
/**
 * Open the SQLite knowledge database at `path` and apply connection pragmas.
 *
 * @param {string} path - Database file location; its parent directory is
 *   ensured first via `ensureParentDir`.
 * @returns {Database} The opened handle. The caller is responsible for closing it.
 */
function openKnowledgeDb(path) {
  ensureParentDir(path);
  const connection = new Database(path);
  // Applied in this order on every new connection.
  const pragmas = ["PRAGMA foreign_keys = ON;", "PRAGMA busy_timeout = 5000;"];
  for (const pragma of pragmas) {
    connection.exec(pragma);
  }
  return connection;
}
|
|
14358
14395
|
function migrateKnowledgeDb(path) {
|
|
@@ -14363,6 +14400,8 @@ function migrateKnowledgeDb(path) {
|
|
|
14363
14400
|
db.exec(MIGRATION_2);
|
|
14364
14401
|
if (getSchemaVersion(db) < 3)
|
|
14365
14402
|
db.exec(MIGRATION_3);
|
|
14403
|
+
if (getSchemaVersion(db) < 4)
|
|
14404
|
+
db.exec(MIGRATION_4);
|
|
14366
14405
|
return { path, schema_version: getSchemaVersion(db) };
|
|
14367
14406
|
} finally {
|
|
14368
14407
|
db.close();
|
|
@@ -14392,15 +14431,530 @@ function getKnowledgeDbStats(path) {
|
|
|
14392
14431
|
redaction_findings: count(db, "redaction_findings"),
|
|
14393
14432
|
audit_events: count(db, "audit_events"),
|
|
14394
14433
|
approval_gates: count(db, "approval_gates"),
|
|
14395
|
-
storage_objects: count(db, "storage_objects")
|
|
14434
|
+
storage_objects: count(db, "storage_objects"),
|
|
14435
|
+
embeddings: count(db, "chunk_embeddings"),
|
|
14436
|
+
vector_entries: count(db, "vector_index_entries")
|
|
14396
14437
|
};
|
|
14397
14438
|
} finally {
|
|
14398
14439
|
db.close();
|
|
14399
14440
|
}
|
|
14400
14441
|
}
|
|
14401
14442
|
|
|
14443
|
+
// src/providers.ts
|
|
14444
|
+
// Built-in per-provider settings: the environment variable holding the API key
// and the model used when nothing else is configured.
var DEFAULT_PROVIDER_SETTINGS = {
  openai: { api_key_env: "OPENAI_API_KEY", default_model: "gpt-5.2" },
  anthropic: { api_key_env: "ANTHROPIC_API_KEY", default_model: "claude-sonnet-4-6" },
  deepseek: { api_key_env: "DEEPSEEK_API_KEY", default_model: "deepseek-chat" },
};

// Capability flags reported for each provider in the model registry.
var PROVIDER_CAPABILITIES = {
  openai: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: true,
    native_web_search: true,
    reasoning: true,
    embeddings: true,
  },
  anthropic: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: true,
    native_web_search: false,
    reasoning: true,
    embeddings: false,
  },
  deepseek: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: false,
    native_web_search: false,
    reasoning: true,
    embeddings: false,
  },
};

// Alias -> "provider:model" reference. Configured aliases are layered on top
// of these by modelAliases().
var BUILTIN_ALIASES = {
  default: "openai:gpt-5.2",
  fast: "openai:gpt-5-mini",
  reasoning: "anthropic:claude-opus-4-6",
  sonnet: "anthropic:claude-sonnet-4-6",
  deepseek: "deepseek:deepseek-chat",
  "deepseek-reasoning": "deepseek:deepseek-reasoner",
};
|
|
14498
|
+
/**
 * Return the `providers` section of the knowledge config.
 *
 * Tolerates a missing config (undefined/null), matching `embeddingConfig`:
 * the embedding code paths forward an optional `options.config` here, and a
 * bare property access would otherwise throw a TypeError.
 *
 * @param {object | null | undefined} config2 - Knowledge config, if any.
 * @returns {object} The configured providers object, or `{}` when absent.
 */
function providerConfig(config2) {
  return config2?.providers ?? {};
}
|
|
14501
|
+
/**
 * Resolve the effective settings for one provider.
 *
 * @param {object} config2 - Knowledge config.
 * @param {string} provider - Provider key (e.g. "openai").
 * @returns {object} Built-in defaults overlaid with the configured values;
 *   configured keys win.
 */
function providerSettings(config2, provider) {
  const overrides = providerConfig(config2)[provider] ?? {};
  const defaults = DEFAULT_PROVIDER_SETTINGS[provider];
  return { ...defaults, ...overrides };
}
|
|
14508
|
+
/**
 * Build the alias -> model-ref table.
 *
 * Precedence, lowest to highest: built-in aliases, the configured
 * `default_model` (as the `default` alias), then configured `aliases`.
 *
 * @param {object} config2 - Knowledge config.
 * @returns {Record<string, string>} Merged alias table.
 */
function modelAliases(config2) {
  const providers = providerConfig(config2);
  const defaultOverride = providers.default_model ? { default: providers.default_model } : {};
  const customAliases = providers.aliases ?? {};
  return { ...BUILTIN_ALIASES, ...defaultOverride, ...customAliases };
}
|
|
14516
|
+
/**
 * Split a `provider:model` reference into its parts.
 *
 * Only the first colon separates provider from model, so model names that
 * themselves contain colons are preserved.
 *
 * @param {string} modelRef - Reference such as "openai:text-embedding-3-small".
 * @returns {{ provider: "openai" | "anthropic" | "deepseek", model: string }}
 * @throws {Error} If `modelRef` is not a string, names an unsupported
 *   provider, or has an empty model part.
 */
function parseModelRef(modelRef) {
  // Guard first: a non-string would otherwise fail inside the string methods
  // with an opaque TypeError instead of a descriptive message.
  if (typeof modelRef !== "string") {
    throw new Error(`Invalid model ref: ${String(modelRef)}. Expected provider:model.`);
  }
  const separatorIndex = modelRef.indexOf(":");
  const provider = separatorIndex === -1 ? modelRef : modelRef.slice(0, separatorIndex);
  const model = separatorIndex === -1 ? "" : modelRef.slice(separatorIndex + 1);
  if (provider !== "openai" && provider !== "anthropic" && provider !== "deepseek") {
    throw new Error(`Unsupported AI provider: ${provider}`);
  }
  if (!model) {
    throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
  }
  return { provider, model };
}
|
|
14526
|
+
/**
 * Resolve an alias to its model ref, passing unknown values through unchanged.
 *
 * @param {string} aliasOrRef - An alias name or an already-qualified ref.
 * @param {object} config2 - Knowledge config.
 * @returns {string} The aliased model ref, or `aliasOrRef` itself.
 */
function resolveModelRef(aliasOrRef, config2) {
  const aliases = modelAliases(config2);
  // Own-property check: a plain lookup would return inherited members such as
  // Object.prototype.toString for inputs like "toString" or "constructor".
  if (!Object.prototype.hasOwnProperty.call(aliases, aliasOrRef)) {
    return aliasOrRef;
  }
  return aliases[aliasOrRef] ?? aliasOrRef;
}
|
|
14530
|
+
/**
 * Describe every known alias together with its parsed model ref and the
 * capability flags of its provider.
 *
 * @param {object} config2 - Knowledge config.
 * @returns {Array<object>} One entry per alias, in alias-table order.
 * @throws {Error} Propagated from `parseModelRef` when an alias maps to an
 *   invalid ref.
 */
function listModelRegistry(config2) {
  const registry = [];
  for (const [alias, modelRef] of Object.entries(modelAliases(config2))) {
    const { provider, model } = parseModelRef(modelRef);
    registry.push({
      alias,
      model_ref: modelRef,
      provider,
      model,
      default: alias === "default",
      capabilities: PROVIDER_CAPABILITIES[provider],
    });
  }
  return registry;
}
|
|
14544
|
+
/**
 * Report, for each built-in provider, whether its API key is present in `env`.
 *
 * @param {object} config2 - Knowledge config.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to inspect.
 * @returns {Array<object>} One status record per provider. The key's value is
 *   never included, only the name of the variable that holds it.
 */
function providerCredentialStatus(config2, env = process.env) {
  const report = [];
  for (const provider of Object.keys(DEFAULT_PROVIDER_SETTINGS)) {
    const settings = providerSettings(config2, provider);
    const hasKey = Boolean(env[settings.api_key_env]);
    report.push({
      provider,
      api_key_env: settings.api_key_env,
      configured: hasKey,
      source: hasKey ? "env" : "missing",
      base_url: settings.base_url ?? null,
      default_model: settings.default_model,
    });
  }
  return report;
}
|
|
14558
|
+
/**
 * Assemble the combined provider status: the resolved default model, the
 * credential status per provider, and the model registry.
 *
 * @param {object} config2 - Knowledge config.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to inspect.
 * @returns {{ default_model: string, providers: Array<object>, models: Array<object> }}
 */
function providerStatus(config2, env = process.env) {
  const default_model = resolveModelRef("default", config2);
  const providers = providerCredentialStatus(config2, env);
  const models = listModelRegistry(config2);
  return { default_model, providers, models };
}
|
|
14565
|
+
/**
 * Ensure `provider` is known and has its API key set in `env`.
 *
 * @param {string} provider - Provider key to check.
 * @param {object} config2 - Knowledge config.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to inspect.
 * @returns {object} The provider's credential status record.
 * @throws {Error} If the provider is unknown or its API key variable is unset.
 */
function assertProviderCredentials(provider, config2, env = process.env) {
  const match = providerCredentialStatus(config2, env).find(({ provider: name }) => name === provider);
  if (!match) {
    throw new Error(`Unsupported AI provider: ${provider}`);
  }
  if (!match.configured) {
    throw new Error(`Missing ${match.api_key_env} for ${provider}. Set the env var to use this provider.`);
  }
  return match;
}
|
|
14573
|
+
|
|
14574
|
+
// src/provenance.ts
|
|
14575
|
+
/**
 * Tell whether a status string marks content as no longer current.
 *
 * @param {string | null | undefined} status - Status value, compared case-insensitively.
 * @returns {boolean} True for "deleted", "stale", "invalidated" or "reindex_required".
 */
function isStaleStatus(status) {
  const normalized = (status ?? "").toLowerCase();
  switch (normalized) {
    case "deleted":
    case "stale":
    case "invalidated":
    case "reindex_required":
      return true;
    default:
      return false;
  }
}
|
|
14578
|
+
/**
 * Build a source provenance record from partial input.
 *
 * Every optional field missing from `input` is normalised to `null`;
 * `source_owner`, `read_only` and `citation_required` are fixed values, and
 * `stale` is derived from the status.
 *
 * @param {object} input - Partial provenance fields.
 * @returns {object} The complete provenance record.
 */
function sourceProvenance(input) {
  const orNull = (value) => value ?? null;
  const status = orNull(input.status);
  return {
    source_owner: "open-files",
    source_ref: orNull(input.source_ref),
    source_uri: orNull(input.source_uri),
    source_kind: orNull(input.source_kind),
    source_revision_id: orNull(input.source_revision_id),
    revision: orNull(input.revision),
    hash: orNull(input.hash),
    chunk_id: orNull(input.chunk_id),
    start_offset: orNull(input.start_offset),
    end_offset: orNull(input.end_offset),
    status,
    read_only: true,
    citation_required: true,
    resolver: orNull(input.resolver),
    stale: isStaleStatus(status),
  };
}
|
|
14598
|
+
/**
 * Build the provenance record attached to a generated artifact.
 *
 * @param {object} input - Requires `generated_from` and `artifact_key`;
 *   `source_refs` defaults to `[]` and `citation_required` to `true`.
 * @returns {object} The artifact provenance record.
 */
function generatedArtifactProvenance(input) {
  const { generated_from, artifact_key } = input;
  const source_refs = input.source_refs ?? [];
  const citation_required = input.citation_required ?? true;
  return {
    source_owner: "open-files",
    generated_from,
    artifact_key,
    source_refs,
    read_only_sources: true,
    citation_required,
    raw_source_bytes_stored_in_open_knowledge: false,
  };
}
|
|
14609
|
+
/**
 * Return a shallow copy of `metadata` with its `provenance` key set.
 *
 * @param {object} metadata - Existing metadata; not mutated.
 * @param {object} provenance - Provenance record; replaces any existing one.
 * @returns {object} The new metadata object.
 */
function withProvenance(metadata, provenance) {
  const merged = { ...metadata };
  merged.provenance = provenance;
  return merged;
}
|
|
14615
|
+
|
|
14616
|
+
// src/embeddings.ts
|
|
14617
|
+
// Fallbacks used when neither the caller nor the config names an embedding
// model or a vector size.
var DEFAULT_EMBEDDING_MODEL_REF = "openai:text-embedding-3-small";
var DEFAULT_EMBEDDING_DIMENSIONS = 1536;

/**
 * Return the `embeddings` section of the knowledge config.
 *
 * @param {object | null | undefined} config2 - Knowledge config, if any.
 * @returns {object} The embeddings settings, or `{}` when absent.
 */
function embeddingConfig(config2) {
  const section = config2?.embeddings;
  return section ?? {};
}
|
|
14622
|
+
/**
 * Derive a deterministic identifier from `value`.
 *
 * @param {string} prefix - Label placed before the underscore.
 * @param {string} value - Input hashed with SHA-256.
 * @returns {string} `<prefix>_` followed by the first 20 hex characters of the digest.
 */
function stableId(prefix, value) {
  const digestHex = createHash("sha256").update(value).digest("hex");
  const shortDigest = digestHex.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
|
|
14625
|
+
/**
 * Parse a JSON string that is expected to hold an object.
 *
 * @param {string | null | undefined} value - JSON text.
 * @returns {object} The parsed object, or `{}` when the input is empty,
 *   is not valid JSON, or decodes to anything other than a plain object
 *   (arrays and primitives included).
 */
function parseJsonObject(value) {
  if (!value) {
    return {};
  }
  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed JSON is treated the same as missing metadata.
    return {};
  }
  const isPlainObject = Boolean(decoded) && typeof decoded === "object" && !Array.isArray(decoded);
  return isPlainObject ? decoded : {};
}
|
|
14635
|
+
/**
 * Return the first non-empty string stored under any of `keys`.
 *
 * @param {object} metadata - Object to read from.
 * @param {Iterable<string>} keys - Candidate keys, in priority order.
 * @returns {string | null} The first matching value, or `null` if none qualifies.
 */
function metadataString(metadata, keys) {
  const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;
  const match = [...keys].map((key) => metadata[key]).find(isNonEmptyString);
  return match ?? null;
}
|
|
14643
|
+
/**
 * Return the first finite number stored under any of `keys`.
 *
 * @param {object} metadata - Object to read from.
 * @param {Iterable<string>} keys - Candidate keys, in priority order.
 * @returns {number | null} The first matching value (zero included), or
 *   `null` if none qualifies. NaN, infinities and numeric strings are rejected.
 */
function metadataNumber(metadata, keys) {
  const isFiniteNumber = (candidate) => typeof candidate === "number" && Number.isFinite(candidate);
  const match = [...keys].map((key) => metadata[key]).find(isFiniteNumber);
  return match ?? null;
}
|
|
14651
|
+
/**
 * Compute the Euclidean (L2) norm of a numeric vector.
 *
 * @param {number[]} vector - Vector components.
 * @returns {number} Square root of the sum of squared components; 0 for an empty vector.
 */
function vectorNorm(vector) {
  let sumOfSquares = 0;
  vector.forEach((component) => {
    sumOfSquares += component * component;
  });
  return Math.sqrt(sumOfSquares);
}
|
|
14654
|
+
/**
 * Cosine similarity between two vectors.
 *
 * The dot product runs over the shorter of the two lengths, while each norm
 * covers its whole vector.
 *
 * @param {number[]} a - First vector; its norm is always computed here.
 * @param {number[]} b - Second vector.
 * @param {number} [bNorm] - Precomputed norm of `b`; computed when omitted.
 * @returns {number} The similarity, or 0 when either norm is 0.
 */
function cosineSimilarity(a, b, bNorm = vectorNorm(b)) {
  const aNorm = vectorNorm(a);
  if (aNorm === 0 || bNorm === 0) {
    return 0;
  }
  const sharedLength = Math.min(a.length, b.length);
  let dotProduct = 0;
  let position = 0;
  while (position < sharedLength) {
    dotProduct += a[position] * b[position];
    position += 1;
  }
  return dotProduct / (aNorm * bNorm);
}
|
|
14664
|
+
/**
 * Produce a reproducible pseudo-embedding for `text` without calling a provider.
 *
 * The SHA-256 digest bytes of the text are cycled to fill the requested
 * length; each byte is mapped from [0, 255] to [-1, 1] and rounded to six
 * decimal places.
 *
 * @param {string} text - Text to derive the vector from.
 * @param {number} dimensions - Number of components to produce.
 * @returns {number[]} The deterministic vector.
 */
function deterministicVector(text, dimensions) {
  const digest = createHash("sha256").update(text).digest();
  const componentAt = (_, position) => {
    const unit = digest[position % digest.length] / 255;
    return Number((unit * 2 - 1).toFixed(6));
  };
  return Array.from({ length: dimensions }, componentAt);
}
|
|
14671
|
+
/**
 * Create an OpenAI embedding model handle through `@ai-sdk/openai`.
 *
 * @param {string} model - Embedding model name.
 * @param {object} config2 - Knowledge config supplying the provider settings.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment holding the API key.
 * @returns {Promise<object>} The embedding model instance.
 * @throws {Error} If credentials are missing, or the provider object exposes
 *   none of the known embedding factories.
 */
async function openAiEmbeddingModel(model, config2, env = process.env) {
  assertProviderCredentials("openai", config2, env);
  const { api_key_env, base_url } = providerSettings(config2, "openai");
  const { createOpenAI } = await import("@ai-sdk/openai");
  const client = createOpenAI({ apiKey: env[api_key_env], baseURL: base_url });
  // The factory name is probed in this order; the first one present is used.
  const factoryNames = ["embeddingModel", "textEmbedding", "textEmbeddingModel"];
  for (const factoryName of factoryNames) {
    if (client[factoryName]) {
      return client[factoryName](model);
    }
  }
  throw new Error("OpenAI provider does not expose an embedding model factory.");
}
|
|
14687
|
+
/**
 * Pick the embedding model ref to use.
 *
 * @param {string | null | undefined} modelRef - Requested ref; a falsy value,
 *   "default" or "embedding" selects the configured default.
 * @param {object | null | undefined} config2 - Knowledge config.
 * @returns {string} The explicit ref, else the configured default model,
 *   else the built-in default.
 */
function resolveEmbeddingModelRef(modelRef, config2) {
  const isExplicit = Boolean(modelRef) && modelRef !== "default" && modelRef !== "embedding";
  if (isExplicit) {
    return modelRef;
  }
  return embeddingConfig(config2).default_model ?? DEFAULT_EMBEDDING_MODEL_REF;
}
|
|
14693
|
+
/**
 * Embed a list of texts with the resolved embedding model.
 *
 * With `options.fake` set, vectors come from `deterministicVector` and token
 * usage is estimated from whitespace-separated word counts; no provider is
 * contacted. Otherwise the texts go through `embedMany` from the `ai` package.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {object} [options] - Reads `modelRef`, `config`, `dimensions`,
 *   `fake`, `env` and `maxParallelCalls`.
 * @returns {Promise<{ provider: string, model: string, dimensions: number,
 *   vectors: number[][], usage: { input_tokens: number } }>}
 * @throws {Error} If the resolved provider is not "openai".
 */
async function embedTexts(texts, options = {}) {
  const { provider, model: modelName } = parseModelRef(resolveEmbeddingModelRef(options.modelRef, options.config));
  if (provider !== "openai") {
    throw new Error(`Embedding provider ${provider} is not supported yet. Use openai:text-embedding-3-small.`);
  }
  const dimensions = options.dimensions ?? embeddingConfig(options.config).dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS;

  if (options.fake) {
    // Rough estimate: 1.25 tokens per word, at least one token per text.
    const estimateTokens = (text) => Math.max(1, Math.ceil(text.split(/\s+/).filter(Boolean).length * 1.25));
    let estimatedTokens = 0;
    for (const text of texts) {
      estimatedTokens += estimateTokens(text);
    }
    return {
      provider,
      model: modelName,
      dimensions,
      vectors: texts.map((text) => deterministicVector(text, dimensions)),
      usage: { input_tokens: estimatedTokens },
    };
  }

  const { embedMany } = await import("ai");
  const embeddingModel = await openAiEmbeddingModel(modelName, options.config, options.env);
  const response = await embedMany({
    model: embeddingModel,
    values: texts,
    maxParallelCalls: options.maxParallelCalls ?? embeddingConfig(options.config).max_parallel_calls,
    providerOptions: { openai: { dimensions } },
  });
  const vectors = response.embeddings;
  return {
    provider,
    model: modelName,
    // Report the size the provider actually returned when there is one.
    dimensions: vectors[0]?.length ?? dimensions,
    vectors,
    usage: { input_tokens: response.usage?.tokens ?? 0 },
  };
}
|
|
14730
|
+
/**
 * Select chunks that have no vector index entry yet for a provider/model pair.
 *
 * @param {object} db - Open database handle exposing `query(sql).all(...)`.
 * @param {{ provider: string, model: string, limit: number, sourceRevisionId?: string }} options
 *   - When `sourceRevisionId` is set, only chunks of that revision are returned.
 * @returns {Array<object>} Chunk rows joined with their revision and source
 *   columns, oldest first, at most `limit` rows.
 */
function selectCandidateChunks(db, options) {
  const { provider, model, limit, sourceRevisionId } = options;
  // The anti-join (LEFT JOIN ... WHERE v.id IS NULL) keeps only unindexed chunks.
  const selectUnindexed = `SELECT
      c.id,
      c.text,
      c.token_count,
      c.start_offset,
      c.end_offset,
      c.metadata_json,
      c.source_revision_id,
      sr.revision,
      sr.hash,
      s.uri AS source_uri,
      s.kind AS source_kind
    FROM chunks c
    LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
    LEFT JOIN sources s ON s.id = sr.source_id
    LEFT JOIN vector_index_entries v
      ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
    WHERE v.id IS NULL`;
  const orderAndLimit = `
    ORDER BY c.created_at ASC, c.ordinal ASC
    LIMIT ?`;

  const restrictToRevision = Boolean(sourceRevisionId);
  const sql = restrictToRevision
    ? `${selectUnindexed} AND c.source_revision_id = ?${orderAndLimit}`
    : `${selectUnindexed}${orderAndLimit}`;
  const params = restrictToRevision
    ? [provider, model, sourceRevisionId, limit]
    : [provider, model, limit];
  return db.query(sql).all(...params);
}
|
|
14757
|
+
/**
 * Return the provenance record for a chunk row.
 *
 * A provenance object already stored in the chunk's metadata is returned
 * as-is. Otherwise one is built from the row's columns, falling back to the
 * same-named metadata fields.
 *
 * @param {object} row - Chunk row as returned by `selectCandidateChunks`.
 * @returns {object} The provenance record.
 */
function provenanceForChunk(row) {
  const metadata = parseJsonObject(row.metadata_json);
  const stored = metadata.provenance;
  const hasStoredProvenance = Boolean(stored) && typeof stored === "object" && !Array.isArray(stored);
  if (hasStoredProvenance) {
    return stored;
  }
  const textField = (key) => metadataString(metadata, [key]);
  const numberField = (key) => metadataNumber(metadata, [key]);
  return sourceProvenance({
    source_ref: textField("source_ref"),
    source_uri: row.source_uri ?? textField("source_uri"),
    source_kind: row.source_kind ?? textField("source_kind"),
    source_revision_id: row.source_revision_id,
    revision: row.revision ?? textField("revision"),
    hash: row.hash ?? textField("hash"),
    chunk_id: row.id,
    start_offset: row.start_offset ?? numberField("start_offset"),
    end_offset: row.end_offset ?? numberField("end_offset"),
    status: textField("status"),
    resolver: "open-files-read-only",
  });
}
|
|
14776
|
+
/**
 * Persist embeddings for a batch of chunks in a single transaction.
 *
 * Each chunk that has a vector gets one row in `chunk_embeddings` and one in
 * `vector_index_entries`; an existing row for the same (chunk, provider,
 * model) is updated in place. Chunks with no vector at their index are skipped.
 *
 * @param {object} db - Open database handle exposing `prepare` and `transaction`.
 * @param {Array<object>} rows - Chunk rows, positionally aligned with `embedding.vectors`.
 * @param {{ provider: string, model: string, dimensions: number, vectors: number[][] }} embedding
 * @param {string} now - Timestamp written to the created/updated columns.
 * @returns {number} Number of chunks actually written. Skipped chunks are
 *   excluded so that callers do not over-report upserts.
 */
function upsertVectors(db, rows, embedding, now) {
  const insertEmbedding = db.prepare(`
    INSERT INTO chunk_embeddings (id, chunk_id, provider, model, dimensions, vector_json, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
      dimensions = excluded.dimensions,
      vector_json = excluded.vector_json,
      created_at = excluded.created_at
  `);
  const insertVector = db.prepare(`
    INSERT INTO vector_index_entries (
      id, chunk_id, source_revision_id, provider, model, dimensions, vector_json, vector_norm,
      source_uri, source_ref, revision, hash, start_offset, end_offset, token_count, status,
      metadata_json, created_at, updated_at
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
      source_revision_id = excluded.source_revision_id,
      dimensions = excluded.dimensions,
      vector_json = excluded.vector_json,
      vector_norm = excluded.vector_norm,
      source_uri = excluded.source_uri,
      source_ref = excluded.source_ref,
      revision = excluded.revision,
      hash = excluded.hash,
      start_offset = excluded.start_offset,
      end_offset = excluded.end_offset,
      token_count = excluded.token_count,
      status = excluded.status,
      metadata_json = excluded.metadata_json,
      updated_at = excluded.updated_at
  `);
  let written = 0;
  const write = db.transaction(() => {
    // Reset inside the callback so the count reflects this execution only.
    written = 0;
    for (let index = 0; index < rows.length; index += 1) {
      const row = rows[index];
      const vector = embedding.vectors[index];
      if (!vector)
        continue;
      const metadata = parseJsonObject(row.metadata_json);
      const provenance = provenanceForChunk(row);
      // Prefer provenance values, then row columns, then raw metadata.
      const sourceRef = provenance.source_ref ?? metadataString(metadata, ["source_ref"]);
      const sourceUri = provenance.source_uri ?? row.source_uri ?? metadataString(metadata, ["source_uri"]);
      const revision = provenance.revision ?? row.revision ?? metadataString(metadata, ["revision"]);
      const hash2 = provenance.hash ?? row.hash ?? metadataString(metadata, ["hash"]);
      const status = provenance.status ?? metadataString(metadata, ["status"]) ?? "active";
      const vectorJson = JSON.stringify(vector);
      // Both ids hash the same chunk/provider/model key; only the prefix differs.
      const idKey = `${row.id}\x00${embedding.provider}\x00${embedding.model}`;
      insertEmbedding.run(stableId("emb", idKey), row.id, embedding.provider, embedding.model, embedding.dimensions, vectorJson, now);
      insertVector.run(stableId("vec", idKey), row.id, row.source_revision_id, embedding.provider, embedding.model, embedding.dimensions, vectorJson, vectorNorm(vector), sourceUri, sourceRef, revision, hash2, provenance.start_offset, provenance.end_offset, row.token_count, status, JSON.stringify({
        ...metadata,
        provenance,
        embedded_at: now
      }), now, now);
      written += 1;
    }
  });
  write();
  return written;
}
|
|
14833
|
+
/**
 * Embed chunks that are not yet indexed for the resolved model and store the
 * resulting vectors.
 *
 * The database is migrated first. Candidate chunks are read on one
 * connection, which is closed before the embedding call; a second connection
 * is opened afterwards for the writes.
 *
 * @param {object} options - Reads `dbPath`, `modelRef`, `config`, `now`,
 *   `limit` (default 100, clamped to 1..1000), `sourceRevisionId`,
 *   `dimensions`, plus everything `embedTexts` reads.
 * @returns {Promise<object>} Summary with provider, model, dimensions, chunk
 *   and upsert counts, and token usage.
 * @throws {Error} If the resolved provider is not "openai".
 */
async function indexKnowledgeEmbeddings(options) {
  const { provider, model } = parseModelRef(resolveEmbeddingModelRef(options.modelRef, options.config));
  if (provider !== "openai") {
    throw new Error(`Embedding provider ${provider} is not supported yet.`);
  }
  const now = (options.now ?? new Date()).toISOString();
  const limit = Math.max(1, Math.min(options.limit ?? 100, 1000));
  migrateKnowledgeDb(options.dbPath);

  let candidates;
  const readDb = openKnowledgeDb(options.dbPath);
  try {
    candidates = selectCandidateChunks(readDb, {
      provider,
      model,
      limit,
      sourceRevisionId: options.sourceRevisionId,
    });
  } finally {
    readDb.close();
  }

  if (candidates.length === 0) {
    // Nothing to embed: report the configured dimensions with zeroed counters.
    return {
      provider,
      model,
      dimensions: options.dimensions ?? embeddingConfig(options.config).dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS,
      chunks_seen: 0,
      chunks_embedded: 0,
      embeddings_upserted: 0,
      vector_entries_upserted: 0,
      usage: { input_tokens: 0 },
    };
  }

  const embedding = await embedTexts(candidates.map((candidate) => candidate.text), options);
  const writeDb = openKnowledgeDb(options.dbPath);
  try {
    const upserted = upsertVectors(writeDb, candidates, embedding, now);
    return {
      provider: embedding.provider,
      model: embedding.model,
      dimensions: embedding.dimensions,
      chunks_seen: candidates.length,
      chunks_embedded: candidates.length,
      embeddings_upserted: upserted,
      vector_entries_upserted: upserted,
      usage: embedding.usage,
    };
  } finally {
    writeDb.close();
  }
}
|
|
14883
|
+
/**
 * Summarise the embedding tables of the knowledge database.
 *
 * @param {string} dbPath - Database file; migrated before it is read.
 * @returns {{ total_embeddings: number, total_vector_entries: number, indexes: Array<object> }}
 *   Row totals plus one entry per (provider, model, dimensions) group.
 */
function embeddingIndexStatus(dbPath) {
  migrateKnowledgeDb(dbPath);
  const db = openKnowledgeDb(dbPath);
  try {
    const embeddingCount = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings").get();
    const vectorEntryCount = db.query("SELECT COUNT(*) AS n FROM vector_index_entries").get();
    const indexes = db.query(`SELECT provider, model, dimensions, COUNT(*) AS entries, MAX(updated_at) AS updated_at
      FROM vector_index_entries
      GROUP BY provider, model, dimensions
      ORDER BY provider, model`).all();
    return {
      total_embeddings: embeddingCount?.n ?? 0,
      total_vector_entries: vectorEntryCount?.n ?? 0,
      indexes,
    };
  } finally {
    db.close();
  }
}
|
|
14902
|
+
/**
 * Rank active vector index entries by cosine similarity to a query string.
 *
 * The query is embedded first; every active entry for the resolved
 * provider/model is then loaded and scored in process.
 *
 * @param {object} options - Reads `query`, `dbPath`, `modelRef`, `config`,
 *   `limit` (default 10, clamped to 1..100), plus everything `embedTexts` reads.
 * @returns {Promise<object>} Provider, model, dimensions, the query, and the
 *   top results sorted by descending score.
 */
async function searchVectorIndex(options) {
  const { provider, model } = parseModelRef(resolveEmbeddingModelRef(options.modelRef, options.config));
  const limit = Math.max(1, Math.min(options.limit ?? 10, 100));
  const embedded = await embedTexts([options.query], options);
  const queryVector = embedded.vectors[0] ?? [];
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const entries = db.query(`SELECT
        v.chunk_id,
        c.text,
        v.vector_json,
        v.vector_norm,
        v.source_uri,
        v.source_ref,
        v.revision,
        v.hash,
        v.metadata_json
      FROM vector_index_entries v
      JOIN chunks c ON c.id = v.chunk_id
      WHERE v.provider = ? AND v.model = ? AND v.status = 'active'`).all(provider, model);

    const toResult = (entry) => {
      const storedVector = JSON.parse(entry.vector_json);
      const { provenance: stored } = parseJsonObject(entry.metadata_json);
      const hasProvenance = Boolean(stored) && typeof stored === "object" && !Array.isArray(stored);
      return {
        chunk_id: entry.chunk_id,
        // The stored norm is passed through so it is not recomputed per row.
        score: cosineSimilarity(queryVector, storedVector, entry.vector_norm),
        text: entry.text,
        source_uri: entry.source_uri,
        source_ref: entry.source_ref,
        revision: entry.revision,
        hash: entry.hash,
        provenance: hasProvenance ? stored : null,
      };
    };
    const ranked = entries.map(toResult);
    ranked.sort((left, right) => right.score - left.score);

    return {
      provider,
      model,
      dimensions: embedded.dimensions,
      query: options.query,
      results: ranked.slice(0, limit),
    };
  } finally {
    db.close();
  }
}
|
|
14950
|
+
|
|
14951
|
+
// src/outbox-consume.ts
|
|
14952
|
+
import { createHash as createHash3, randomUUID as randomUUID3 } from "crypto";
|
|
14953
|
+
import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
|
|
14954
|
+
import { basename } from "path";
|
|
14955
|
+
|
|
14402
14956
|
// src/safety.ts
|
|
14403
|
-
import { createHash, randomUUID as randomUUID2 } from "crypto";
|
|
14957
|
+
import { createHash as createHash2, randomUUID as randomUUID2 } from "crypto";
|
|
14404
14958
|
import { relative as relative2, resolve as resolve2, sep as sep2 } from "path";
|
|
14405
14959
|
function envEnabled(name) {
|
|
14406
14960
|
const value = process.env[name];
|
|
@@ -14495,7 +15049,7 @@ function redactSecrets(text, policy) {
|
|
|
14495
15049
|
return { text: output, findings };
|
|
14496
15050
|
}
|
|
14497
15051
|
/**
 * Generate an identifier for an audit event.
 *
 * A fresh random UUID is mixed into the hashed material, so two calls with
 * identical input still produce different ids.
 *
 * @param {object} input - Reads `event_type`, `action`, `target_uri`,
 *   `created_at` and `metadata`.
 * @returns {string} "audit_" followed by 24 hex characters of a SHA-256 digest.
 */
function auditId(input) {
  const material = [
    input.event_type,
    input.action,
    input.target_uri ?? "",
    input.created_at ?? "",
    JSON.stringify(input.metadata ?? {}),
    randomUUID2(),
  ].map((part) => `${part}`).join("\x00");
  const digestHex = createHash2("sha256").update(material).digest("hex");
  return `audit_${digestHex.slice(0, 24)}`;
}
|
|
14500
15054
|
function recordAuditEvent(db, input) {
|
|
14501
15055
|
const createdAt = input.created_at ?? new Date().toISOString();
|
|
@@ -14530,8 +15084,8 @@ function recordRedactionFindings(db, input) {
|
|
|
14530
15084
|
}
|
|
14531
15085
|
|
|
14532
15086
|
// src/outbox-consume.ts
|
|
14533
|
-
function
|
|
14534
|
-
return `${prefix}_${
|
|
15087
|
+
/**
 * Derive a deterministic identifier from `value`.
 *
 * @param {string} prefix - Label placed before the underscore.
 * @param {string} value - Input hashed with SHA-256.
 * @returns {string} `<prefix>_` followed by the first 20 hex characters of the digest.
 */
function stableId2(prefix, value) {
  const digestHex = createHash3("sha256").update(value).digest("hex");
  const shortDigest = digestHex.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
|
|
14536
15090
|
function asObject(value) {
|
|
14537
15091
|
return value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
|
|
@@ -14685,7 +15239,7 @@ function mergeJson(existing, patch) {
|
|
|
14685
15239
|
return JSON.stringify({ ...base, ...patch });
|
|
14686
15240
|
}
|
|
14687
15241
|
function ensureSource(db, event, now) {
|
|
14688
|
-
const id =
|
|
15242
|
+
const id = stableId2("src", event.sourceUri);
|
|
14689
15243
|
db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
14690
15244
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
14691
15245
|
ON CONFLICT(uri) DO UPDATE SET
|
|
@@ -14726,7 +15280,7 @@ function ensureSource(db, event, now) {
|
|
|
14726
15280
|
function ensureRevision(db, sourceId, event, now) {
|
|
14727
15281
|
if (!event.revision)
|
|
14728
15282
|
return null;
|
|
14729
|
-
const id =
|
|
15283
|
+
const id = stableId2("rev", `${sourceId}\x00${event.revision}`);
|
|
14730
15284
|
const metadata = {
|
|
14731
15285
|
source_ref: event.sourceRef,
|
|
14732
15286
|
source_uri: event.sourceUri,
|
|
@@ -14754,16 +15308,20 @@ function revisionIdsForEvent(db, sourceId, event) {
|
|
|
14754
15308
|
/**
 * Remove all indexed data for a source revision and flag it for re-indexing.
 *
 * For each chunk of the revision, its vector index entries, embeddings and
 * full-text rows are deleted; then the chunks themselves. The revision's
 * metadata is patched with `reindex_required` and an `invalidated_at` timestamp.
 *
 * @param {object} db - Open database handle exposing `query` and `run`.
 * @param {string} revisionId - Id of the revision to invalidate.
 * @returns {{ chunksDeleted: number, embeddingsDeleted: number, vectorEntriesDeleted: number }}
 *   Counts taken before the deletions.
 */
function invalidateRevision(db, revisionId) {
  const chunkRows = db.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(revisionId);
  const tally = { embeddings: 0, vectorEntries: 0 };
  chunkRows.forEach(({ id: chunkId }) => {
    // Count before deleting so the totals describe what was removed.
    const embeddingCount = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(chunkId);
    tally.embeddings += embeddingCount?.n ?? 0;
    const vectorCount = db.query("SELECT COUNT(*) AS n FROM vector_index_entries WHERE chunk_id = ?").get(chunkId);
    tally.vectorEntries += vectorCount?.n ?? 0;
    db.run("DELETE FROM vector_index_entries WHERE chunk_id = ?", [chunkId]);
    db.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?", [chunkId]);
    db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [chunkId]);
  });
  db.run("DELETE FROM chunks WHERE source_revision_id = ?", [revisionId]);
  const revisionRow = db.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(revisionId);
  const patchedMetadata = mergeJson(revisionRow?.metadata_json, { reindex_required: true, invalidated_at: new Date().toISOString() });
  db.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?", [patchedMetadata, revisionId]);
  return {
    chunksDeleted: chunkRows.length,
    embeddingsDeleted: tally.embeddings,
    vectorEntriesDeleted: tally.vectorEntries,
  };
}
|
|
14768
15326
|
function isDeleteEvent(eventType2, status) {
|
|
14769
15327
|
return status === "deleted" || ["delete", "deleted", "remove", "removed"].includes(eventType2);
|
|
@@ -14801,6 +15359,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14801
15359
|
const revisionsTouched = new Set;
|
|
14802
15360
|
let chunksDeleted = 0;
|
|
14803
15361
|
let embeddingsDeleted = 0;
|
|
15362
|
+
let vectorEntriesDeleted = 0;
|
|
14804
15363
|
let staleRevisions = 0;
|
|
14805
15364
|
let deletedSources = 0;
|
|
14806
15365
|
let movedSources = 0;
|
|
@@ -14826,6 +15385,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14826
15385
|
const invalidation = invalidateRevision(db, revisionId);
|
|
14827
15386
|
chunksDeleted += invalidation.chunksDeleted;
|
|
14828
15387
|
embeddingsDeleted += invalidation.embeddingsDeleted;
|
|
15388
|
+
vectorEntriesDeleted += invalidation.vectorEntriesDeleted;
|
|
14829
15389
|
staleRevisions += 1;
|
|
14830
15390
|
}
|
|
14831
15391
|
if (isDeleteEvent(event.eventType, event.status))
|
|
@@ -14836,7 +15396,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14836
15396
|
permissionUpdates += 1;
|
|
14837
15397
|
db.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
14838
15398
|
VALUES (?, ?, ?, ?, ?, ?)`, [
|
|
14839
|
-
|
|
15399
|
+
stableId2("evt", `${runId}\x00${index}\x00${event.sourceRef}\x00${event.eventType}`),
|
|
14840
15400
|
runId,
|
|
14841
15401
|
"info",
|
|
14842
15402
|
event.eventType,
|
|
@@ -14853,7 +15413,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14853
15413
|
});
|
|
14854
15414
|
db.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
|
|
14855
15415
|
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`, [
|
|
14856
|
-
|
|
15416
|
+
stableId2("usage", runId),
|
|
14857
15417
|
runId,
|
|
14858
15418
|
"local",
|
|
14859
15419
|
"open-files-outbox",
|
|
@@ -14871,7 +15431,8 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14871
15431
|
sources: sourcesTouched.size,
|
|
14872
15432
|
revisions: revisionsTouched.size,
|
|
14873
15433
|
chunks_deleted: chunksDeleted,
|
|
14874
|
-
embeddings_deleted: embeddingsDeleted
|
|
15434
|
+
embeddings_deleted: embeddingsDeleted,
|
|
15435
|
+
vector_entries_deleted: vectorEntriesDeleted
|
|
14875
15436
|
},
|
|
14876
15437
|
created_at: now
|
|
14877
15438
|
});
|
|
@@ -14884,6 +15445,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14884
15445
|
revisions_touched: revisionsTouched.size,
|
|
14885
15446
|
chunks_deleted: chunksDeleted,
|
|
14886
15447
|
embeddings_deleted: embeddingsDeleted,
|
|
15448
|
+
vector_entries_deleted: vectorEntriesDeleted,
|
|
14887
15449
|
stale_revisions: staleRevisions,
|
|
14888
15450
|
deleted_sources: deletedSources,
|
|
14889
15451
|
moved_sources: movedSources,
|
|
@@ -14896,11 +15458,11 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14896
15458
|
}
|
|
14897
15459
|
|
|
14898
15460
|
// src/manifest-ingest.ts
|
|
14899
|
-
import { createHash as
|
|
15461
|
+
import { createHash as createHash4 } from "crypto";
|
|
14900
15462
|
import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
|
|
14901
15463
|
import { basename as basename2 } from "path";
|
|
14902
|
-
function
|
|
14903
|
-
return `${prefix}_${
|
|
15464
|
+
function stableId3(prefix, value) {
|
|
15465
|
+
return `${prefix}_${createHash4("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
14904
15466
|
}
|
|
14905
15467
|
function asObject2(value) {
|
|
14906
15468
|
return value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
|
|
@@ -15120,7 +15682,7 @@ function deleteChunksForRevision(db, sourceRevisionId) {
|
|
|
15120
15682
|
return rows.length;
|
|
15121
15683
|
}
|
|
15122
15684
|
function upsertSource(db, item, now) {
|
|
15123
|
-
const sourceId =
|
|
15685
|
+
const sourceId = stableId3("src", item.sourceUri);
|
|
15124
15686
|
db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
15125
15687
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
15126
15688
|
ON CONFLICT(uri) DO UPDATE SET
|
|
@@ -15144,7 +15706,7 @@ function upsertSource(db, item, now) {
|
|
|
15144
15706
|
return row.id;
|
|
15145
15707
|
}
|
|
15146
15708
|
function upsertRevision(db, sourceId, item, now) {
|
|
15147
|
-
const revisionId =
|
|
15709
|
+
const revisionId = stableId3("rev", `${sourceId}\x00${item.revision}`);
|
|
15148
15710
|
db.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
15149
15711
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
15150
15712
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
@@ -15186,16 +15748,32 @@ function insertChunks(db, sourceRevisionId, item, now, maxChars, overlapChars, s
|
|
|
15186
15748
|
}
|
|
15187
15749
|
const chunks = chunkText(redacted.text, maxChars, overlapChars);
|
|
15188
15750
|
for (const chunk of chunks) {
|
|
15189
|
-
const chunkId =
|
|
15190
|
-
const
|
|
15751
|
+
const chunkId = stableId3("chk", `${sourceRevisionId}\x00${chunk.ordinal}\x00${chunk.text}`);
|
|
15752
|
+
const provenance = sourceProvenance({
|
|
15753
|
+
source_ref: item.sourceRef,
|
|
15754
|
+
source_uri: item.sourceUri,
|
|
15755
|
+
source_kind: item.kind,
|
|
15756
|
+
source_revision_id: sourceRevisionId,
|
|
15757
|
+
revision: item.revision,
|
|
15758
|
+
hash: item.hash,
|
|
15759
|
+
chunk_id: chunkId,
|
|
15760
|
+
start_offset: chunk.startOffset,
|
|
15761
|
+
end_offset: chunk.endOffset,
|
|
15762
|
+
status: item.status,
|
|
15763
|
+
resolver: "open-files-read-only"
|
|
15764
|
+
});
|
|
15765
|
+
const metadata = withProvenance({
|
|
15191
15766
|
source_ref: item.sourceRef,
|
|
15192
15767
|
source_uri: item.sourceUri,
|
|
15768
|
+
source_kind: item.kind,
|
|
15769
|
+
source_revision_id: sourceRevisionId,
|
|
15770
|
+
revision: item.revision,
|
|
15193
15771
|
hash: item.hash,
|
|
15194
15772
|
status: item.status,
|
|
15195
15773
|
path: asString2(item.raw.path) ?? null,
|
|
15196
15774
|
mime: asString2(item.raw.mime) ?? asString2(item.raw.content_type) ?? null,
|
|
15197
15775
|
size: asNumber(item.raw.size) ?? null
|
|
15198
|
-
};
|
|
15776
|
+
}, provenance);
|
|
15199
15777
|
db.run(`INSERT INTO chunks (id, source_revision_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
|
|
15200
15778
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [
|
|
15201
15779
|
chunkId,
|
|
@@ -15298,12 +15876,12 @@ async function ingestOpenFilesManifestItems(options) {
|
|
|
15298
15876
|
}
|
|
15299
15877
|
|
|
15300
15878
|
// src/source-ingest.ts
|
|
15301
|
-
import { createHash as
|
|
15879
|
+
import { createHash as createHash5 } from "crypto";
|
|
15302
15880
|
import { existsSync as existsSync6, readFileSync as readFileSync6 } from "fs";
|
|
15303
15881
|
import { basename as basename3 } from "path";
|
|
15304
15882
|
|
|
15305
15883
|
// src/source-resolver.ts
|
|
15306
|
-
function
|
|
15884
|
+
function parseJsonObject2(value) {
|
|
15307
15885
|
if (!value)
|
|
15308
15886
|
return {};
|
|
15309
15887
|
try {
|
|
@@ -15313,7 +15891,7 @@ function parseJsonObject(value) {
|
|
|
15313
15891
|
return {};
|
|
15314
15892
|
}
|
|
15315
15893
|
}
|
|
15316
|
-
function
|
|
15894
|
+
function metadataString2(metadata, keys) {
|
|
15317
15895
|
for (const key of keys) {
|
|
15318
15896
|
const value = metadata[key];
|
|
15319
15897
|
if (typeof value === "string" && value.length > 0)
|
|
@@ -15321,7 +15899,7 @@ function metadataString(metadata, keys) {
|
|
|
15321
15899
|
}
|
|
15322
15900
|
return null;
|
|
15323
15901
|
}
|
|
15324
|
-
function
|
|
15902
|
+
function metadataNumber2(metadata, keys) {
|
|
15325
15903
|
for (const key of keys) {
|
|
15326
15904
|
const value = metadata[key];
|
|
15327
15905
|
if (typeof value === "number" && Number.isFinite(value))
|
|
@@ -15446,8 +16024,8 @@ async function resolveOpenFilesSource(options) {
|
|
|
15446
16024
|
citations: []
|
|
15447
16025
|
};
|
|
15448
16026
|
}
|
|
15449
|
-
const sourceMetadata =
|
|
15450
|
-
const permissions =
|
|
16027
|
+
const sourceMetadata = parseJsonObject2(source.metadata_json);
|
|
16028
|
+
const permissions = parseJsonObject2(source.acl_json);
|
|
15451
16029
|
try {
|
|
15452
16030
|
assertPurposeAllowed(permissions, purpose);
|
|
15453
16031
|
} catch (error48) {
|
|
@@ -15467,27 +16045,40 @@ async function resolveOpenFilesSource(options) {
|
|
|
15467
16045
|
throw error48;
|
|
15468
16046
|
}
|
|
15469
16047
|
const revision = selectRevision(db, source.id, requestedRevision);
|
|
15470
|
-
const revisionMetadata =
|
|
16048
|
+
const revisionMetadata = parseJsonObject2(revision?.metadata_json);
|
|
15471
16049
|
const totalChunks = countChunks(db, revision?.id ?? null);
|
|
15472
16050
|
const rows = selectChunks(db, revision?.id ?? null, limit);
|
|
15473
16051
|
const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);
|
|
15474
16052
|
const chunks = rows.map((row) => {
|
|
15475
|
-
const metadata =
|
|
16053
|
+
const metadata = parseJsonObject2(row.metadata_json);
|
|
15476
16054
|
const evidence = {
|
|
15477
16055
|
resolver: "open-files-read-only",
|
|
15478
16056
|
mode: "local_catalog",
|
|
15479
16057
|
purpose,
|
|
15480
16058
|
read_only: true,
|
|
15481
|
-
source_ref:
|
|
16059
|
+
source_ref: metadataString2(metadata, ["source_ref"]) ?? effectiveSourceRef,
|
|
15482
16060
|
source_uri: source.uri,
|
|
15483
16061
|
source_revision_id: revision?.id ?? null,
|
|
15484
16062
|
revision: revision?.revision ?? null,
|
|
15485
|
-
hash: revision?.hash ??
|
|
16063
|
+
hash: revision?.hash ?? metadataString2(metadata, ["hash"]),
|
|
15486
16064
|
chunk_id: row.id,
|
|
15487
16065
|
start_offset: row.start_offset,
|
|
15488
16066
|
end_offset: row.end_offset,
|
|
15489
16067
|
resolved_at: resolvedAt
|
|
15490
16068
|
};
|
|
16069
|
+
const provenance = sourceProvenance({
|
|
16070
|
+
source_ref: evidence.source_ref,
|
|
16071
|
+
source_uri: evidence.source_uri,
|
|
16072
|
+
source_kind: source.kind,
|
|
16073
|
+
source_revision_id: evidence.source_revision_id,
|
|
16074
|
+
revision: evidence.revision,
|
|
16075
|
+
hash: evidence.hash,
|
|
16076
|
+
chunk_id: row.id,
|
|
16077
|
+
start_offset: row.start_offset,
|
|
16078
|
+
end_offset: row.end_offset,
|
|
16079
|
+
status: metadataString2(metadata, ["status"]),
|
|
16080
|
+
resolver: evidence.resolver
|
|
16081
|
+
});
|
|
15491
16082
|
return {
|
|
15492
16083
|
id: row.id,
|
|
15493
16084
|
kind: row.kind,
|
|
@@ -15497,7 +16088,8 @@ async function resolveOpenFilesSource(options) {
|
|
|
15497
16088
|
start_offset: row.start_offset,
|
|
15498
16089
|
end_offset: row.end_offset,
|
|
15499
16090
|
metadata,
|
|
15500
|
-
evidence
|
|
16091
|
+
evidence,
|
|
16092
|
+
provenance
|
|
15501
16093
|
};
|
|
15502
16094
|
});
|
|
15503
16095
|
const citations = chunks.map((chunk) => ({
|
|
@@ -15507,7 +16099,8 @@ async function resolveOpenFilesSource(options) {
|
|
|
15507
16099
|
quote: chunk.text.slice(0, 500),
|
|
15508
16100
|
start_offset: chunk.start_offset,
|
|
15509
16101
|
end_offset: chunk.end_offset,
|
|
15510
|
-
evidence: chunk.evidence
|
|
16102
|
+
evidence: chunk.evidence,
|
|
16103
|
+
provenance: chunk.provenance
|
|
15511
16104
|
}));
|
|
15512
16105
|
recordAuditEvent(db, {
|
|
15513
16106
|
event_type: "source_read",
|
|
@@ -15524,8 +16117,8 @@ async function resolveOpenFilesSource(options) {
|
|
|
15524
16117
|
},
|
|
15525
16118
|
created_at: resolvedAt
|
|
15526
16119
|
});
|
|
15527
|
-
const mime =
|
|
15528
|
-
const size =
|
|
16120
|
+
const mime = metadataString2(sourceMetadata, ["mime", "content_type"]) ?? metadataString2(revisionMetadata, ["mime", "content_type"]);
|
|
16121
|
+
const size = metadataNumber2(sourceMetadata, ["size", "size_bytes"]) ?? metadataNumber2(revisionMetadata, ["size", "size_bytes"]);
|
|
15529
16122
|
return {
|
|
15530
16123
|
source_ref: effectiveSourceRef,
|
|
15531
16124
|
source_uri: source.uri,
|
|
@@ -15558,12 +16151,12 @@ async function resolveOpenFilesSource(options) {
|
|
|
15558
16151
|
content: {
|
|
15559
16152
|
mime,
|
|
15560
16153
|
size,
|
|
15561
|
-
hash: revision?.hash ??
|
|
16154
|
+
hash: revision?.hash ?? metadataString2(sourceMetadata, ["hash", "checksum", "sha256"]),
|
|
15562
16155
|
text_available: totalChunks > 0,
|
|
15563
16156
|
chunks_total: totalChunks,
|
|
15564
16157
|
chunks_returned: chunks.length,
|
|
15565
16158
|
char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
|
|
15566
|
-
extracted_text_ref: revision?.extracted_text_uri ??
|
|
16159
|
+
extracted_text_ref: revision?.extracted_text_uri ?? metadataString2(revisionMetadata, ["extracted_text_ref", "extracted_text_uri"]),
|
|
15567
16160
|
bytes_available: false,
|
|
15568
16161
|
bytes_exposed: false
|
|
15569
16162
|
},
|
|
@@ -15578,7 +16171,7 @@ async function resolveOpenFilesSource(options) {
|
|
|
15578
16171
|
|
|
15579
16172
|
// src/source-ingest.ts
|
|
15580
16173
|
function sha256Text(text) {
|
|
15581
|
-
return `sha256:${
|
|
16174
|
+
return `sha256:${createHash5("sha256").update(text).digest("hex")}`;
|
|
15582
16175
|
}
|
|
15583
16176
|
function stripHtml(html) {
|
|
15584
16177
|
return html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/ /g, " ").replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">").replace(/\s+\n/g, `
|
|
@@ -15800,131 +16393,8 @@ async function ingestSourceRef(options) {
|
|
|
15800
16393
|
};
|
|
15801
16394
|
}
|
|
15802
16395
|
|
|
15803
|
-
// src/providers.ts
|
|
15804
|
-
var DEFAULT_PROVIDER_SETTINGS = {
|
|
15805
|
-
openai: {
|
|
15806
|
-
api_key_env: "OPENAI_API_KEY",
|
|
15807
|
-
default_model: "gpt-5.2"
|
|
15808
|
-
},
|
|
15809
|
-
anthropic: {
|
|
15810
|
-
api_key_env: "ANTHROPIC_API_KEY",
|
|
15811
|
-
default_model: "claude-sonnet-4-6"
|
|
15812
|
-
},
|
|
15813
|
-
deepseek: {
|
|
15814
|
-
api_key_env: "DEEPSEEK_API_KEY",
|
|
15815
|
-
default_model: "deepseek-chat"
|
|
15816
|
-
}
|
|
15817
|
-
};
|
|
15818
|
-
var PROVIDER_CAPABILITIES = {
|
|
15819
|
-
openai: {
|
|
15820
|
-
text_generation: true,
|
|
15821
|
-
structured_output: true,
|
|
15822
|
-
tool_usage: true,
|
|
15823
|
-
tool_streaming: true,
|
|
15824
|
-
image_input: true,
|
|
15825
|
-
native_web_search: true,
|
|
15826
|
-
reasoning: true,
|
|
15827
|
-
embeddings: true
|
|
15828
|
-
},
|
|
15829
|
-
anthropic: {
|
|
15830
|
-
text_generation: true,
|
|
15831
|
-
structured_output: true,
|
|
15832
|
-
tool_usage: true,
|
|
15833
|
-
tool_streaming: true,
|
|
15834
|
-
image_input: true,
|
|
15835
|
-
native_web_search: false,
|
|
15836
|
-
reasoning: true,
|
|
15837
|
-
embeddings: false
|
|
15838
|
-
},
|
|
15839
|
-
deepseek: {
|
|
15840
|
-
text_generation: true,
|
|
15841
|
-
structured_output: true,
|
|
15842
|
-
tool_usage: true,
|
|
15843
|
-
tool_streaming: true,
|
|
15844
|
-
image_input: false,
|
|
15845
|
-
native_web_search: false,
|
|
15846
|
-
reasoning: true,
|
|
15847
|
-
embeddings: false
|
|
15848
|
-
}
|
|
15849
|
-
};
|
|
15850
|
-
var BUILTIN_ALIASES = {
|
|
15851
|
-
default: "openai:gpt-5.2",
|
|
15852
|
-
fast: "openai:gpt-5-mini",
|
|
15853
|
-
reasoning: "anthropic:claude-opus-4-6",
|
|
15854
|
-
sonnet: "anthropic:claude-sonnet-4-6",
|
|
15855
|
-
deepseek: "deepseek:deepseek-chat",
|
|
15856
|
-
"deepseek-reasoning": "deepseek:deepseek-reasoner"
|
|
15857
|
-
};
|
|
15858
|
-
function providerConfig(config2) {
|
|
15859
|
-
return config2.providers ?? {};
|
|
15860
|
-
}
|
|
15861
|
-
function providerSettings(config2, provider) {
|
|
15862
|
-
const configured = providerConfig(config2)[provider] ?? {};
|
|
15863
|
-
return {
|
|
15864
|
-
...DEFAULT_PROVIDER_SETTINGS[provider],
|
|
15865
|
-
...configured
|
|
15866
|
-
};
|
|
15867
|
-
}
|
|
15868
|
-
function modelAliases(config2) {
|
|
15869
|
-
const configured = providerConfig(config2);
|
|
15870
|
-
return {
|
|
15871
|
-
...BUILTIN_ALIASES,
|
|
15872
|
-
...configured.default_model ? { default: configured.default_model } : {},
|
|
15873
|
-
...configured.aliases ?? {}
|
|
15874
|
-
};
|
|
15875
|
-
}
|
|
15876
|
-
function parseModelRef(modelRef) {
|
|
15877
|
-
const [provider, ...rest] = modelRef.split(":");
|
|
15878
|
-
const model = rest.join(":");
|
|
15879
|
-
if (provider !== "openai" && provider !== "anthropic" && provider !== "deepseek") {
|
|
15880
|
-
throw new Error(`Unsupported AI provider: ${provider}`);
|
|
15881
|
-
}
|
|
15882
|
-
if (!model)
|
|
15883
|
-
throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
|
|
15884
|
-
return { provider, model };
|
|
15885
|
-
}
|
|
15886
|
-
function resolveModelRef(aliasOrRef, config2) {
|
|
15887
|
-
const aliases = modelAliases(config2);
|
|
15888
|
-
return aliases[aliasOrRef] ?? aliasOrRef;
|
|
15889
|
-
}
|
|
15890
|
-
function listModelRegistry(config2) {
|
|
15891
|
-
const aliases = modelAliases(config2);
|
|
15892
|
-
return Object.entries(aliases).map(([alias, modelRef]) => {
|
|
15893
|
-
const parsed = parseModelRef(modelRef);
|
|
15894
|
-
return {
|
|
15895
|
-
alias,
|
|
15896
|
-
model_ref: modelRef,
|
|
15897
|
-
provider: parsed.provider,
|
|
15898
|
-
model: parsed.model,
|
|
15899
|
-
default: alias === "default",
|
|
15900
|
-
capabilities: PROVIDER_CAPABILITIES[parsed.provider]
|
|
15901
|
-
};
|
|
15902
|
-
});
|
|
15903
|
-
}
|
|
15904
|
-
function providerCredentialStatus(config2, env = process.env) {
|
|
15905
|
-
return Object.keys(DEFAULT_PROVIDER_SETTINGS).map((provider) => {
|
|
15906
|
-
const settings = providerSettings(config2, provider);
|
|
15907
|
-
const configured = Boolean(env[settings.api_key_env]);
|
|
15908
|
-
return {
|
|
15909
|
-
provider,
|
|
15910
|
-
api_key_env: settings.api_key_env,
|
|
15911
|
-
configured,
|
|
15912
|
-
source: configured ? "env" : "missing",
|
|
15913
|
-
base_url: settings.base_url ?? null,
|
|
15914
|
-
default_model: settings.default_model
|
|
15915
|
-
};
|
|
15916
|
-
});
|
|
15917
|
-
}
|
|
15918
|
-
function providerStatus(config2, env = process.env) {
|
|
15919
|
-
return {
|
|
15920
|
-
default_model: resolveModelRef("default", config2),
|
|
15921
|
-
providers: providerCredentialStatus(config2, env),
|
|
15922
|
-
models: listModelRegistry(config2)
|
|
15923
|
-
};
|
|
15924
|
-
}
|
|
15925
|
-
|
|
15926
16396
|
// src/storage-contract.ts
|
|
15927
|
-
import { createHash as
|
|
16397
|
+
import { createHash as createHash6, randomUUID as randomUUID4 } from "crypto";
|
|
15928
16398
|
var GENERATED_ARTIFACTS = [
|
|
15929
16399
|
{
|
|
15930
16400
|
kind: "schema",
|
|
@@ -15960,7 +16430,7 @@ var GENERATED_ARTIFACTS = [
|
|
|
15960
16430
|
function hashArtifactBody(body) {
|
|
15961
16431
|
const bytes = typeof body === "string" ? Buffer.from(body) : Buffer.from(body);
|
|
15962
16432
|
return {
|
|
15963
|
-
hash: `sha256:${
|
|
16433
|
+
hash: `sha256:${createHash6("sha256").update(bytes).digest("hex")}`,
|
|
15964
16434
|
size_bytes: bytes.byteLength
|
|
15965
16435
|
};
|
|
15966
16436
|
}
|
|
@@ -16095,12 +16565,16 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
16095
16565
|
}
|
|
16096
16566
|
|
|
16097
16567
|
// src/wiki-layout.ts
|
|
16568
|
+
import { createHash as createHash7 } from "crypto";
|
|
16098
16569
|
function todayParts(now) {
|
|
16099
16570
|
const year = String(now.getUTCFullYear());
|
|
16100
16571
|
const month = String(now.getUTCMonth() + 1).padStart(2, "0");
|
|
16101
16572
|
const day = String(now.getUTCDate()).padStart(2, "0");
|
|
16102
16573
|
return { year, month, day };
|
|
16103
16574
|
}
|
|
16575
|
+
function stableId4(prefix, value) {
|
|
16576
|
+
return `${prefix}_${createHash7("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
16577
|
+
}
|
|
16104
16578
|
function agentSchemaTemplate() {
|
|
16105
16579
|
return `# Knowledge Agent Schema v1
|
|
16106
16580
|
|
|
@@ -16182,6 +16656,13 @@ async function initializeWikiLayout(store, now = new Date) {
|
|
|
16182
16656
|
uri: result.uri,
|
|
16183
16657
|
kind: artifactKindForKey(entry.key),
|
|
16184
16658
|
content_type: entry.content_type,
|
|
16659
|
+
metadata: {
|
|
16660
|
+
provenance: generatedArtifactProvenance({
|
|
16661
|
+
generated_from: "wiki_layout_init",
|
|
16662
|
+
artifact_key: entry.key,
|
|
16663
|
+
citation_required: entry.key.startsWith("wiki/") || entry.key.startsWith("indexes/")
|
|
16664
|
+
})
|
|
16665
|
+
},
|
|
16185
16666
|
...hashArtifactBody(entry.body)
|
|
16186
16667
|
};
|
|
16187
16668
|
}));
|
|
@@ -16194,6 +16675,66 @@ async function initializeWikiLayout(store, now = new Date) {
|
|
|
16194
16675
|
written: [schemaKey, rootIndexKey, wikiReadmeKey, logKey]
|
|
16195
16676
|
};
|
|
16196
16677
|
}
|
|
16678
|
+
function provenanceFor(artifact) {
|
|
16679
|
+
const existing = artifact.metadata?.provenance;
|
|
16680
|
+
if (existing && typeof existing === "object" && !Array.isArray(existing)) {
|
|
16681
|
+
return existing;
|
|
16682
|
+
}
|
|
16683
|
+
return generatedArtifactProvenance({
|
|
16684
|
+
generated_from: "wiki_layout_init",
|
|
16685
|
+
artifact_key: artifact.key
|
|
16686
|
+
});
|
|
16687
|
+
}
|
|
16688
|
+
function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
16689
|
+
const timestamp = now.toISOString();
|
|
16690
|
+
const rootIndex = artifacts.find((artifact) => artifact.key.endsWith("indexes/root.md"));
|
|
16691
|
+
const wikiReadme = artifacts.find((artifact) => artifact.key.endsWith("wiki/README.md"));
|
|
16692
|
+
if (rootIndex) {
|
|
16693
|
+
db.run(`INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
|
|
16694
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
16695
|
+
ON CONFLICT(kind, name, shard_key) DO UPDATE SET
|
|
16696
|
+
artifact_uri = excluded.artifact_uri,
|
|
16697
|
+
metadata_json = excluded.metadata_json,
|
|
16698
|
+
updated_at = excluded.updated_at`, [
|
|
16699
|
+
stableId4("idx", "root:indexes/root.md"),
|
|
16700
|
+
"root",
|
|
16701
|
+
"root",
|
|
16702
|
+
rootIndex.uri,
|
|
16703
|
+
"root",
|
|
16704
|
+
JSON.stringify({
|
|
16705
|
+
artifact_key: rootIndex.key,
|
|
16706
|
+
content_hash: rootIndex.hash ?? null,
|
|
16707
|
+
provenance: provenanceFor(rootIndex)
|
|
16708
|
+
}),
|
|
16709
|
+
timestamp,
|
|
16710
|
+
timestamp
|
|
16711
|
+
]);
|
|
16712
|
+
}
|
|
16713
|
+
if (wikiReadme) {
|
|
16714
|
+
db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
16715
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
16716
|
+
ON CONFLICT(path) DO UPDATE SET
|
|
16717
|
+
title = excluded.title,
|
|
16718
|
+
artifact_uri = excluded.artifact_uri,
|
|
16719
|
+
content_hash = excluded.content_hash,
|
|
16720
|
+
status = excluded.status,
|
|
16721
|
+
metadata_json = excluded.metadata_json,
|
|
16722
|
+
updated_at = excluded.updated_at`, [
|
|
16723
|
+
stableId4("wiki", "wiki/README.md"),
|
|
16724
|
+
"wiki/README.md",
|
|
16725
|
+
"Wiki",
|
|
16726
|
+
wikiReadme.uri,
|
|
16727
|
+
wikiReadme.hash ?? null,
|
|
16728
|
+
"active",
|
|
16729
|
+
JSON.stringify({
|
|
16730
|
+
artifact_key: wikiReadme.key,
|
|
16731
|
+
provenance: provenanceFor(wikiReadme)
|
|
16732
|
+
}),
|
|
16733
|
+
timestamp,
|
|
16734
|
+
timestamp
|
|
16735
|
+
]);
|
|
16736
|
+
}
|
|
16737
|
+
}
|
|
16197
16738
|
|
|
16198
16739
|
// src/service.ts
|
|
16199
16740
|
class KnowledgeService {
|
|
@@ -16270,6 +16811,7 @@ class KnowledgeService {
|
|
|
16270
16811
|
const db = openKnowledgeDb(workspace.knowledgeDbPath);
|
|
16271
16812
|
try {
|
|
16272
16813
|
recordStorageObjects(db, result.artifacts);
|
|
16814
|
+
recordWikiLayoutCatalog(db, result.artifacts);
|
|
16273
16815
|
} finally {
|
|
16274
16816
|
db.close();
|
|
16275
16817
|
}
|
|
@@ -16319,6 +16861,26 @@ class KnowledgeService {
|
|
|
16319
16861
|
modelRegistry() {
|
|
16320
16862
|
return listModelRegistry(this.config());
|
|
16321
16863
|
}
|
|
16864
|
+
embeddingStatus() {
|
|
16865
|
+
const workspace = this.ensureWorkspace();
|
|
16866
|
+
return embeddingIndexStatus(workspace.knowledgeDbPath);
|
|
16867
|
+
}
|
|
16868
|
+
async indexEmbeddings(options = {}) {
|
|
16869
|
+
const workspace = this.ensureWorkspace();
|
|
16870
|
+
return indexKnowledgeEmbeddings({
|
|
16871
|
+
...options,
|
|
16872
|
+
dbPath: workspace.knowledgeDbPath,
|
|
16873
|
+
config: this.config()
|
|
16874
|
+
});
|
|
16875
|
+
}
|
|
16876
|
+
async semanticSearch(options) {
|
|
16877
|
+
const workspace = this.ensureWorkspace();
|
|
16878
|
+
return searchVectorIndex({
|
|
16879
|
+
...options,
|
|
16880
|
+
dbPath: workspace.knowledgeDbPath,
|
|
16881
|
+
config: this.config()
|
|
16882
|
+
});
|
|
16883
|
+
}
|
|
16322
16884
|
}
|
|
16323
16885
|
function createKnowledgeService(options = {}) {
|
|
16324
16886
|
return new KnowledgeService(options);
|
|
@@ -16433,6 +16995,41 @@ function buildServer() {
|
|
|
16433
16995
|
const service = createKnowledgeService({ scope });
|
|
16434
16996
|
return jsonText({ ok: true, models: service.modelRegistry() });
|
|
16435
16997
|
});
|
|
16998
|
+
registerTool(server, "ok_embeddings_status", "Embedding index status", "Inspect local embedding/vector index counts by provider and model", {
|
|
16999
|
+
scope: scopeField
|
|
17000
|
+
}, async ({ scope }) => {
|
|
17001
|
+
const service = createKnowledgeService({ scope });
|
|
17002
|
+
return jsonText({ ok: true, ...service.embeddingStatus() });
|
|
17003
|
+
});
|
|
17004
|
+
registerTool(server, "ok_embeddings_index", "Index embeddings", "Embed unindexed knowledge chunks into the local vector index", {
|
|
17005
|
+
scope: scopeField,
|
|
17006
|
+
limit: exports_external.number().optional().describe("Maximum chunks to embed"),
|
|
17007
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
17008
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
17009
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
17010
|
+
}, async ({ scope, limit, model, dimensions, fake }) => {
|
|
17011
|
+
const service = createKnowledgeService({ scope });
|
|
17012
|
+
try {
|
|
17013
|
+
return jsonText({ ok: true, ...await service.indexEmbeddings({ limit, modelRef: model, dimensions, fake }) });
|
|
17014
|
+
} catch (error48) {
|
|
17015
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
17016
|
+
}
|
|
17017
|
+
});
|
|
17018
|
+
registerTool(server, "ok_semantic_search", "Semantic search", "Search the local vector index and return cited chunks with provenance", {
|
|
17019
|
+
scope: scopeField,
|
|
17020
|
+
query: exports_external.string().describe("Semantic query"),
|
|
17021
|
+
limit: exports_external.number().optional().describe("Maximum results"),
|
|
17022
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
17023
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
17024
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
17025
|
+
}, async ({ scope, query, limit, model, dimensions, fake }) => {
|
|
17026
|
+
const service = createKnowledgeService({ scope });
|
|
17027
|
+
try {
|
|
17028
|
+
return jsonText({ ok: true, ...await service.semanticSearch({ query, limit, modelRef: model, dimensions, fake }) });
|
|
17029
|
+
} catch (error48) {
|
|
17030
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
17031
|
+
}
|
|
17032
|
+
});
|
|
16436
17033
|
registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
|
|
16437
17034
|
title: exports_external.string().describe("Item title"),
|
|
16438
17035
|
content: exports_external.string().describe("Item content/body"),
|