@hasna/knowledge 0.2.13 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -3
- package/bin/open-knowledge-mcp.js +1106 -227
- package/bin/open-knowledge.js +192 -35
- package/docs/architecture/ai-native-knowledge-base.md +11 -0
- package/docs/architecture/hybrid-semantic-search.md +31 -8
- package/package.json +1 -1
- package/src/cli.ts +68 -4
- package/src/embeddings.ts +516 -0
- package/src/knowledge-db.ts +39 -1
- package/src/mcp.js +55 -0
- package/src/outbox-consume.ts +11 -2
- package/src/search.ts +510 -0
- package/src/service.ts +40 -0
- package/src/wiki-layout.ts +41 -1
- package/src/workspace.ts +12 -0
|
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
|
|
|
13660
13660
|
// package.json
|
|
13661
13661
|
var package_default = {
|
|
13662
13662
|
name: "@hasna/knowledge",
|
|
13663
|
-
version: "0.2.
|
|
13663
|
+
version: "0.2.15",
|
|
13664
13664
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13665
13665
|
type: "module",
|
|
13666
13666
|
bin: {
|
|
@@ -13790,6 +13790,12 @@ function defaultKnowledgeConfig() {
|
|
|
13790
13790
|
default_model: "deepseek-chat"
|
|
13791
13791
|
}
|
|
13792
13792
|
},
|
|
13793
|
+
embeddings: {
|
|
13794
|
+
default_model: "openai:text-embedding-3-small",
|
|
13795
|
+
dimensions: 1536,
|
|
13796
|
+
batch_size: 64,
|
|
13797
|
+
max_parallel_calls: 4
|
|
13798
|
+
},
|
|
13793
13799
|
safety: {
|
|
13794
13800
|
network: {
|
|
13795
13801
|
web_search_enabled: false,
|
|
@@ -14128,10 +14134,8 @@ function createArtifactStore(config2, workspace) {
|
|
|
14128
14134
|
return new LocalArtifactStore(workspace.artifactsDir);
|
|
14129
14135
|
}
|
|
14130
14136
|
|
|
14131
|
-
// src/
|
|
14132
|
-
import { createHash
|
|
14133
|
-
import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
|
|
14134
|
-
import { basename } from "path";
|
|
14137
|
+
// src/embeddings.ts
|
|
14138
|
+
import { createHash } from "crypto";
|
|
14135
14139
|
|
|
14136
14140
|
// src/knowledge-db.ts
|
|
14137
14141
|
import { Database } from "bun:sqlite";
|
|
@@ -14349,6 +14353,38 @@ CREATE INDEX IF NOT EXISTS idx_approval_gates_status ON approval_gates(status);
|
|
|
14349
14353
|
INSERT OR IGNORE INTO schema_versions(version, applied_at)
|
|
14350
14354
|
VALUES (3, datetime('now'));
|
|
14351
14355
|
`;
|
|
14356
|
+
// Schema migration 4: creates the vector_index_entries table (one row per
// chunk/provider/model, enforced by the UNIQUE constraint), its lookup indexes,
// and records schema version 4. All statements are idempotent
// (IF NOT EXISTS / INSERT OR IGNORE).
var MIGRATION_4 = `
CREATE TABLE IF NOT EXISTS vector_index_entries (
  id TEXT PRIMARY KEY,
  chunk_id TEXT NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
  source_revision_id TEXT REFERENCES source_revisions(id) ON DELETE CASCADE,
  provider TEXT NOT NULL,
  model TEXT NOT NULL,
  dimensions INTEGER NOT NULL,
  vector_json TEXT NOT NULL,
  vector_norm REAL NOT NULL,
  source_uri TEXT,
  source_ref TEXT,
  revision TEXT,
  hash TEXT,
  start_offset INTEGER,
  end_offset INTEGER,
  token_count INTEGER,
  status TEXT NOT NULL DEFAULT 'active',
  metadata_json TEXT NOT NULL DEFAULT '{}',
  created_at TEXT NOT NULL,
  updated_at TEXT NOT NULL,
  UNIQUE(chunk_id, provider, model)
);

CREATE INDEX IF NOT EXISTS idx_vector_index_provider_model ON vector_index_entries(provider, model);
CREATE INDEX IF NOT EXISTS idx_vector_index_source_revision ON vector_index_entries(source_revision_id);
CREATE INDEX IF NOT EXISTS idx_vector_index_source_uri ON vector_index_entries(source_uri);
CREATE INDEX IF NOT EXISTS idx_vector_index_status ON vector_index_entries(status);

INSERT OR IGNORE INTO schema_versions(version, applied_at)
VALUES (4, datetime('now'));
`;
|
|
14352
14388
|
function openKnowledgeDb(path) {
|
|
14353
14389
|
ensureParentDir(path);
|
|
14354
14390
|
const db = new Database(path);
|
|
@@ -14364,6 +14400,8 @@ function migrateKnowledgeDb(path) {
|
|
|
14364
14400
|
db.exec(MIGRATION_2);
|
|
14365
14401
|
if (getSchemaVersion(db) < 3)
|
|
14366
14402
|
db.exec(MIGRATION_3);
|
|
14403
|
+
if (getSchemaVersion(db) < 4)
|
|
14404
|
+
db.exec(MIGRATION_4);
|
|
14367
14405
|
return { path, schema_version: getSchemaVersion(db) };
|
|
14368
14406
|
} finally {
|
|
14369
14407
|
db.close();
|
|
@@ -14393,15 +14431,530 @@ function getKnowledgeDbStats(path) {
|
|
|
14393
14431
|
redaction_findings: count(db, "redaction_findings"),
|
|
14394
14432
|
audit_events: count(db, "audit_events"),
|
|
14395
14433
|
approval_gates: count(db, "approval_gates"),
|
|
14396
|
-
storage_objects: count(db, "storage_objects")
|
|
14434
|
+
storage_objects: count(db, "storage_objects"),
|
|
14435
|
+
embeddings: count(db, "chunk_embeddings"),
|
|
14436
|
+
vector_entries: count(db, "vector_index_entries")
|
|
14437
|
+
};
|
|
14438
|
+
} finally {
|
|
14439
|
+
db.close();
|
|
14440
|
+
}
|
|
14441
|
+
}
|
|
14442
|
+
|
|
14443
|
+
// src/providers.ts
|
|
14444
|
+
// Built-in per-provider defaults: the environment variable that holds the API
// key and the model used when none is configured. providerSettings() overlays
// user configuration on top of these values.
var DEFAULT_PROVIDER_SETTINGS = {
  openai: {
    api_key_env: "OPENAI_API_KEY",
    default_model: "gpt-5.2"
  },
  anthropic: {
    api_key_env: "ANTHROPIC_API_KEY",
    default_model: "claude-sonnet-4-6"
  },
  deepseek: {
    api_key_env: "DEEPSEEK_API_KEY",
    default_model: "deepseek-chat"
  }
};
|
|
14458
|
+
// Static capability flags per provider, attached to every entry returned by
// listModelRegistry(). Only openai is flagged with embeddings: true.
var PROVIDER_CAPABILITIES = {
  openai: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: true,
    native_web_search: true,
    reasoning: true,
    embeddings: true
  },
  anthropic: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: true,
    native_web_search: false,
    reasoning: true,
    embeddings: false
  },
  deepseek: {
    text_generation: true,
    structured_output: true,
    tool_usage: true,
    tool_streaming: true,
    image_input: false,
    native_web_search: false,
    reasoning: true,
    embeddings: false
  }
};
|
|
14490
|
+
// Built-in alias -> "provider:model" references. modelAliases() lets user
// configuration override or extend these entries.
var BUILTIN_ALIASES = {
  default: "openai:gpt-5.2",
  fast: "openai:gpt-5-mini",
  reasoning: "anthropic:claude-opus-4-6",
  sonnet: "anthropic:claude-sonnet-4-6",
  deepseek: "deepseek:deepseek-chat",
  "deepseek-reasoning": "deepseek:deepseek-reasoner"
};
|
|
14498
|
+
/**
 * Returns the `providers` section of the knowledge config, or an empty object
 * when the section (or the config itself) is absent.
 *
 * The config is read with optional chaining because the embedding code path
 * passes an optional `options.config` down to the provider helpers; a missing
 * config must fall back to the built-in defaults rather than throw a TypeError.
 *
 * @param {object} [config2] - Knowledge config object.
 * @returns {object} The configured provider section, or `{}`.
 */
function providerConfig(config2) {
  return config2?.providers ?? {};
}
|
|
14501
|
+
/**
 * Resolves the effective settings for one provider: the built-in defaults
 * overlaid with any per-provider overrides found in the config.
 *
 * @param {object} config2 - Knowledge config object.
 * @param {string} provider - Provider key (a key of DEFAULT_PROVIDER_SETTINGS).
 * @returns {object} Merged settings; configured keys win over defaults.
 */
function providerSettings(config2, provider) {
  const overrides = providerConfig(config2)[provider] ?? {};
  return {
    ...DEFAULT_PROVIDER_SETTINGS[provider],
    ...overrides
  };
}
|
|
14508
|
+
/**
 * Builds the alias table used to resolve model references.
 *
 * Precedence, lowest to highest: BUILTIN_ALIASES, then a configured
 * `providers.default_model` (which replaces the "default" alias), then any
 * configured `providers.aliases`.
 *
 * @param {object} config2 - Knowledge config object.
 * @returns {object} Map of alias -> model reference.
 */
function modelAliases(config2) {
  const configured = providerConfig(config2);
  const defaultOverride = configured.default_model ? { default: configured.default_model } : {};
  return {
    ...BUILTIN_ALIASES,
    ...defaultOverride,
    ...configured.aliases ?? {}
  };
}
|
|
14516
|
+
/**
 * Splits a "provider:model" reference into its parts.
 *
 * Only the first colon separates provider from model, so model names that
 * contain colons are preserved intact.
 *
 * @param {string} modelRef - Reference in the form `provider:model`.
 * @returns {{provider: string, model: string}} Parsed reference.
 * @throws {Error} If the reference is not a string, the provider is not one of
 *   openai/anthropic/deepseek, or the model part is empty.
 */
function parseModelRef(modelRef) {
  // Reject non-strings with the same message used for malformed refs instead
  // of letting `.split` fail with an opaque TypeError.
  if (typeof modelRef !== "string") {
    throw new Error(`Invalid model ref: ${String(modelRef)}. Expected provider:model.`);
  }
  const [provider, ...rest] = modelRef.split(":");
  const model = rest.join(":");
  if (provider !== "openai" && provider !== "anthropic" && provider !== "deepseek") {
    throw new Error(`Unsupported AI provider: ${provider}`);
  }
  if (!model) {
    throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
  }
  return { provider, model };
}
|
|
14526
|
+
/**
 * Resolves an alias to its model reference; anything that is not a known
 * alias is returned unchanged (and treated as a literal reference by callers).
 *
 * @param {string} aliasOrRef - Alias name or `provider:model` reference.
 * @param {object} config2 - Knowledge config object.
 * @returns {string} The resolved model reference.
 */
function resolveModelRef(aliasOrRef, config2) {
  const aliases = modelAliases(config2);
  // Own-property check so inherited names such as "toString" or "constructor"
  // are not mistaken for aliases and resolved to Object.prototype members.
  if (Object.prototype.hasOwnProperty.call(aliases, aliasOrRef)) {
    return aliases[aliasOrRef] ?? aliasOrRef;
  }
  return aliasOrRef;
}
|
|
14530
|
+
/**
 * Lists every known alias with its parsed provider/model and the provider's
 * capability flags.
 *
 * @param {object} config2 - Knowledge config object.
 * @returns {Array<object>} One entry per alias; `default` is true only for the
 *   alias literally named "default".
 * @throws {Error} Propagated from parseModelRef when an alias maps to an
 *   invalid reference.
 */
function listModelRegistry(config2) {
  const aliases = modelAliases(config2);
  return Object.entries(aliases).map(([alias, modelRef]) => {
    const { provider, model } = parseModelRef(modelRef);
    return {
      alias,
      model_ref: modelRef,
      provider,
      model,
      default: alias === "default",
      capabilities: PROVIDER_CAPABILITIES[provider]
    };
  });
}
|
|
14544
|
+
/**
 * Reports, for each built-in provider, whether its API-key environment
 * variable is set. The key value itself is never included in the result.
 *
 * @param {object} config2 - Knowledge config object.
 * @param {object} [env=process.env] - Environment to inspect.
 * @returns {Array<object>} One status record per provider.
 */
function providerCredentialStatus(config2, env = process.env) {
  return Object.keys(DEFAULT_PROVIDER_SETTINGS).map((provider) => {
    const settings = providerSettings(config2, provider);
    // An empty string counts as "not configured".
    const configured = Boolean(env[settings.api_key_env]);
    return {
      provider,
      api_key_env: settings.api_key_env,
      configured,
      source: configured ? "env" : "missing",
      base_url: settings.base_url ?? null,
      default_model: settings.default_model
    };
  });
}
|
|
14558
|
+
/**
 * Aggregated provider overview: the resolved default model, per-provider
 * credential status, and the full model registry.
 *
 * @param {object} config2 - Knowledge config object.
 * @param {object} [env=process.env] - Environment to inspect for API keys.
 * @returns {{default_model: string, providers: Array<object>, models: Array<object>}}
 */
function providerStatus(config2, env = process.env) {
  const default_model = resolveModelRef("default", config2);
  const providers = providerCredentialStatus(config2, env);
  const models = listModelRegistry(config2);
  return { default_model, providers, models };
}
|
|
14565
|
+
/**
 * Ensures the given provider is known and has its API-key environment
 * variable set.
 *
 * @param {string} provider - Provider key.
 * @param {object} config2 - Knowledge config object.
 * @param {object} [env=process.env] - Environment to inspect.
 * @returns {object} The provider's credential status record.
 * @throws {Error} If the provider is unknown or its API key is missing.
 */
function assertProviderCredentials(provider, config2, env = process.env) {
  const status = providerCredentialStatus(config2, env).find((entry) => entry.provider === provider);
  if (!status) {
    throw new Error(`Unsupported AI provider: ${provider}`);
  }
  if (!status.configured) {
    throw new Error(`Missing ${status.api_key_env} for ${provider}. Set the env var to use this provider.`);
  }
  return status;
}
|
|
14573
|
+
|
|
14574
|
+
// src/provenance.ts
|
|
14575
|
+
/**
 * Tells whether a source status marks the content as no longer current.
 * The comparison is case-insensitive.
 *
 * @param {*} status - Status value; anything that is not a string is treated
 *   as "not stale" instead of throwing on `.toLowerCase()`.
 * @returns {boolean} True for deleted/stale/invalidated/reindex_required.
 */
function isStaleStatus(status) {
  if (typeof status !== "string") {
    return false;
  }
  return ["deleted", "stale", "invalidated", "reindex_required"].includes(status.toLowerCase());
}
|
|
14578
|
+
/**
 * Builds a normalized provenance record for a source chunk.
 *
 * Every optional field missing from `input` becomes `null`. The record always
 * carries `source_owner: "open-files"`, `read_only: true` and
 * `citation_required: true`; `stale` is derived from the status.
 *
 * @param {object} input - Partial provenance fields (source_ref, source_uri,
 *   source_kind, source_revision_id, revision, hash, chunk_id, start_offset,
 *   end_offset, status, resolver).
 * @returns {object} The normalized provenance record.
 */
function sourceProvenance(input) {
  const status = input.status ?? null;
  return {
    source_owner: "open-files",
    source_ref: input.source_ref ?? null,
    source_uri: input.source_uri ?? null,
    source_kind: input.source_kind ?? null,
    source_revision_id: input.source_revision_id ?? null,
    revision: input.revision ?? null,
    hash: input.hash ?? null,
    chunk_id: input.chunk_id ?? null,
    start_offset: input.start_offset ?? null,
    end_offset: input.end_offset ?? null,
    status,
    read_only: true,
    citation_required: true,
    resolver: input.resolver ?? null,
    stale: isStaleStatus(status)
  };
}
|
|
14598
|
+
/**
 * Builds the provenance record attached to a generated artifact.
 *
 * @param {object} input - `generated_from` and `artifact_key` are copied
 *   through as-is; `source_refs` defaults to `[]` and `citation_required`
 *   defaults to `true`.
 * @returns {object} Provenance record; always flags sources as read-only and
 *   `raw_source_bytes_stored_in_open_knowledge` as false.
 */
function generatedArtifactProvenance(input) {
  const { generated_from, artifact_key } = input;
  return {
    source_owner: "open-files",
    generated_from,
    artifact_key,
    source_refs: input.source_refs ?? [],
    read_only_sources: true,
    citation_required: input.citation_required ?? true,
    raw_source_bytes_stored_in_open_knowledge: false
  };
}
|
|
14609
|
+
/**
 * Returns a shallow copy of `metadata` with its `provenance` key set to the
 * given record (replacing any existing one). The input is not mutated.
 *
 * @param {object} metadata - Metadata to extend.
 * @param {object} provenance - Provenance record to attach.
 * @returns {object} New metadata object.
 */
function withProvenance(metadata, provenance) {
  return { ...metadata, provenance };
}
|
|
14615
|
+
|
|
14616
|
+
// src/embeddings.ts
|
|
14617
|
+
// Fallbacks used when neither the call options nor the config's `embeddings`
// section specify a model reference or vector size.
var DEFAULT_EMBEDDING_MODEL_REF = "openai:text-embedding-3-small";
var DEFAULT_EMBEDDING_DIMENSIONS = 1536;
|
|
14619
|
+
/**
 * Returns the `embeddings` section of the knowledge config, or an empty
 * object when the section or the config itself is missing.
 *
 * @param {object} [config2] - Knowledge config object.
 * @returns {object} Embedding settings, or `{}`.
 */
function embeddingConfig(config2) {
  const section = config2?.embeddings;
  return section ?? {};
}
|
|
14622
|
+
/**
 * Derives a deterministic identifier: `<prefix>_` followed by the first 20 hex
 * characters of the SHA-256 digest of `value`.
 *
 * @param {string} prefix - Identifier prefix.
 * @param {string} value - Text to hash.
 * @returns {string} The identifier.
 */
function stableId(prefix, value) {
  const digest = createHash("sha256").update(value).digest("hex");
  return `${prefix}_${digest.slice(0, 20)}`;
}
|
|
14625
|
+
/**
 * Parses a JSON string that is expected to hold a plain object.
 *
 * Deliberately lenient: empty input, invalid JSON, and any parsed value that
 * is not a non-array object all yield `{}`.
 *
 * @param {string|null|undefined} value - JSON text.
 * @returns {object} The parsed object, or `{}`.
 */
function parseJsonObject(value) {
  if (!value) {
    return {};
  }
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    // Malformed metadata is treated as "no metadata".
    return {};
  }
  const isPlainObject = parsed && typeof parsed === "object" && !Array.isArray(parsed);
  return isPlainObject ? parsed : {};
}
|
|
14635
|
+
/**
 * Returns the first non-empty string found in `metadata` under the given keys,
 * checked in order.
 *
 * @param {object} metadata - Metadata object.
 * @param {string[]} keys - Candidate keys, in priority order.
 * @returns {string|null} The value, or `null` when no key holds a non-empty string.
 */
function metadataString(metadata, keys) {
  for (const key of keys) {
    const candidate = metadata[key];
    if (typeof candidate === "string" && candidate.length > 0) {
      return candidate;
    }
  }
  return null;
}
|
|
14643
|
+
/**
 * Returns the first finite number found in `metadata` under the given keys,
 * checked in order. Numeric strings, NaN and Infinity are ignored.
 *
 * @param {object} metadata - Metadata object.
 * @param {string[]} keys - Candidate keys, in priority order.
 * @returns {number|null} The value, or `null` when no key holds a finite number.
 */
function metadataNumber(metadata, keys) {
  for (const key of keys) {
    const candidate = metadata[key];
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      return candidate;
    }
  }
  return null;
}
|
|
14651
|
+
/**
 * Computes the Euclidean (L2) norm of a numeric vector.
 *
 * @param {number[]} vector - Vector components.
 * @returns {number} Square root of the sum of squares; 0 for an empty vector.
 */
function vectorNorm(vector) {
  let sumOfSquares = 0;
  for (const component of vector) {
    sumOfSquares += component * component;
  }
  return Math.sqrt(sumOfSquares);
}
|
|
14654
|
+
/**
 * Cosine similarity between two vectors.
 *
 * @param {number[]} a - First vector (its norm is computed here).
 * @param {number[]} b - Second vector.
 * @param {number} [bNorm=vectorNorm(b)] - Precomputed norm of `b`, e.g. the
 *   value stored alongside an index entry.
 * @returns {number} The similarity, or 0 when it is undefined: either vector
 *   has zero norm, the supplied norm is not a positive finite number, or the
 *   vectors have different lengths.
 */
function cosineSimilarity(a, b, bNorm = vectorNorm(b)) {
  // Vectors of different dimensionality are not comparable. Taking the dot
  // product over the shorter prefix while dividing by full-length norms would
  // yield a meaningless score, so report "no similarity" instead.
  if (a.length !== b.length) {
    return 0;
  }
  let dot = 0;
  let aSquared = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    aSquared += a[i] * a[i];
  }
  const aNorm = Math.sqrt(aSquared);
  // A NaN or non-positive norm would otherwise produce a NaN or infinite
  // score, which breaks score-based sorting in callers.
  if (aNorm === 0 || !Number.isFinite(bNorm) || bNorm <= 0) {
    return 0;
  }
  return dot / (aNorm * bNorm);
}
|
|
14664
|
+
/**
 * Produces a reproducible pseudo-embedding for `text` without calling any
 * provider (used by the `fake` mode of embedTexts).
 *
 * The SHA-256 digest bytes of the text are cycled to fill `dimensions`
 * components, each mapped from 0..255 to the range [-1, 1] and rounded to six
 * decimals.
 *
 * @param {string} text - Input text.
 * @param {number} dimensions - Number of components to generate.
 * @returns {number[]} The generated vector.
 */
function deterministicVector(text, dimensions) {
  const digest = createHash("sha256").update(text).digest();
  return Array.from({ length: dimensions }, (_, index) => {
    const unit = digest[index % digest.length] / 255;
    return Number((unit * 2 - 1).toFixed(6));
  });
}
|
|
14671
|
+
/**
 * Creates an OpenAI embedding model handle through `@ai-sdk/openai`.
 *
 * The SDK is imported lazily, after the credential check. The embedding
 * factory is looked up under three possible method names, tried in order:
 * `embeddingModel`, `textEmbedding`, `textEmbeddingModel`.
 *
 * @param {string} model - Embedding model name.
 * @param {object} config2 - Knowledge config object.
 * @param {object} [env=process.env] - Environment holding the API key.
 * @returns {Promise<object>} Embedding model instance from the SDK.
 * @throws {Error} If credentials are missing or no factory method exists.
 */
async function openAiEmbeddingModel(model, config2, env = process.env) {
  assertProviderCredentials("openai", config2, env);
  const settings = providerSettings(config2, "openai");
  const { createOpenAI } = await import("@ai-sdk/openai");
  const openai = createOpenAI({
    apiKey: env[settings.api_key_env],
    baseURL: settings.base_url
  });
  if (openai.embeddingModel) {
    return openai.embeddingModel(model);
  }
  if (openai.textEmbedding) {
    return openai.textEmbedding(model);
  }
  if (openai.textEmbeddingModel) {
    return openai.textEmbeddingModel(model);
  }
  throw new Error("OpenAI provider does not expose an embedding model factory.");
}
|
|
14687
|
+
/**
 * Resolves which embedding model reference to use.
 *
 * A missing reference, or the placeholders "default" / "embedding", select
 * the configured `embeddings.default_model`, falling back to
 * DEFAULT_EMBEDDING_MODEL_REF. Any other value is returned unchanged.
 *
 * @param {string} [modelRef] - Requested model reference or placeholder.
 * @param {object} [config2] - Knowledge config object.
 * @returns {string} The model reference to use.
 */
function resolveEmbeddingModelRef(modelRef, config2) {
  const wantsDefault = !modelRef || modelRef === "default" || modelRef === "embedding";
  if (!wantsDefault) {
    return modelRef;
  }
  return embeddingConfig(config2).default_model ?? DEFAULT_EMBEDDING_MODEL_REF;
}
|
|
14693
|
+
/**
 * Embeds a list of texts with the resolved embedding model.
 *
 * Only the `openai` provider is accepted. With `options.fake` set, vectors are
 * generated locally by deterministicVector() and token usage is estimated as
 * ceil(word count * 1.25), at least 1 per text; no provider call is made.
 * Otherwise the `ai` package's `embedMany` is invoked with the requested
 * dimensions passed as an OpenAI provider option.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {object} [options] - `modelRef`, `config`, `dimensions`, `fake`,
 *   `env`, `maxParallelCalls`.
 * @returns {Promise<{provider: string, model: string, dimensions: number,
 *   vectors: number[][], usage: {input_tokens: number}}>}
 * @throws {Error} If the provider is not openai, the model reference is
 *   invalid, or (non-fake mode) credentials are missing.
 */
async function embedTexts(texts, options = {}) {
  const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
  const parsed = parseModelRef(modelRef);
  if (parsed.provider !== "openai") {
    throw new Error(`Embedding provider ${parsed.provider} is not supported yet. Use openai:text-embedding-3-small.`);
  }
  const settings = embeddingConfig(options.config);
  const dimensions = options.dimensions ?? settings.dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS;
  if (options.fake) {
    const estimateTokens = (text) => Math.max(1, Math.ceil(text.split(/\s+/).filter(Boolean).length * 1.25));
    return {
      provider: parsed.provider,
      model: parsed.model,
      dimensions,
      vectors: texts.map((text) => deterministicVector(text, dimensions)),
      usage: { input_tokens: texts.reduce((sum, text) => sum + estimateTokens(text), 0) }
    };
  }
  const { embedMany } = await import("ai");
  const model = await openAiEmbeddingModel(parsed.model, options.config, options.env);
  const result = await embedMany({
    model,
    values: texts,
    maxParallelCalls: options.maxParallelCalls ?? settings.max_parallel_calls,
    providerOptions: {
      openai: {
        dimensions
      }
    }
  });
  const vectors = result.embeddings;
  return {
    provider: parsed.provider,
    model: parsed.model,
    // Report the size actually returned by the provider when available.
    dimensions: vectors[0]?.length ?? dimensions,
    vectors,
    usage: { input_tokens: result.usage?.tokens ?? 0 }
  };
}
|
|
14730
|
+
/**
 * Selects chunks that do not yet have a vector index entry for the given
 * provider/model, oldest first.
 *
 * The LEFT JOIN on vector_index_entries combined with `v.id IS NULL` keeps
 * only chunks without a matching entry. All values are bound as parameters.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {object} options - `provider`, `model`, `limit`, and optionally
 *   `sourceRevisionId` to restrict the selection to one revision.
 * @returns {Array<object>} Chunk rows joined with revision and source columns.
 */
function selectCandidateChunks(db, options) {
  const baseQuery = `SELECT
  c.id,
  c.text,
  c.token_count,
  c.start_offset,
  c.end_offset,
  c.metadata_json,
  c.source_revision_id,
  sr.revision,
  sr.hash,
  s.uri AS source_uri,
  s.kind AS source_kind
FROM chunks c
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
LEFT JOIN sources s ON s.id = sr.source_id
LEFT JOIN vector_index_entries v
  ON v.chunk_id = c.id AND v.provider = ? AND v.model = ?
WHERE v.id IS NULL`;
  const suffix = `
ORDER BY c.created_at ASC, c.ordinal ASC
LIMIT ?`;
  if (options.sourceRevisionId) {
    return db.query(`${baseQuery} AND c.source_revision_id = ?${suffix}`).all(options.provider, options.model, options.sourceRevisionId, options.limit);
  }
  return db.query(`${baseQuery}${suffix}`).all(options.provider, options.model, options.limit);
}
|
|
14757
|
+
/**
 * Returns the provenance record for a chunk row.
 *
 * If the chunk's metadata already holds a `provenance` object, it is returned
 * unchanged. Otherwise a record is built from the joined row columns, falling
 * back to same-named metadata fields, with resolver "open-files-read-only".
 *
 * @param {object} row - Chunk row as returned by selectCandidateChunks().
 * @returns {object} Provenance record.
 */
function provenanceForChunk(row) {
  const metadata = parseJsonObject(row.metadata_json);
  const existing = metadata.provenance;
  if (existing && typeof existing === "object" && !Array.isArray(existing)) {
    return existing;
  }
  return sourceProvenance({
    source_ref: metadataString(metadata, ["source_ref"]),
    source_uri: row.source_uri ?? metadataString(metadata, ["source_uri"]),
    source_kind: row.source_kind ?? metadataString(metadata, ["source_kind"]),
    source_revision_id: row.source_revision_id,
    revision: row.revision ?? metadataString(metadata, ["revision"]),
    hash: row.hash ?? metadataString(metadata, ["hash"]),
    chunk_id: row.id,
    start_offset: row.start_offset ?? metadataNumber(metadata, ["start_offset"]),
    end_offset: row.end_offset ?? metadataNumber(metadata, ["end_offset"]),
    status: metadataString(metadata, ["status"]),
    resolver: "open-files-read-only"
  });
}
|
|
14776
|
+
/**
 * Writes embeddings for the given chunk rows into both `chunk_embeddings` and
 * `vector_index_entries`, inside a single transaction.
 *
 * `embedding.vectors[i]` belongs to `rows[i]`; rows without a vector are
 * skipped. Existing entries for the same (chunk, provider, model) are updated
 * in place via ON CONFLICT.
 *
 * @param {object} db - Database handle exposing `prepare` and `transaction`.
 * @param {Array<object>} rows - Chunk rows from selectCandidateChunks().
 * @param {object} embedding - Result of embedTexts() (provider, model,
 *   dimensions, vectors).
 * @param {string} now - Timestamp stored in created_at/updated_at/embedded_at.
 * @returns {number} Number of rows actually written. This equals
 *   `rows.length` whenever every row has a vector; skipped rows are no longer
 *   counted as upserted.
 */
function upsertVectors(db, rows, embedding, now) {
  const insertEmbedding = db.prepare(`
    INSERT INTO chunk_embeddings (id, chunk_id, provider, model, dimensions, vector_json, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
      dimensions = excluded.dimensions,
      vector_json = excluded.vector_json,
      created_at = excluded.created_at
  `);
  const insertVector = db.prepare(`
    INSERT INTO vector_index_entries (
      id, chunk_id, source_revision_id, provider, model, dimensions, vector_json, vector_norm,
      source_uri, source_ref, revision, hash, start_offset, end_offset, token_count, status,
      metadata_json, created_at, updated_at
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(chunk_id, provider, model) DO UPDATE SET
      source_revision_id = excluded.source_revision_id,
      dimensions = excluded.dimensions,
      vector_json = excluded.vector_json,
      vector_norm = excluded.vector_norm,
      source_uri = excluded.source_uri,
      source_ref = excluded.source_ref,
      revision = excluded.revision,
      hash = excluded.hash,
      start_offset = excluded.start_offset,
      end_offset = excluded.end_offset,
      token_count = excluded.token_count,
      status = excluded.status,
      metadata_json = excluded.metadata_json,
      updated_at = excluded.updated_at
  `);
  let written = 0;
  const write = db.transaction(() => {
    // Reset in case the transaction body is ever re-run.
    written = 0;
    for (let index = 0; index < rows.length; index += 1) {
      const row = rows[index];
      const vector = embedding.vectors[index];
      if (!vector) {
        continue;
      }
      const metadata = parseJsonObject(row.metadata_json);
      const provenance = provenanceForChunk(row);
      const sourceRef = provenance.source_ref ?? metadataString(metadata, ["source_ref"]);
      const sourceUri = provenance.source_uri ?? row.source_uri ?? metadataString(metadata, ["source_uri"]);
      const revision = provenance.revision ?? row.revision ?? metadataString(metadata, ["revision"]);
      const hash2 = provenance.hash ?? row.hash ?? metadataString(metadata, ["hash"]);
      // A pre-existing provenance object may lack offsets; fall back to the
      // chunk row like the other fields, and bind null instead of undefined.
      const startOffset = provenance.start_offset ?? row.start_offset ?? null;
      const endOffset = provenance.end_offset ?? row.end_offset ?? null;
      const status = provenance.status ?? metadataString(metadata, ["status"]) ?? "active";
      const vectorJson = JSON.stringify(vector);
      // Both ids hash the same (chunk, provider, model) key; only the prefix differs.
      const idKey = `${row.id}\x00${embedding.provider}\x00${embedding.model}`;
      insertEmbedding.run(stableId("emb", idKey), row.id, embedding.provider, embedding.model, embedding.dimensions, vectorJson, now);
      insertVector.run(stableId("vec", idKey), row.id, row.source_revision_id, embedding.provider, embedding.model, embedding.dimensions, vectorJson, vectorNorm(vector), sourceUri, sourceRef, revision, hash2, startOffset, endOffset, row.token_count, status, JSON.stringify({
        ...metadata,
        provenance,
        embedded_at: now
      }), now, now);
      written += 1;
    }
  });
  write();
  return written;
}
|
|
14833
|
+
/**
 * Embeds chunks that have no vector index entry yet for the resolved model and
 * stores the results.
 *
 * The database is opened twice, once to read candidates and once to write, so
 * no handle is held open while the embedding request is awaited. `limit` is
 * clamped to 1..1000 (default 100).
 *
 * @param {object} options - `dbPath`, plus optional `modelRef`, `config`,
 *   `limit`, `sourceRevisionId`, `now` (Date), and the options accepted by
 *   embedTexts() (`dimensions`, `fake`, `env`, `maxParallelCalls`).
 * @returns {Promise<object>} Summary: provider, model, dimensions, chunk and
 *   upsert counts, and token usage.
 * @throws {Error} If the embedding provider is not openai.
 */
async function indexKnowledgeEmbeddings(options) {
  const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
  const parsed = parseModelRef(modelRef);
  if (parsed.provider !== "openai") {
    throw new Error(`Embedding provider ${parsed.provider} is not supported yet.`);
  }
  const now = (options.now ?? new Date()).toISOString();
  const limit = Math.max(1, Math.min(options.limit ?? 100, 1000));
  migrateKnowledgeDb(options.dbPath);
  const readDb = openKnowledgeDb(options.dbPath);
  let rows;
  try {
    rows = selectCandidateChunks(readDb, {
      provider: parsed.provider,
      model: parsed.model,
      limit,
      sourceRevisionId: options.sourceRevisionId
    });
  } finally {
    readDb.close();
  }
  if (rows.length === 0) {
    // Nothing to embed: skip the provider call entirely.
    return {
      provider: parsed.provider,
      model: parsed.model,
      dimensions: options.dimensions ?? embeddingConfig(options.config).dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS,
      chunks_seen: 0,
      chunks_embedded: 0,
      embeddings_upserted: 0,
      vector_entries_upserted: 0,
      usage: { input_tokens: 0 }
    };
  }
  const embedding = await embedTexts(rows.map((row) => row.text), options);
  const writeDb = openKnowledgeDb(options.dbPath);
  try {
    const upserted = upsertVectors(writeDb, rows, embedding, now);
    return {
      provider: embedding.provider,
      model: embedding.model,
      dimensions: embedding.dimensions,
      chunks_seen: rows.length,
      chunks_embedded: rows.length,
      embeddings_upserted: upserted,
      vector_entries_upserted: upserted,
      usage: embedding.usage
    };
  } finally {
    writeDb.close();
  }
}
|
|
14883
|
+
/**
 * Summarizes the embedding index: total rows in `chunk_embeddings` and
 * `vector_index_entries`, plus one line per (provider, model, dimensions)
 * group with its entry count and latest `updated_at`.
 *
 * Runs migrations first so the queried tables exist.
 *
 * @param {string} dbPath - Path to the knowledge database.
 * @returns {{total_embeddings: number, total_vector_entries: number, indexes: Array<object>}}
 */
function embeddingIndexStatus(dbPath) {
  migrateKnowledgeDb(dbPath);
  const db = openKnowledgeDb(dbPath);
  try {
    const countRows = (sql) => db.query(sql).get()?.n ?? 0;
    const totalEmbeddings = countRows("SELECT COUNT(*) AS n FROM chunk_embeddings");
    const totalVectorEntries = countRows("SELECT COUNT(*) AS n FROM vector_index_entries");
    const indexes = db.query(`SELECT provider, model, dimensions, COUNT(*) AS entries, MAX(updated_at) AS updated_at
      FROM vector_index_entries
      GROUP BY provider, model, dimensions
      ORDER BY provider, model`).all();
    return {
      total_embeddings: totalEmbeddings,
      total_vector_entries: totalVectorEntries,
      indexes
    };
  } finally {
    db.close();
  }
}
|
|
14902
|
+
/**
 * Semantic search over the vector index.
 *
 * Embeds the query, loads every active index entry for the resolved
 * provider/model, scores each against the query vector with cosine
 * similarity, and returns the best `limit` matches (clamped to 1..100,
 * default 10), highest score first.
 *
 * @param {object} options - `dbPath`, `query`, plus optional `modelRef`,
 *   `config`, `limit`, and the options accepted by embedTexts().
 * @returns {Promise<{provider: string, model: string, dimensions: number,
 *   query: string, results: Array<object>}>}
 */
async function searchVectorIndex(options) {
  const modelRef = resolveEmbeddingModelRef(options.modelRef, options.config);
  const parsed = parseModelRef(modelRef);
  const limit = Math.max(1, Math.min(options.limit ?? 10, 100));
  const embedded = await embedTexts([options.query], options);
  const queryVector = embedded.vectors[0] ?? [];
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const rows = db.query(`SELECT
        v.chunk_id,
        c.text,
        v.vector_json,
        v.vector_norm,
        v.source_uri,
        v.source_ref,
        v.revision,
        v.hash,
        v.metadata_json
      FROM vector_index_entries v
      JOIN chunks c ON c.id = v.chunk_id
      WHERE v.provider = ? AND v.model = ? AND v.status = 'active'`).all(parsed.provider, parsed.model);
    const scored = [];
    for (const row of rows) {
      // One unreadable stored vector must not abort the whole search; such
      // entries are skipped and simply do not appear in the results.
      let vector;
      try {
        vector = JSON.parse(row.vector_json);
      } catch {
        continue;
      }
      if (!Array.isArray(vector)) {
        continue;
      }
      const metadata = parseJsonObject(row.metadata_json);
      const provenance = metadata.provenance && typeof metadata.provenance === "object" && !Array.isArray(metadata.provenance) ? metadata.provenance : null;
      scored.push({
        chunk_id: row.chunk_id,
        score: cosineSimilarity(queryVector, vector, row.vector_norm),
        text: row.text,
        source_uri: row.source_uri,
        source_ref: row.source_ref,
        revision: row.revision,
        hash: row.hash,
        provenance
      });
    }
    scored.sort((a, b) => b.score - a.score);
    return {
      provider: parsed.provider,
      model: parsed.model,
      dimensions: embedded.dimensions,
      query: options.query,
      results: scored.slice(0, limit)
    };
  } finally {
    db.close();
  }
}
|
|
14402
14950
|
|
|
14951
|
+
// src/outbox-consume.ts
|
|
14952
|
+
import { createHash as createHash3, randomUUID as randomUUID3 } from "crypto";
|
|
14953
|
+
import { existsSync as existsSync4, readFileSync as readFileSync4 } from "fs";
|
|
14954
|
+
import { basename } from "path";
|
|
14955
|
+
|
|
14403
14956
|
// src/safety.ts
|
|
14404
|
-
import { createHash, randomUUID as randomUUID2 } from "crypto";
|
|
14957
|
+
import { createHash as createHash2, randomUUID as randomUUID2 } from "crypto";
|
|
14405
14958
|
import { relative as relative2, resolve as resolve2, sep as sep2 } from "path";
|
|
14406
14959
|
function envEnabled(name) {
|
|
14407
14960
|
const value = process.env[name];
|
|
@@ -14496,7 +15049,7 @@ function redactSecrets(text, policy) {
|
|
|
14496
15049
|
return { text: output, findings };
|
|
14497
15050
|
}
|
|
14498
15051
|
/**
 * Generates an audit event identifier: `audit_` followed by the first 24 hex
 * characters of a SHA-256 digest over the event's type, action, target URI,
 * creation time, metadata JSON and a fresh random UUID (fields are separated
 * by NUL bytes). Because of the UUID, two calls with the same input yield
 * different ids.
 *
 * @param {object} input - Audit event fields (`event_type`, `action`, and
 *   optional `target_uri`, `created_at`, `metadata`).
 * @returns {string} The identifier.
 */
function auditId(input) {
  const seed = `${input.event_type}\x00${input.action}\x00${input.target_uri ?? ""}\x00${input.created_at ?? ""}\x00${JSON.stringify(input.metadata ?? {})}\x00${randomUUID2()}`;
  return `audit_${createHash2("sha256").update(seed).digest("hex").slice(0, 24)}`;
}
|
|
14501
15054
|
function recordAuditEvent(db, input) {
|
|
14502
15055
|
const createdAt = input.created_at ?? new Date().toISOString();
|
|
@@ -14531,8 +15084,8 @@ function recordRedactionFindings(db, input) {
|
|
|
14531
15084
|
}
|
|
14532
15085
|
|
|
14533
15086
|
// src/outbox-consume.ts
|
|
14534
|
-
function
|
|
14535
|
-
return `${prefix}_${
|
|
15087
|
+
/**
 * Derives a deterministic identifier: `<prefix>_` followed by the first 20 hex
 * characters of the SHA-256 digest of `value`.
 *
 * @param {string} prefix - Identifier prefix.
 * @param {string} value - Text to hash.
 * @returns {string} The identifier.
 */
function stableId2(prefix, value) {
  const digest = createHash3("sha256").update(value).digest("hex");
  return `${prefix}_${digest.slice(0, 20)}`;
}
|
|
14537
15090
|
function asObject(value) {
|
|
14538
15091
|
return value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
|
|
@@ -14686,7 +15239,7 @@ function mergeJson(existing, patch) {
|
|
|
14686
15239
|
return JSON.stringify({ ...base, ...patch });
|
|
14687
15240
|
}
|
|
14688
15241
|
function ensureSource(db, event, now) {
|
|
14689
|
-
const id =
|
|
15242
|
+
const id = stableId2("src", event.sourceUri);
|
|
14690
15243
|
db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
14691
15244
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
14692
15245
|
ON CONFLICT(uri) DO UPDATE SET
|
|
@@ -14727,7 +15280,7 @@ function ensureSource(db, event, now) {
|
|
|
14727
15280
|
function ensureRevision(db, sourceId, event, now) {
|
|
14728
15281
|
if (!event.revision)
|
|
14729
15282
|
return null;
|
|
14730
|
-
const id =
|
|
15283
|
+
const id = stableId2("rev", `${sourceId}\x00${event.revision}`);
|
|
14731
15284
|
const metadata = {
|
|
14732
15285
|
source_ref: event.sourceRef,
|
|
14733
15286
|
source_uri: event.sourceUri,
|
|
@@ -14755,16 +15308,20 @@ function revisionIdsForEvent(db, sourceId, event) {
|
|
|
14755
15308
|
/**
 * Removes all indexed data derived from one source revision and flags the
 * revision for re-indexing.
 *
 * For each chunk of the revision, its vector index entries, embeddings and
 * FTS rows are deleted (counts are taken just before deletion); then the
 * chunks themselves are deleted and the revision's metadata is merged with
 * `reindex_required: true` and an `invalidated_at` timestamp.
 *
 * @param {object} db - Database handle exposing `query` and `run`.
 * @param {string} revisionId - Id of the source revision to invalidate.
 * @returns {{chunksDeleted: number, embeddingsDeleted: number, vectorEntriesDeleted: number}}
 */
function invalidateRevision(db, revisionId) {
  const chunks = db.query("SELECT id FROM chunks WHERE source_revision_id = ?").all(revisionId);
  let embeddingsDeleted = 0;
  let vectorEntriesDeleted = 0;
  for (const chunk of chunks) {
    const row = db.query("SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?").get(chunk.id);
    embeddingsDeleted += row?.n ?? 0;
    const vectorRow = db.query("SELECT COUNT(*) AS n FROM vector_index_entries WHERE chunk_id = ?").get(chunk.id);
    vectorEntriesDeleted += vectorRow?.n ?? 0;
    // Dependent rows are removed explicitly before the chunk rows themselves.
    db.run("DELETE FROM vector_index_entries WHERE chunk_id = ?", [chunk.id]);
    db.run("DELETE FROM chunk_embeddings WHERE chunk_id = ?", [chunk.id]);
    db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [chunk.id]);
  }
  db.run("DELETE FROM chunks WHERE source_revision_id = ?", [revisionId]);
  const revision = db.query("SELECT metadata_json FROM source_revisions WHERE id = ?").get(revisionId);
  db.run("UPDATE source_revisions SET metadata_json = ? WHERE id = ?", [mergeJson(revision?.metadata_json, { reindex_required: true, invalidated_at: new Date().toISOString() }), revisionId]);
  return { chunksDeleted: chunks.length, embeddingsDeleted, vectorEntriesDeleted };
}
|
|
14769
15326
|
function isDeleteEvent(eventType2, status) {
|
|
14770
15327
|
return status === "deleted" || ["delete", "deleted", "remove", "removed"].includes(eventType2);
|
|
@@ -14802,6 +15359,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14802
15359
|
const revisionsTouched = new Set;
|
|
14803
15360
|
let chunksDeleted = 0;
|
|
14804
15361
|
let embeddingsDeleted = 0;
|
|
15362
|
+
let vectorEntriesDeleted = 0;
|
|
14805
15363
|
let staleRevisions = 0;
|
|
14806
15364
|
let deletedSources = 0;
|
|
14807
15365
|
let movedSources = 0;
|
|
@@ -14827,6 +15385,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14827
15385
|
const invalidation = invalidateRevision(db, revisionId);
|
|
14828
15386
|
chunksDeleted += invalidation.chunksDeleted;
|
|
14829
15387
|
embeddingsDeleted += invalidation.embeddingsDeleted;
|
|
15388
|
+
vectorEntriesDeleted += invalidation.vectorEntriesDeleted;
|
|
14830
15389
|
staleRevisions += 1;
|
|
14831
15390
|
}
|
|
14832
15391
|
if (isDeleteEvent(event.eventType, event.status))
|
|
@@ -14837,7 +15396,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14837
15396
|
permissionUpdates += 1;
|
|
14838
15397
|
db.run(`INSERT INTO run_events (id, run_id, level, event, metadata_json, created_at)
|
|
14839
15398
|
VALUES (?, ?, ?, ?, ?, ?)`, [
|
|
14840
|
-
|
|
15399
|
+
stableId2("evt", `${runId}\x00${index}\x00${event.sourceRef}\x00${event.eventType}`),
|
|
14841
15400
|
runId,
|
|
14842
15401
|
"info",
|
|
14843
15402
|
event.eventType,
|
|
@@ -14854,7 +15413,7 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14854
15413
|
});
|
|
14855
15414
|
db.run(`INSERT INTO provider_usage (id, run_id, provider, model, input_tokens, output_tokens, cost_usd, metadata_json, created_at)
|
|
14856
15415
|
VALUES (?, ?, ?, ?, 0, 0, 0, ?, ?)`, [
|
|
14857
|
-
|
|
15416
|
+
stableId2("usage", runId),
|
|
14858
15417
|
runId,
|
|
14859
15418
|
"local",
|
|
14860
15419
|
"open-files-outbox",
|
|
@@ -14865,87 +15424,45 @@ async function consumeOpenFilesOutbox(options) {
|
|
|
14865
15424
|
event_type: "write",
|
|
14866
15425
|
action: "knowledge_outbox_invalidation",
|
|
14867
15426
|
target_uri: options.dbPath,
|
|
14868
|
-
decision: "allow",
|
|
14869
|
-
metadata: {
|
|
14870
|
-
run_id: runId,
|
|
14871
|
-
events: events.length,
|
|
14872
|
-
sources: sourcesTouched.size,
|
|
14873
|
-
revisions: revisionsTouched.size,
|
|
14874
|
-
chunks_deleted: chunksDeleted,
|
|
14875
|
-
embeddings_deleted: embeddingsDeleted
|
|
14876
|
-
|
|
14877
|
-
|
|
14878
|
-
|
|
14879
|
-
|
|
14880
|
-
|
|
14881
|
-
|
|
14882
|
-
|
|
14883
|
-
|
|
14884
|
-
|
|
14885
|
-
|
|
14886
|
-
|
|
14887
|
-
|
|
14888
|
-
|
|
14889
|
-
|
|
14890
|
-
|
|
14891
|
-
|
|
14892
|
-
|
|
14893
|
-
|
|
14894
|
-
|
|
14895
|
-
|
|
14896
|
-
}
|
|
14897
|
-
|
|
14898
|
-
|
|
14899
|
-
// src/manifest-ingest.ts
|
|
14900
|
-
import { createHash as createHash3 } from "crypto";
|
|
14901
|
-
import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
|
|
14902
|
-
import { basename as basename2 } from "path";
|
|
14903
|
-
|
|
14904
|
-
// src/provenance.ts
|
|
14905
|
-
function isStaleStatus(status) {
|
|
14906
|
-
return ["deleted", "stale", "invalidated", "reindex_required"].includes((status ?? "").toLowerCase());
|
|
14907
|
-
}
|
|
14908
|
-
function sourceProvenance(input) {
|
|
14909
|
-
const status = input.status ?? null;
|
|
14910
|
-
return {
|
|
14911
|
-
source_owner: "open-files",
|
|
14912
|
-
source_ref: input.source_ref ?? null,
|
|
14913
|
-
source_uri: input.source_uri ?? null,
|
|
14914
|
-
source_kind: input.source_kind ?? null,
|
|
14915
|
-
source_revision_id: input.source_revision_id ?? null,
|
|
14916
|
-
revision: input.revision ?? null,
|
|
14917
|
-
hash: input.hash ?? null,
|
|
14918
|
-
chunk_id: input.chunk_id ?? null,
|
|
14919
|
-
start_offset: input.start_offset ?? null,
|
|
14920
|
-
end_offset: input.end_offset ?? null,
|
|
14921
|
-
status,
|
|
14922
|
-
read_only: true,
|
|
14923
|
-
citation_required: true,
|
|
14924
|
-
resolver: input.resolver ?? null,
|
|
14925
|
-
stale: isStaleStatus(status)
|
|
14926
|
-
};
|
|
14927
|
-
}
|
|
14928
|
-
function generatedArtifactProvenance(input) {
|
|
14929
|
-
return {
|
|
14930
|
-
source_owner: "open-files",
|
|
14931
|
-
generated_from: input.generated_from,
|
|
14932
|
-
artifact_key: input.artifact_key,
|
|
14933
|
-
source_refs: input.source_refs ?? [],
|
|
14934
|
-
read_only_sources: true,
|
|
14935
|
-
citation_required: input.citation_required ?? true,
|
|
14936
|
-
raw_source_bytes_stored_in_open_knowledge: false
|
|
14937
|
-
};
|
|
14938
|
-
}
|
|
14939
|
-
function withProvenance(metadata, provenance) {
|
|
14940
|
-
return {
|
|
14941
|
-
...metadata,
|
|
14942
|
-
provenance
|
|
14943
|
-
};
|
|
15427
|
+
decision: "allow",
|
|
15428
|
+
metadata: {
|
|
15429
|
+
run_id: runId,
|
|
15430
|
+
events: events.length,
|
|
15431
|
+
sources: sourcesTouched.size,
|
|
15432
|
+
revisions: revisionsTouched.size,
|
|
15433
|
+
chunks_deleted: chunksDeleted,
|
|
15434
|
+
embeddings_deleted: embeddingsDeleted,
|
|
15435
|
+
vector_entries_deleted: vectorEntriesDeleted
|
|
15436
|
+
},
|
|
15437
|
+
created_at: now
|
|
15438
|
+
});
|
|
15439
|
+
return {
|
|
15440
|
+
path: options.input,
|
|
15441
|
+
db_path: options.dbPath,
|
|
15442
|
+
run_id: runId,
|
|
15443
|
+
events_seen: events.length,
|
|
15444
|
+
sources_touched: sourcesTouched.size,
|
|
15445
|
+
revisions_touched: revisionsTouched.size,
|
|
15446
|
+
chunks_deleted: chunksDeleted,
|
|
15447
|
+
embeddings_deleted: embeddingsDeleted,
|
|
15448
|
+
vector_entries_deleted: vectorEntriesDeleted,
|
|
15449
|
+
stale_revisions: staleRevisions,
|
|
15450
|
+
deleted_sources: deletedSources,
|
|
15451
|
+
moved_sources: movedSources,
|
|
15452
|
+
permission_updates: permissionUpdates
|
|
15453
|
+
};
|
|
15454
|
+
})();
|
|
15455
|
+
} finally {
|
|
15456
|
+
db.close();
|
|
15457
|
+
}
|
|
14944
15458
|
}
|
|
14945
15459
|
|
|
14946
15460
|
// src/manifest-ingest.ts
|
|
14947
|
-
|
|
14948
|
-
|
|
15461
|
+
import { createHash as createHash4 } from "crypto";
|
|
15462
|
+
import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
|
|
15463
|
+
import { basename as basename2 } from "path";
|
|
15464
|
+
function stableId3(prefix, value) {
|
|
15465
|
+
return `${prefix}_${createHash4("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
14949
15466
|
}
|
|
14950
15467
|
function asObject2(value) {
|
|
14951
15468
|
return value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
|
|
@@ -15165,7 +15682,7 @@ function deleteChunksForRevision(db, sourceRevisionId) {
|
|
|
15165
15682
|
return rows.length;
|
|
15166
15683
|
}
|
|
15167
15684
|
function upsertSource(db, item, now) {
|
|
15168
|
-
const sourceId =
|
|
15685
|
+
const sourceId = stableId3("src", item.sourceUri);
|
|
15169
15686
|
db.run(`INSERT INTO sources (id, uri, kind, title, metadata_json, acl_json, created_at, updated_at)
|
|
15170
15687
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
15171
15688
|
ON CONFLICT(uri) DO UPDATE SET
|
|
@@ -15189,7 +15706,7 @@ function upsertSource(db, item, now) {
|
|
|
15189
15706
|
return row.id;
|
|
15190
15707
|
}
|
|
15191
15708
|
function upsertRevision(db, sourceId, item, now) {
|
|
15192
|
-
const revisionId =
|
|
15709
|
+
const revisionId = stableId3("rev", `${sourceId}\x00${item.revision}`);
|
|
15193
15710
|
db.run(`INSERT INTO source_revisions (id, source_id, revision, hash, extracted_text_uri, metadata_json, created_at)
|
|
15194
15711
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
15195
15712
|
ON CONFLICT(source_id, revision) DO UPDATE SET
|
|
@@ -15231,7 +15748,7 @@ function insertChunks(db, sourceRevisionId, item, now, maxChars, overlapChars, s
|
|
|
15231
15748
|
}
|
|
15232
15749
|
const chunks = chunkText(redacted.text, maxChars, overlapChars);
|
|
15233
15750
|
for (const chunk of chunks) {
|
|
15234
|
-
const chunkId =
|
|
15751
|
+
const chunkId = stableId3("chk", `${sourceRevisionId}\x00${chunk.ordinal}\x00${chunk.text}`);
|
|
15235
15752
|
const provenance = sourceProvenance({
|
|
15236
15753
|
source_ref: item.sourceRef,
|
|
15237
15754
|
source_uri: item.sourceUri,
|
|
@@ -15359,12 +15876,12 @@ async function ingestOpenFilesManifestItems(options) {
|
|
|
15359
15876
|
}
|
|
15360
15877
|
|
|
15361
15878
|
// src/source-ingest.ts
|
|
15362
|
-
import { createHash as
|
|
15879
|
+
import { createHash as createHash5 } from "crypto";
|
|
15363
15880
|
import { existsSync as existsSync6, readFileSync as readFileSync6 } from "fs";
|
|
15364
15881
|
import { basename as basename3 } from "path";
|
|
15365
15882
|
|
|
15366
15883
|
// src/source-resolver.ts
|
|
15367
|
-
function
|
|
15884
|
+
function parseJsonObject2(value) {
|
|
15368
15885
|
if (!value)
|
|
15369
15886
|
return {};
|
|
15370
15887
|
try {
|
|
@@ -15374,7 +15891,7 @@ function parseJsonObject(value) {
|
|
|
15374
15891
|
return {};
|
|
15375
15892
|
}
|
|
15376
15893
|
}
|
|
15377
|
-
function
|
|
15894
|
+
function metadataString2(metadata, keys) {
|
|
15378
15895
|
for (const key of keys) {
|
|
15379
15896
|
const value = metadata[key];
|
|
15380
15897
|
if (typeof value === "string" && value.length > 0)
|
|
@@ -15382,7 +15899,7 @@ function metadataString(metadata, keys) {
|
|
|
15382
15899
|
}
|
|
15383
15900
|
return null;
|
|
15384
15901
|
}
|
|
15385
|
-
function
|
|
15902
|
+
function metadataNumber2(metadata, keys) {
|
|
15386
15903
|
for (const key of keys) {
|
|
15387
15904
|
const value = metadata[key];
|
|
15388
15905
|
if (typeof value === "number" && Number.isFinite(value))
|
|
@@ -15507,8 +16024,8 @@ async function resolveOpenFilesSource(options) {
|
|
|
15507
16024
|
citations: []
|
|
15508
16025
|
};
|
|
15509
16026
|
}
|
|
15510
|
-
const sourceMetadata =
|
|
15511
|
-
const permissions =
|
|
16027
|
+
const sourceMetadata = parseJsonObject2(source.metadata_json);
|
|
16028
|
+
const permissions = parseJsonObject2(source.acl_json);
|
|
15512
16029
|
try {
|
|
15513
16030
|
assertPurposeAllowed(permissions, purpose);
|
|
15514
16031
|
} catch (error48) {
|
|
@@ -15528,22 +16045,22 @@ async function resolveOpenFilesSource(options) {
|
|
|
15528
16045
|
throw error48;
|
|
15529
16046
|
}
|
|
15530
16047
|
const revision = selectRevision(db, source.id, requestedRevision);
|
|
15531
|
-
const revisionMetadata =
|
|
16048
|
+
const revisionMetadata = parseJsonObject2(revision?.metadata_json);
|
|
15532
16049
|
const totalChunks = countChunks(db, revision?.id ?? null);
|
|
15533
16050
|
const rows = selectChunks(db, revision?.id ?? null, limit);
|
|
15534
16051
|
const effectiveSourceRef = sourceRevisionRef(source.uri, revision, options.sourceRef);
|
|
15535
16052
|
const chunks = rows.map((row) => {
|
|
15536
|
-
const metadata =
|
|
16053
|
+
const metadata = parseJsonObject2(row.metadata_json);
|
|
15537
16054
|
const evidence = {
|
|
15538
16055
|
resolver: "open-files-read-only",
|
|
15539
16056
|
mode: "local_catalog",
|
|
15540
16057
|
purpose,
|
|
15541
16058
|
read_only: true,
|
|
15542
|
-
source_ref:
|
|
16059
|
+
source_ref: metadataString2(metadata, ["source_ref"]) ?? effectiveSourceRef,
|
|
15543
16060
|
source_uri: source.uri,
|
|
15544
16061
|
source_revision_id: revision?.id ?? null,
|
|
15545
16062
|
revision: revision?.revision ?? null,
|
|
15546
|
-
hash: revision?.hash ??
|
|
16063
|
+
hash: revision?.hash ?? metadataString2(metadata, ["hash"]),
|
|
15547
16064
|
chunk_id: row.id,
|
|
15548
16065
|
start_offset: row.start_offset,
|
|
15549
16066
|
end_offset: row.end_offset,
|
|
@@ -15559,7 +16076,7 @@ async function resolveOpenFilesSource(options) {
|
|
|
15559
16076
|
chunk_id: row.id,
|
|
15560
16077
|
start_offset: row.start_offset,
|
|
15561
16078
|
end_offset: row.end_offset,
|
|
15562
|
-
status:
|
|
16079
|
+
status: metadataString2(metadata, ["status"]),
|
|
15563
16080
|
resolver: evidence.resolver
|
|
15564
16081
|
});
|
|
15565
16082
|
return {
|
|
@@ -15600,8 +16117,8 @@ async function resolveOpenFilesSource(options) {
|
|
|
15600
16117
|
},
|
|
15601
16118
|
created_at: resolvedAt
|
|
15602
16119
|
});
|
|
15603
|
-
const mime =
|
|
15604
|
-
const size =
|
|
16120
|
+
const mime = metadataString2(sourceMetadata, ["mime", "content_type"]) ?? metadataString2(revisionMetadata, ["mime", "content_type"]);
|
|
16121
|
+
const size = metadataNumber2(sourceMetadata, ["size", "size_bytes"]) ?? metadataNumber2(revisionMetadata, ["size", "size_bytes"]);
|
|
15605
16122
|
return {
|
|
15606
16123
|
source_ref: effectiveSourceRef,
|
|
15607
16124
|
source_uri: source.uri,
|
|
@@ -15634,12 +16151,12 @@ async function resolveOpenFilesSource(options) {
|
|
|
15634
16151
|
content: {
|
|
15635
16152
|
mime,
|
|
15636
16153
|
size,
|
|
15637
|
-
hash: revision?.hash ??
|
|
16154
|
+
hash: revision?.hash ?? metadataString2(sourceMetadata, ["hash", "checksum", "sha256"]),
|
|
15638
16155
|
text_available: totalChunks > 0,
|
|
15639
16156
|
chunks_total: totalChunks,
|
|
15640
16157
|
chunks_returned: chunks.length,
|
|
15641
16158
|
char_count_returned: chunks.reduce((sum, chunk) => sum + chunk.text.length, 0),
|
|
15642
|
-
extracted_text_ref: revision?.extracted_text_uri ??
|
|
16159
|
+
extracted_text_ref: revision?.extracted_text_uri ?? metadataString2(revisionMetadata, ["extracted_text_ref", "extracted_text_uri"]),
|
|
15643
16160
|
bytes_available: false,
|
|
15644
16161
|
bytes_exposed: false
|
|
15645
16162
|
},
|
|
@@ -15654,7 +16171,7 @@ async function resolveOpenFilesSource(options) {
|
|
|
15654
16171
|
|
|
15655
16172
|
// src/source-ingest.ts
|
|
15656
16173
|
function sha256Text(text) {
|
|
15657
|
-
return `sha256:${
|
|
16174
|
+
return `sha256:${createHash5("sha256").update(text).digest("hex")}`;
|
|
15658
16175
|
}
|
|
15659
16176
|
function stripHtml(html) {
|
|
15660
16177
|
return html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/ /g, " ").replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">").replace(/\s+\n/g, `
|
|
@@ -15876,131 +16393,381 @@ async function ingestSourceRef(options) {
|
|
|
15876
16393
|
};
|
|
15877
16394
|
}
|
|
15878
16395
|
|
|
15879
|
-
// src/
|
|
15880
|
-
|
|
15881
|
-
|
|
15882
|
-
|
|
15883
|
-
|
|
15884
|
-
|
|
15885
|
-
|
|
15886
|
-
|
|
15887
|
-
|
|
15888
|
-
},
|
|
15889
|
-
deepseek: {
|
|
15890
|
-
api_key_env: "DEEPSEEK_API_KEY",
|
|
15891
|
-
default_model: "deepseek-chat"
|
|
16396
|
+
// src/search.ts
|
|
16397
|
+
function parseJsonObject3(value) {
|
|
16398
|
+
if (!value)
|
|
16399
|
+
return {};
|
|
16400
|
+
try {
|
|
16401
|
+
const parsed = JSON.parse(value);
|
|
16402
|
+
return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
|
|
16403
|
+
} catch {
|
|
16404
|
+
return {};
|
|
15892
16405
|
}
|
|
15893
|
-
}
|
|
15894
|
-
|
|
15895
|
-
|
|
15896
|
-
|
|
15897
|
-
|
|
15898
|
-
|
|
15899
|
-
tool_streaming: true,
|
|
15900
|
-
image_input: true,
|
|
15901
|
-
native_web_search: true,
|
|
15902
|
-
reasoning: true,
|
|
15903
|
-
embeddings: true
|
|
15904
|
-
},
|
|
15905
|
-
anthropic: {
|
|
15906
|
-
text_generation: true,
|
|
15907
|
-
structured_output: true,
|
|
15908
|
-
tool_usage: true,
|
|
15909
|
-
tool_streaming: true,
|
|
15910
|
-
image_input: true,
|
|
15911
|
-
native_web_search: false,
|
|
15912
|
-
reasoning: true,
|
|
15913
|
-
embeddings: false
|
|
15914
|
-
},
|
|
15915
|
-
deepseek: {
|
|
15916
|
-
text_generation: true,
|
|
15917
|
-
structured_output: true,
|
|
15918
|
-
tool_usage: true,
|
|
15919
|
-
tool_streaming: true,
|
|
15920
|
-
image_input: false,
|
|
15921
|
-
native_web_search: false,
|
|
15922
|
-
reasoning: true,
|
|
15923
|
-
embeddings: false
|
|
16406
|
+
}
|
|
16407
|
+
function metadataString3(metadata, keys) {
|
|
16408
|
+
for (const key of keys) {
|
|
16409
|
+
const value = metadata[key];
|
|
16410
|
+
if (typeof value === "string" && value.length > 0)
|
|
16411
|
+
return value;
|
|
15924
16412
|
}
|
|
15925
|
-
|
|
15926
|
-
var BUILTIN_ALIASES = {
|
|
15927
|
-
default: "openai:gpt-5.2",
|
|
15928
|
-
fast: "openai:gpt-5-mini",
|
|
15929
|
-
reasoning: "anthropic:claude-opus-4-6",
|
|
15930
|
-
sonnet: "anthropic:claude-sonnet-4-6",
|
|
15931
|
-
deepseek: "deepseek:deepseek-chat",
|
|
15932
|
-
"deepseek-reasoning": "deepseek:deepseek-reasoner"
|
|
15933
|
-
};
|
|
15934
|
-
function providerConfig(config2) {
|
|
15935
|
-
return config2.providers ?? {};
|
|
16413
|
+
return null;
|
|
15936
16414
|
}
|
|
15937
|
-
function
|
|
15938
|
-
const
|
|
15939
|
-
|
|
15940
|
-
|
|
15941
|
-
|
|
15942
|
-
}
|
|
16415
|
+
function metadataNumber3(metadata, keys) {
|
|
16416
|
+
for (const key of keys) {
|
|
16417
|
+
const value = metadata[key];
|
|
16418
|
+
if (typeof value === "number" && Number.isFinite(value))
|
|
16419
|
+
return value;
|
|
16420
|
+
}
|
|
16421
|
+
return null;
|
|
15943
16422
|
}
|
|
15944
|
-
function
|
|
15945
|
-
|
|
15946
|
-
return {
|
|
15947
|
-
...BUILTIN_ALIASES,
|
|
15948
|
-
...configured.default_model ? { default: configured.default_model } : {},
|
|
15949
|
-
...configured.aliases ?? {}
|
|
15950
|
-
};
|
|
16423
|
+
function unique(values) {
|
|
16424
|
+
return Array.from(new Set(values));
|
|
15951
16425
|
}
|
|
15952
|
-
function
|
|
15953
|
-
const
|
|
15954
|
-
|
|
15955
|
-
if (provider !== "openai" && provider !== "anthropic" && provider !== "deepseek") {
|
|
15956
|
-
throw new Error(`Unsupported AI provider: ${provider}`);
|
|
15957
|
-
}
|
|
15958
|
-
if (!model)
|
|
15959
|
-
throw new Error(`Invalid model ref: ${modelRef}. Expected provider:model.`);
|
|
15960
|
-
return { provider, model };
|
|
16426
|
+
function queryTerms(query) {
|
|
16427
|
+
const terms = query.normalize("NFKC").toLowerCase().match(/[\p{L}\p{N}_]+/gu) ?? [];
|
|
16428
|
+
return unique(terms.filter((term) => term.length > 0)).slice(0, 16);
|
|
15961
16429
|
}
|
|
15962
|
-
function
|
|
15963
|
-
|
|
15964
|
-
|
|
16430
|
+
function ftsQueryForTerms(terms) {
|
|
16431
|
+
if (terms.length === 0)
|
|
16432
|
+
return null;
|
|
16433
|
+
return terms.map((term) => `${term}*`).join(" OR ");
|
|
15965
16434
|
}
|
|
15966
|
-
function
|
|
15967
|
-
|
|
15968
|
-
|
|
15969
|
-
|
|
15970
|
-
|
|
15971
|
-
|
|
15972
|
-
|
|
15973
|
-
|
|
15974
|
-
|
|
15975
|
-
|
|
15976
|
-
|
|
15977
|
-
|
|
16435
|
+
function escapeLikeTerm(term) {
|
|
16436
|
+
return term.replace(/[\\%_]/g, (char) => `\\${char}`);
|
|
16437
|
+
}
|
|
16438
|
+
function likeParams(terms, fieldsPerTerm) {
|
|
16439
|
+
return terms.flatMap((term) => Array.from({ length: fieldsPerTerm }, () => `%${escapeLikeTerm(term)}%`));
|
|
16440
|
+
}
|
|
16441
|
+
function scoreFromRank(rank, index) {
|
|
16442
|
+
const rankScore = Number.isFinite(rank) ? 1 / (1 + Math.abs(rank)) : 0;
|
|
16443
|
+
const orderScore = 1 / (1 + index);
|
|
16444
|
+
return roundScore(Math.max(rankScore, orderScore));
|
|
16445
|
+
}
|
|
16446
|
+
function catalogScore(haystack, terms) {
|
|
16447
|
+
if (terms.length === 0)
|
|
16448
|
+
return 0;
|
|
16449
|
+
const matched = terms.filter((term) => haystack.includes(term)).length;
|
|
16450
|
+
if (matched === 0)
|
|
16451
|
+
return 0;
|
|
16452
|
+
return roundScore(Math.min(0.85, 0.35 + matched / terms.length * 0.5));
|
|
16453
|
+
}
|
|
16454
|
+
function semanticScore(score) {
|
|
16455
|
+
return roundScore(Math.max(0, Math.min(1, (score + 1) / 2)));
|
|
16456
|
+
}
|
|
16457
|
+
function roundScore(score) {
|
|
16458
|
+
return Number(score.toFixed(6));
|
|
16459
|
+
}
|
|
16460
|
+
function combinedScore(scores, citation) {
|
|
16461
|
+
const keyword = scores.keyword ?? 0;
|
|
16462
|
+
const semantic = scores.semantic ?? 0;
|
|
16463
|
+
const catalog = scores.catalog ?? 0;
|
|
16464
|
+
const citationBoost = citation?.chunk_id ? 0.05 : 0;
|
|
16465
|
+
return roundScore(Math.min(1, keyword * 0.55 + semantic * 0.4 + catalog * 0.35 + citationBoost));
|
|
16466
|
+
}
|
|
16467
|
+
function existingProvenance(metadata) {
|
|
16468
|
+
const provenance = metadata.provenance;
|
|
16469
|
+
return provenance && typeof provenance === "object" && !Array.isArray(provenance) ? provenance : null;
|
|
16470
|
+
}
|
|
16471
|
+
function provenanceForChunk2(row) {
|
|
16472
|
+
const metadata = parseJsonObject3(row.chunk_metadata_json);
|
|
16473
|
+
const existing = existingProvenance(metadata);
|
|
16474
|
+
if (existing)
|
|
16475
|
+
return existing;
|
|
16476
|
+
if (!row.source_revision_id && !row.source_uri)
|
|
16477
|
+
return null;
|
|
16478
|
+
return sourceProvenance({
|
|
16479
|
+
source_ref: metadataString3(metadata, ["source_ref"]),
|
|
16480
|
+
source_uri: row.source_uri ?? metadataString3(metadata, ["source_uri"]),
|
|
16481
|
+
source_kind: row.source_kind ?? metadataString3(metadata, ["source_kind"]),
|
|
16482
|
+
source_revision_id: row.source_revision_id,
|
|
16483
|
+
revision: row.revision ?? metadataString3(metadata, ["revision"]),
|
|
16484
|
+
hash: row.hash ?? metadataString3(metadata, ["hash"]),
|
|
16485
|
+
chunk_id: row.chunk_id,
|
|
16486
|
+
start_offset: row.start_offset ?? metadataNumber3(metadata, ["start_offset"]),
|
|
16487
|
+
end_offset: row.end_offset ?? metadataNumber3(metadata, ["end_offset"]),
|
|
16488
|
+
status: metadataString3(metadata, ["status"]),
|
|
16489
|
+
resolver: "open-files-read-only"
|
|
15978
16490
|
});
|
|
15979
16491
|
}
|
|
15980
|
-
function
|
|
15981
|
-
|
|
15982
|
-
|
|
15983
|
-
|
|
15984
|
-
|
|
15985
|
-
|
|
15986
|
-
|
|
15987
|
-
|
|
15988
|
-
|
|
15989
|
-
|
|
15990
|
-
|
|
15991
|
-
|
|
16492
|
+
function selectFtsChunks(db, ftsQuery, limit) {
|
|
16493
|
+
if (!ftsQuery)
|
|
16494
|
+
return [];
|
|
16495
|
+
return db.query(`SELECT
|
|
16496
|
+
chunks_fts.chunk_id,
|
|
16497
|
+
c.kind AS chunk_kind,
|
|
16498
|
+
c.wiki_page_id,
|
|
16499
|
+
c.text,
|
|
16500
|
+
c.token_count,
|
|
16501
|
+
c.start_offset,
|
|
16502
|
+
c.end_offset,
|
|
16503
|
+
c.metadata_json AS chunk_metadata_json,
|
|
16504
|
+
c.source_revision_id,
|
|
16505
|
+
sr.revision,
|
|
16506
|
+
sr.hash,
|
|
16507
|
+
s.uri AS source_uri,
|
|
16508
|
+
s.kind AS source_kind,
|
|
16509
|
+
s.title AS source_title,
|
|
16510
|
+
wp.path AS wiki_path,
|
|
16511
|
+
wp.title AS wiki_title,
|
|
16512
|
+
wp.artifact_uri AS wiki_artifact_uri,
|
|
16513
|
+
wp.content_hash AS wiki_content_hash,
|
|
16514
|
+
wp.status AS wiki_status,
|
|
16515
|
+
wp.metadata_json AS wiki_metadata_json,
|
|
16516
|
+
bm25(chunks_fts) AS rank
|
|
16517
|
+
FROM chunks_fts
|
|
16518
|
+
JOIN chunks c ON c.id = chunks_fts.chunk_id
|
|
16519
|
+
LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
|
|
16520
|
+
LEFT JOIN sources s ON s.id = sr.source_id
|
|
16521
|
+
LEFT JOIN wiki_pages wp ON wp.id = c.wiki_page_id
|
|
16522
|
+
WHERE chunks_fts MATCH ?
|
|
16523
|
+
ORDER BY rank ASC
|
|
16524
|
+
LIMIT ?`).all(ftsQuery, limit);
|
|
16525
|
+
}
|
|
16526
|
+
function catalogWhere(fields, terms) {
|
|
16527
|
+
if (terms.length === 0)
|
|
16528
|
+
return "1 = 0";
|
|
16529
|
+
const clauses = terms.map(() => `(${fields.map((field) => `lower(COALESCE(${field}, '')) LIKE ? ESCAPE '\\'`).join(" OR ")})`);
|
|
16530
|
+
return clauses.join(" OR ");
|
|
16531
|
+
}
|
|
16532
|
+
function selectWikiPages(db, terms, limit) {
|
|
16533
|
+
const fields = ["path", "title", "artifact_uri", "metadata_json"];
|
|
16534
|
+
return db.query(`SELECT id, path, title, artifact_uri, content_hash, status, metadata_json
|
|
16535
|
+
FROM wiki_pages
|
|
16536
|
+
WHERE status = 'active' AND (${catalogWhere(fields, terms)})
|
|
16537
|
+
ORDER BY updated_at DESC
|
|
16538
|
+
LIMIT ?`).all(...likeParams(terms, fields.length), limit);
|
|
16539
|
+
}
|
|
16540
|
+
function selectKnowledgeIndexes(db, terms, limit) {
|
|
16541
|
+
const fields = ["kind", "name", "shard_key", "artifact_uri", "metadata_json"];
|
|
16542
|
+
return db.query(`SELECT id, kind, name, artifact_uri, shard_key, metadata_json
|
|
16543
|
+
FROM knowledge_indexes
|
|
16544
|
+
WHERE ${catalogWhere(fields, terms)}
|
|
16545
|
+
ORDER BY updated_at DESC
|
|
16546
|
+
LIMIT ?`).all(...likeParams(terms, fields.length), limit);
|
|
16547
|
+
}
|
|
16548
|
+
function chunkResult(row, keywordScore) {
|
|
16549
|
+
const metadata = parseJsonObject3(row.chunk_metadata_json);
|
|
16550
|
+
const provenance = provenanceForChunk2(row);
|
|
16551
|
+
const sourceRef = metadataString3(metadata, ["source_ref"]);
|
|
16552
|
+
const sourceUri = row.source_uri ?? metadataString3(metadata, ["source_uri"]);
|
|
16553
|
+
const isWiki = Boolean(row.wiki_page_id);
|
|
16554
|
+
const result = {
|
|
16555
|
+
kind: isWiki ? "wiki_chunk" : "source_chunk",
|
|
16556
|
+
id: row.chunk_id,
|
|
16557
|
+
title: isWiki ? row.wiki_title : row.source_title,
|
|
16558
|
+
text: row.text,
|
|
16559
|
+
score: 0,
|
|
16560
|
+
scores: { keyword: keywordScore },
|
|
16561
|
+
source: sourceUri || sourceRef ? {
|
|
16562
|
+
uri: sourceUri,
|
|
16563
|
+
ref: sourceRef,
|
|
16564
|
+
kind: row.source_kind ?? metadataString3(metadata, ["source_kind"]),
|
|
16565
|
+
revision: row.revision ?? metadataString3(metadata, ["revision"]),
|
|
16566
|
+
hash: row.hash ?? metadataString3(metadata, ["hash"])
|
|
16567
|
+
} : null,
|
|
16568
|
+
citation: {
|
|
16569
|
+
chunk_id: row.chunk_id,
|
|
16570
|
+
start_offset: row.start_offset,
|
|
16571
|
+
end_offset: row.end_offset
|
|
16572
|
+
},
|
|
16573
|
+
artifact: isWiki ? {
|
|
16574
|
+
uri: row.wiki_artifact_uri,
|
|
16575
|
+
path: row.wiki_path,
|
|
16576
|
+
hash: row.wiki_content_hash,
|
|
16577
|
+
shard_key: row.wiki_path
|
|
16578
|
+
} : null,
|
|
16579
|
+
provenance,
|
|
16580
|
+
reasons: ["keyword_match"]
|
|
16581
|
+
};
|
|
16582
|
+
result.score = combinedScore(result.scores, result.citation);
|
|
16583
|
+
return result;
|
|
16584
|
+
}
|
|
16585
|
+
function wikiPageResult(row, terms) {
|
|
16586
|
+
const metadata = parseJsonObject3(row.metadata_json);
|
|
16587
|
+
const score = catalogScore(`${row.path} ${row.title} ${row.artifact_uri ?? ""} ${row.metadata_json}`.toLowerCase(), terms);
|
|
16588
|
+
const result = {
|
|
16589
|
+
kind: "wiki_page",
|
|
16590
|
+
id: row.id,
|
|
16591
|
+
title: row.title,
|
|
16592
|
+
text: null,
|
|
16593
|
+
score: 0,
|
|
16594
|
+
scores: { catalog: score },
|
|
16595
|
+
source: null,
|
|
16596
|
+
citation: null,
|
|
16597
|
+
artifact: {
|
|
16598
|
+
uri: row.artifact_uri,
|
|
16599
|
+
path: row.path,
|
|
16600
|
+
hash: row.content_hash,
|
|
16601
|
+
shard_key: row.path
|
|
16602
|
+
},
|
|
16603
|
+
provenance: existingProvenance(metadata),
|
|
16604
|
+
reasons: ["wiki_catalog_match"]
|
|
16605
|
+
};
|
|
16606
|
+
result.score = combinedScore(result.scores, result.citation);
|
|
16607
|
+
return result;
|
|
16608
|
+
}
|
|
16609
|
+
function indexResult(row, terms) {
|
|
16610
|
+
const metadata = parseJsonObject3(row.metadata_json);
|
|
16611
|
+
const score = catalogScore(`${row.kind} ${row.name} ${row.shard_key ?? ""} ${row.artifact_uri ?? ""} ${row.metadata_json}`.toLowerCase(), terms);
|
|
16612
|
+
const result = {
|
|
16613
|
+
kind: "knowledge_index",
|
|
16614
|
+
id: row.id,
|
|
16615
|
+
title: row.name,
|
|
16616
|
+
text: null,
|
|
16617
|
+
score: 0,
|
|
16618
|
+
scores: { catalog: score },
|
|
16619
|
+
source: null,
|
|
16620
|
+
citation: null,
|
|
16621
|
+
artifact: {
|
|
16622
|
+
uri: row.artifact_uri,
|
|
16623
|
+
path: metadataString3(metadata, ["artifact_key"]),
|
|
16624
|
+
hash: metadataString3(metadata, ["content_hash"]),
|
|
16625
|
+
shard_key: row.shard_key
|
|
16626
|
+
},
|
|
16627
|
+
provenance: existingProvenance(metadata),
|
|
16628
|
+
reasons: ["index_catalog_match"]
|
|
16629
|
+
};
|
|
16630
|
+
result.score = combinedScore(result.scores, result.citation);
|
|
16631
|
+
return result;
|
|
16632
|
+
}
|
|
16633
|
+
function mergeResult(results, entry) {
|
|
16634
|
+
const key = `${entry.kind}:${entry.id}`;
|
|
16635
|
+
const existing = results.get(key);
|
|
16636
|
+
if (!existing) {
|
|
16637
|
+
results.set(key, entry);
|
|
16638
|
+
return;
|
|
16639
|
+
}
|
|
16640
|
+
existing.scores = {
|
|
16641
|
+
keyword: Math.max(existing.scores.keyword ?? 0, entry.scores.keyword ?? 0) || undefined,
|
|
16642
|
+
semantic: Math.max(existing.scores.semantic ?? 0, entry.scores.semantic ?? 0) || undefined,
|
|
16643
|
+
catalog: Math.max(existing.scores.catalog ?? 0, entry.scores.catalog ?? 0) || undefined
|
|
16644
|
+
};
|
|
16645
|
+
existing.reasons = unique([...existing.reasons, ...entry.reasons]);
|
|
16646
|
+
existing.text = existing.text ?? entry.text;
|
|
16647
|
+
existing.title = existing.title ?? entry.title;
|
|
16648
|
+
existing.source = existing.source ?? entry.source;
|
|
16649
|
+
existing.citation = existing.citation ?? entry.citation;
|
|
16650
|
+
existing.artifact = existing.artifact ?? entry.artifact;
|
|
16651
|
+
existing.provenance = existing.provenance ?? entry.provenance;
|
|
16652
|
+
existing.score = combinedScore(existing.scores, existing.citation);
|
|
16653
|
+
}
|
|
16654
|
+
function sortResults(results) {
|
|
16655
|
+
const kindOrder = {
|
|
16656
|
+
source_chunk: 0,
|
|
16657
|
+
wiki_chunk: 1,
|
|
16658
|
+
wiki_page: 2,
|
|
16659
|
+
knowledge_index: 3
|
|
16660
|
+
};
|
|
16661
|
+
return results.sort((a, b) => {
|
|
16662
|
+
if (b.score !== a.score)
|
|
16663
|
+
return b.score - a.score;
|
|
16664
|
+
return kindOrder[a.kind] - kindOrder[b.kind] || a.id.localeCompare(b.id);
|
|
15992
16665
|
});
|
|
15993
16666
|
}
|
|
15994
|
-
function
|
|
16667
|
+
/**
 * Run a hybrid search over the knowledge database: keyword (FTS) chunk
 * matches, catalog matches (wiki pages and knowledge indexes) and, when
 * requested, semantic vector matches. All hits are merged by `mergeResult`,
 * sorted by `sortResults` and truncated to `limit`.
 *
 * Semantic search runs when `options.semantic === true`, `options.fake === true`
 * or `options.modelRef` is set. A semantic failure does not fail the search;
 * it is reported as a `semantic_search_failed: ...` entry in `warnings`.
 *
 * @param {object} options
 * @param {string} options.query - search text; must be a non-blank string
 * @param {number} [options.limit=10] - maximum results, normalized to an integer in 1..100
 * @param {string} options.dbPath - knowledge database path
 * @param {boolean} [options.semantic]
 * @param {boolean} [options.fake]
 * @param {string} [options.modelRef]
 * @returns {Promise<object>} query, limit, mode flags, semantic provider/model/dimensions,
 *   per-source counts, warnings and the merged results
 * @throws {Error} "Search query is required." when the query is missing, not a string, or blank
 */
async function hybridSearch(options) {
  // Validate before touching the database so a missing query yields the
  // intended error instead of a TypeError from calling .trim() on undefined.
  const query = typeof options?.query === "string" ? options.query.trim() : "";
  if (!query)
    throw new Error("Search query is required.");
  // A NaN limit would otherwise survive Math.min/Math.max and make
  // slice(0, NaN) return nothing; fractional limits are truncated so the
  // reported limit matches what slice() actually applies.
  const requestedLimit = Number(options.limit ?? 10);
  const limit = Number.isNaN(requestedLimit) ? 10 : Math.max(1, Math.min(Math.trunc(requestedLimit), 100));
  // Over-fetch chunk candidates so merging and re-ranking has material to work with.
  const candidateLimit = Math.max(limit * 3, 20);
  const catalogLimit = Math.max(limit, 10);
  const terms = queryTerms(query);
  const ftsQuery = ftsQueryForTerms(terms);
  const semanticEnabled = options.semantic === true || options.fake === true || Boolean(options.modelRef);
  const warnings = [];
  let semanticProvider = null;
  let semanticModel = null;
  let semanticDimensions = null;
  let keywordCount = 0;
  let catalogCount = 0;
  let semanticCount = 0;
  const merged = new Map();
  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    const ftsRows = selectFtsChunks(db, ftsQuery, candidateLimit);
    keywordCount = ftsRows.length;
    ftsRows.forEach((row, index) => mergeResult(merged, chunkResult(row, scoreFromRank(row.rank, index))));
    const wikiRows = selectWikiPages(db, terms, catalogLimit);
    const indexRows = selectKnowledgeIndexes(db, terms, catalogLimit);
    catalogCount = wikiRows.length + indexRows.length;
    wikiRows.forEach((row) => mergeResult(merged, wikiPageResult(row, terms)));
    indexRows.forEach((row) => mergeResult(merged, indexResult(row, terms)));
  } finally {
    // The handle is closed before the semantic step; searchVectorIndex receives dbPath itself.
    db.close();
  }
  if (semanticEnabled) {
    try {
      const semantic = await searchVectorIndex({
        dbPath: options.dbPath,
        query,
        limit: candidateLimit,
        config: options.config,
        env: options.env,
        modelRef: options.modelRef,
        dimensions: options.dimensions,
        fake: options.fake,
        batchSize: options.batchSize,
        maxParallelCalls: options.maxParallelCalls
      });
      semanticProvider = semantic.provider;
      semanticModel = semantic.model;
      semanticDimensions = semantic.dimensions;
      semanticCount = semantic.results.length;
      for (const row of semantic.results) {
        // Semantic hits are always reported with kind "source_chunk".
        const result = {
          kind: "source_chunk",
          id: row.chunk_id,
          title: null,
          text: row.text,
          score: 0,
          scores: { semantic: semanticScore(row.score) },
          source: {
            uri: row.source_uri,
            ref: row.source_ref,
            kind: row.provenance?.source_kind ?? null,
            revision: row.revision,
            hash: row.hash
          },
          citation: {
            chunk_id: row.chunk_id,
            start_offset: row.provenance?.start_offset ?? null,
            end_offset: row.provenance?.end_offset ?? null
          },
          artifact: null,
          provenance: row.provenance,
          reasons: ["semantic_match"]
        };
        result.score = combinedScore(result.scores, result.citation);
        mergeResult(merged, result);
      }
    } catch (error) {
      // Best effort: keyword and catalog results are still returned.
      warnings.push(`semantic_search_failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }
  const results = sortResults(Array.from(merged.values())).slice(0, limit);
  return {
    query,
    limit,
    mode: {
      keyword: true,
      catalog: true,
      semantic: semanticEnabled
    },
    semantic_provider: semanticProvider,
    semantic_model: semanticModel,
    semantic_dimensions: semanticDimensions,
    counts: {
      keyword_results: keywordCount,
      catalog_results: catalogCount,
      semantic_results: semanticCount,
      merged_results: results.length
    },
    warnings,
    results
  };
}
|
|
16001
16768
|
|
|
16002
16769
|
// src/storage-contract.ts
|
|
16003
|
-
import { createHash as
|
|
16770
|
+
import { createHash as createHash6, randomUUID as randomUUID4 } from "crypto";
|
|
16004
16771
|
var GENERATED_ARTIFACTS = [
|
|
16005
16772
|
{
|
|
16006
16773
|
kind: "schema",
|
|
@@ -16036,7 +16803,7 @@ var GENERATED_ARTIFACTS = [
|
|
|
16036
16803
|
/**
 * Compute the content hash and byte size of an artifact body.
 *
 * @param {string|Uint8Array} body - artifact content; converted with Buffer.from
 * @returns {{hash: string, size_bytes: number}} `sha256:<hex digest>` and the byte length
 */
function hashArtifactBody(body) {
  const payload = Buffer.from(body);
  const digest = createHash6("sha256").update(payload).digest("hex");
  return {
    hash: `sha256:${digest}`,
    size_bytes: payload.byteLength
  };
}
|
|
@@ -16171,15 +16938,19 @@ function recordStorageObjects(db, objects, now = new Date) {
|
|
|
16171
16938
|
}
|
|
16172
16939
|
|
|
16173
16940
|
// src/wiki-layout.ts
|
|
16174
|
-
import { createHash as
|
|
16941
|
+
import { createHash as createHash7 } from "crypto";
|
|
16175
16942
|
/**
 * Split a date into its UTC calendar components as strings.
 *
 * @param {Date} now
 * @returns {{year: string, month: string, day: string}} month and day are zero-padded to two digits
 */
function todayParts(now) {
  const twoDigits = (value) => String(value).padStart(2, "0");
  return {
    year: String(now.getUTCFullYear()),
    // getUTCMonth() is zero-based.
    month: twoDigits(now.getUTCMonth() + 1),
    day: twoDigits(now.getUTCDate())
  };
}
|
|
16181
|
-
function
|
|
16182
|
-
return `${prefix}_${
|
|
16948
|
+
/**
 * Build a deterministic identifier: `<prefix>_` followed by the first 20 hex
 * characters of the SHA-256 digest of `value`.
 *
 * @param {string} prefix
 * @param {string} value - input to hash
 * @returns {string}
 */
function stableId4(prefix, value) {
  const hexDigest = createHash7("sha256").update(value).digest("hex");
  const shortDigest = hexDigest.slice(0, 20);
  return `${prefix}_${shortDigest}`;
}
|
|
16951
|
+
/**
 * Estimate a token count from whitespace-separated words: 1.25 tokens per
 * word, rounded up, and never less than 1.
 *
 * @param {string} text
 * @returns {number}
 */
function estimateTokenCount2(text) {
  const wordRuns = text.match(/\S+/g) ?? [];
  const estimate = Math.ceil(wordRuns.length * 1.25);
  return Math.max(1, estimate);
}
|
|
16184
16955
|
function agentSchemaTemplate() {
|
|
16185
16956
|
return `# Knowledge Agent Schema v1
|
|
@@ -16291,6 +17062,33 @@ function provenanceFor(artifact) {
|
|
|
16291
17062
|
artifact_key: artifact.key
|
|
16292
17063
|
});
|
|
16293
17064
|
}
|
|
17065
|
+
/**
 * Replace the stored chunk for a wiki page with a single chunk holding `body`.
 *
 * Existing `chunks` rows for the page and their `chunks_fts` entries are
 * deleted first, then one chunk (kind "wiki", ordinal 0, offsets 0..body.length)
 * is inserted into both tables.
 *
 * @param db - database handle exposing `query(sql).all(...)` and `run(sql, params)`
 * @param {string} pageId - wiki page id the chunk belongs to
 * @param {string} title - title written to the chunks_fts row
 * @param {object} artifact - reads `key`, `uri` and optional `hash`
 * @param {string} body - chunk text
 * @param {string} now - value stored in `created_at`
 */
function recordWikiChunk(db, pageId, title, artifact, body, now) {
  const provenance = provenanceFor(artifact);
  // The id follows the artifact content hash when present, otherwise its URI.
  const chunkId = stableId4("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);

  // Remove the chunks_fts rows one by one before dropping the chunks they point at.
  const staleChunks = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
  staleChunks.forEach((stale) => {
    db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [stale.id]);
  });
  db.run("DELETE FROM chunks WHERE wiki_page_id = ?", [pageId]);

  const metadataJson = JSON.stringify({
    artifact_key: artifact.key,
    artifact_uri: artifact.uri,
    content_hash: artifact.hash ?? null,
    provenance
  });
  const chunkRow = [
    chunkId,
    pageId,
    "wiki",
    0,
    body,
    estimateTokenCount2(body),
    0,
    body.length,
    metadataJson,
    now
  ];
  db.run(`INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, chunkRow);
  db.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)", [chunkId, body, title, artifact.uri]);
}
|
|
16294
17092
|
function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
16295
17093
|
const timestamp = now.toISOString();
|
|
16296
17094
|
const rootIndex = artifacts.find((artifact) => artifact.key.endsWith("indexes/root.md"));
|
|
@@ -16302,7 +17100,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
16302
17100
|
artifact_uri = excluded.artifact_uri,
|
|
16303
17101
|
metadata_json = excluded.metadata_json,
|
|
16304
17102
|
updated_at = excluded.updated_at`, [
|
|
16305
|
-
|
|
17103
|
+
stableId4("idx", "root:indexes/root.md"),
|
|
16306
17104
|
"root",
|
|
16307
17105
|
"root",
|
|
16308
17106
|
rootIndex.uri,
|
|
@@ -16317,6 +17115,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
16317
17115
|
]);
|
|
16318
17116
|
}
|
|
16319
17117
|
if (wikiReadme) {
|
|
17118
|
+
const wikiPageId = stableId4("wiki", "wiki/README.md");
|
|
16320
17119
|
db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
16321
17120
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
16322
17121
|
ON CONFLICT(path) DO UPDATE SET
|
|
@@ -16326,7 +17125,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
16326
17125
|
status = excluded.status,
|
|
16327
17126
|
metadata_json = excluded.metadata_json,
|
|
16328
17127
|
updated_at = excluded.updated_at`, [
|
|
16329
|
-
|
|
17128
|
+
wikiPageId,
|
|
16330
17129
|
"wiki/README.md",
|
|
16331
17130
|
"Wiki",
|
|
16332
17131
|
wikiReadme.uri,
|
|
@@ -16339,6 +17138,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
16339
17138
|
timestamp,
|
|
16340
17139
|
timestamp
|
|
16341
17140
|
]);
|
|
17141
|
+
recordWikiChunk(db, wikiPageId, "Wiki", wikiReadme, wikiReadmeTemplate(), timestamp);
|
|
16342
17142
|
}
|
|
16343
17143
|
}
|
|
16344
17144
|
|
|
@@ -16467,6 +17267,34 @@ class KnowledgeService {
|
|
|
16467
17267
|
modelRegistry() {
|
|
16468
17268
|
return listModelRegistry(this.config());
|
|
16469
17269
|
}
|
|
17270
|
+
embeddingStatus() {
|
|
17271
|
+
const workspace = this.ensureWorkspace();
|
|
17272
|
+
return embeddingIndexStatus(workspace.knowledgeDbPath);
|
|
17273
|
+
}
|
|
17274
|
+
async indexEmbeddings(options = {}) {
|
|
17275
|
+
const workspace = this.ensureWorkspace();
|
|
17276
|
+
return indexKnowledgeEmbeddings({
|
|
17277
|
+
...options,
|
|
17278
|
+
dbPath: workspace.knowledgeDbPath,
|
|
17279
|
+
config: this.config()
|
|
17280
|
+
});
|
|
17281
|
+
}
|
|
17282
|
+
async semanticSearch(options) {
|
|
17283
|
+
const workspace = this.ensureWorkspace();
|
|
17284
|
+
return searchVectorIndex({
|
|
17285
|
+
...options,
|
|
17286
|
+
dbPath: workspace.knowledgeDbPath,
|
|
17287
|
+
config: this.config()
|
|
17288
|
+
});
|
|
17289
|
+
}
|
|
17290
|
+
async search(options) {
|
|
17291
|
+
const workspace = this.ensureWorkspace();
|
|
17292
|
+
return hybridSearch({
|
|
17293
|
+
...options,
|
|
17294
|
+
dbPath: workspace.knowledgeDbPath,
|
|
17295
|
+
config: this.config()
|
|
17296
|
+
});
|
|
17297
|
+
}
|
|
16470
17298
|
}
|
|
16471
17299
|
function createKnowledgeService(options = {}) {
|
|
16472
17300
|
return new KnowledgeService(options);
|
|
@@ -16581,6 +17409,57 @@ function buildServer() {
|
|
|
16581
17409
|
const service = createKnowledgeService({ scope });
|
|
16582
17410
|
return jsonText({ ok: true, models: service.modelRegistry() });
|
|
16583
17411
|
});
|
|
17412
|
+
registerTool(server, "ok_embeddings_status", "Embedding index status", "Inspect local embedding/vector index counts by provider and model", {
|
|
17413
|
+
scope: scopeField
|
|
17414
|
+
}, async ({ scope }) => {
|
|
17415
|
+
const service = createKnowledgeService({ scope });
|
|
17416
|
+
return jsonText({ ok: true, ...service.embeddingStatus() });
|
|
17417
|
+
});
|
|
17418
|
+
registerTool(server, "ok_embeddings_index", "Index embeddings", "Embed unindexed knowledge chunks into the local vector index", {
|
|
17419
|
+
scope: scopeField,
|
|
17420
|
+
limit: exports_external.number().optional().describe("Maximum chunks to embed"),
|
|
17421
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
17422
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
17423
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
17424
|
+
}, async ({ scope, limit, model, dimensions, fake }) => {
|
|
17425
|
+
const service = createKnowledgeService({ scope });
|
|
17426
|
+
try {
|
|
17427
|
+
return jsonText({ ok: true, ...await service.indexEmbeddings({ limit, modelRef: model, dimensions, fake }) });
|
|
17428
|
+
} catch (error48) {
|
|
17429
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
17430
|
+
}
|
|
17431
|
+
});
|
|
17432
|
+
registerTool(server, "ok_semantic_search", "Semantic search", "Search the local vector index and return cited chunks with provenance", {
|
|
17433
|
+
scope: scopeField,
|
|
17434
|
+
query: exports_external.string().describe("Semantic query"),
|
|
17435
|
+
limit: exports_external.number().optional().describe("Maximum results"),
|
|
17436
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
17437
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
17438
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
17439
|
+
}, async ({ scope, query, limit, model, dimensions, fake }) => {
|
|
17440
|
+
const service = createKnowledgeService({ scope });
|
|
17441
|
+
try {
|
|
17442
|
+
return jsonText({ ok: true, ...await service.semanticSearch({ query, limit, modelRef: model, dimensions, fake }) });
|
|
17443
|
+
} catch (error48) {
|
|
17444
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
17445
|
+
}
|
|
17446
|
+
});
|
|
17447
|
+
registerTool(server, "ok_search", "Hybrid knowledge search", "Search source chunks, generated wiki pages, sharded indexes, and optional semantic vectors", {
|
|
17448
|
+
scope: scopeField,
|
|
17449
|
+
query: exports_external.string().describe("Search query"),
|
|
17450
|
+
limit: exports_external.number().optional().describe("Maximum results"),
|
|
17451
|
+
semantic: exports_external.boolean().optional().describe("Include vector semantic results"),
|
|
17452
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
17453
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
17454
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
17455
|
+
}, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
|
|
17456
|
+
const service = createKnowledgeService({ scope });
|
|
17457
|
+
try {
|
|
17458
|
+
return jsonText({ ok: true, ...await service.search({ query, limit, semantic, modelRef: model, dimensions, fake }) });
|
|
17459
|
+
} catch (error48) {
|
|
17460
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
17461
|
+
}
|
|
17462
|
+
});
|
|
16584
17463
|
registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
|
|
16585
17464
|
title: exports_external.string().describe("Item title"),
|
|
16586
17465
|
content: exports_external.string().describe("Item content/body"),
|