@hasna/knowledge 0.2.14 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/open-knowledge-mcp.js +432 -2
- package/bin/open-knowledge.js +80 -40
- package/docs/architecture/ai-native-knowledge-base.md +7 -5
- package/docs/architecture/hybrid-semantic-search.md +17 -11
- package/package.json +1 -1
- package/src/cli.ts +24 -4
- package/src/mcp.js +17 -0
- package/src/search.ts +510 -0
- package/src/service.ts +10 -0
- package/src/wiki-layout.ts +41 -1
package/README.md
CHANGED
|
@@ -93,6 +93,10 @@ open-knowledge providers models --scope project --json
|
|
|
93
93
|
# Embed indexed chunks and run semantic search
|
|
94
94
|
open-knowledge embeddings index --scope project --model openai:text-embedding-3-small --json
|
|
95
95
|
open-knowledge embeddings search "company wiki policy" --scope project --json
|
|
96
|
+
|
|
97
|
+
# Hybrid search over source chunks, generated wiki pages, indexes, and optional vectors
|
|
98
|
+
open-knowledge search "company wiki policy" --scope project --json
|
|
99
|
+
open-knowledge search "company wiki policy" --scope project --semantic --json
|
|
96
100
|
```
|
|
97
101
|
|
|
98
102
|
## Commands
|
|
@@ -237,6 +241,17 @@ Consume open-files JSON or JSONL change events. This invalidates matching
|
|
|
237
241
|
source chunks and embeddings by source ref, revision, or hash, updates
|
|
238
242
|
permission/path/delete metadata, and records a local run ledger.
|
|
239
243
|
|
|
244
|
+
### search
|
|
245
|
+
```bash
|
|
246
|
+
open-knowledge search <query> [--scope project] [--limit <n>] [--json]
|
|
247
|
+
open-knowledge search <query> --semantic [--model openai:text-embedding-3-small] [--scope project] [--json]
|
|
248
|
+
```
|
|
249
|
+
Run hybrid search over `chunks_fts`, generated wiki chunks, wiki/index catalog
|
|
250
|
+
rows, and optional vector results. The default path is local-only keyword and
|
|
251
|
+
catalog search. `--semantic` embeds the query and merges vector results from
|
|
252
|
+
`vector_index_entries`, preserving source refs, artifact URIs, citations,
|
|
253
|
+
revision/hash metadata, and provenance in each structured result.
|
|
254
|
+
|
|
240
255
|
### safety
|
|
241
256
|
```bash
|
|
242
257
|
open-knowledge safety status [--scope project] [--json]
|
|
@@ -315,8 +330,8 @@ The MCP server exposes item tools (`ok_add`, `ok_list`, `ok_get`, `ok_update`,
|
|
|
315
330
|
`ok_import`, `ok_batch`), workspace/storage inspection (`ok_paths`,
|
|
316
331
|
`ok_storage_status`), provider/embedding tools (`ok_provider_status`,
|
|
317
332
|
`ok_provider_models`, `ok_embeddings_status`, `ok_embeddings_index`,
|
|
318
|
-
`ok_semantic_search`), and source-ref
|
|
319
|
-
`ok_resolve_source`).
|
|
333
|
+
`ok_semantic_search`), hybrid retrieval (`ok_search`), and source-ref
|
|
334
|
+
parsing/resolution (`ok_parse_source_ref`, `ok_resolve_source`).
|
|
320
335
|
|
|
321
336
|
## Source And Artifact Boundary
|
|
322
337
|
|
|
@@ -13660,7 +13660,7 @@ import { existsSync as existsSync7, readFileSync as readFileSync7, writeFileSync
|
|
|
13660
13660
|
// package.json
|
|
13661
13661
|
var package_default = {
|
|
13662
13662
|
name: "@hasna/knowledge",
|
|
13663
|
-
version: "0.2.14",
|
|
13663
|
+
version: "0.2.15",
|
|
13664
13664
|
description: "Agent-friendly local knowledge CLI with JSON output, pagination, and safe destructive actions",
|
|
13665
13665
|
type: "module",
|
|
13666
13666
|
bin: {
|
|
@@ -16393,6 +16393,379 @@ async function ingestSourceRef(options) {
|
|
|
16393
16393
|
};
|
|
16394
16394
|
}
|
|
16395
16395
|
|
|
16396
|
+
// src/search.ts
|
|
16397
|
+
/**
 * Decode a JSON string, accepting the result only when it is a plain object.
 *
 * @param {string | null | undefined} value - Raw JSON text (may be empty).
 * @returns {object} The decoded object, or `{}` when the input is falsy,
 *   is not valid JSON, or decodes to an array / primitive / null.
 */
function parseJsonObject3(value) {
  if (!value) {
    return {};
  }
  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed JSON is treated the same as "no metadata".
    return {};
  }
  if (!decoded || typeof decoded !== "object" || Array.isArray(decoded)) {
    return {};
  }
  return decoded;
}
|
|
16407
|
+
/**
 * Return the first non-empty string found in `metadata` under any of `keys`.
 *
 * @param {object} metadata - Object to read from.
 * @param {Iterable<string>} keys - Candidate property names, checked in order.
 * @returns {string | null} The first non-empty string value, or `null`.
 */
function metadataString3(metadata, keys) {
  for (const name of keys) {
    const candidate = metadata[name];
    if (typeof candidate !== "string" || candidate.length === 0) {
      continue;
    }
    return candidate;
  }
  return null;
}
|
|
16415
|
+
/**
 * Return the first finite number found in `metadata` under any of `keys`.
 *
 * @param {object} metadata - Object to read from.
 * @param {Iterable<string>} keys - Candidate property names, checked in order.
 * @returns {number | null} The first finite numeric value, or `null`
 *   (NaN, ±Infinity and non-number values are skipped).
 */
function metadataNumber3(metadata, keys) {
  for (const name of keys) {
    const candidate = metadata[name];
    if (typeof candidate !== "number" || !Number.isFinite(candidate)) {
      continue;
    }
    return candidate;
  }
  return null;
}
|
|
16423
|
+
/**
 * Remove duplicates from `values`, keeping the first occurrence of each.
 *
 * @param {Iterable<*>} values - Items to de-duplicate.
 * @returns {Array<*>} A new array in first-seen order.
 */
function unique(values) {
  const seen = new Set(values);
  return [...seen];
}
|
|
16426
|
+
/**
 * Split a free-text query into distinct search terms.
 *
 * The query is NFKC-normalized and lowercased, then broken into runs of
 * Unicode letters, digits and underscores. Duplicates are dropped and at most
 * the first 16 distinct terms are kept.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Distinct lowercase terms in order of first appearance.
 */
function queryTerms(query) {
  const normalized = query.normalize("NFKC").toLowerCase();
  const tokens = normalized.match(/[\p{L}\p{N}_]+/gu) ?? [];
  const nonEmpty = tokens.filter((token) => token.length > 0);
  const distinct = unique(nonEmpty);
  return distinct.slice(0, 16);
}
|
|
16430
|
+
/**
 * Build the MATCH expression used against `chunks_fts`.
 *
 * Each term becomes a prefix query (`term*`) and the terms are OR-ed together.
 *
 * @param {string[]} terms - Search terms.
 * @returns {string | null} The MATCH expression, or `null` when there are no terms.
 */
function ftsQueryForTerms(terms) {
  if (terms.length > 0) {
    const prefixQueries = terms.map((term) => `${term}*`);
    return prefixQueries.join(" OR ");
  }
  return null;
}
|
|
16435
|
+
/**
 * Escape the characters that are special inside a SQL LIKE pattern
 * (`\`, `%`, `_`) by prefixing each with a backslash.
 *
 * @param {string} term - Literal text to embed in a LIKE pattern.
 * @returns {string} The escaped text.
 */
function escapeLikeTerm(term) {
  // "$&" re-inserts the matched character after the literal backslash.
  return term.replace(/[\\%_]/g, "\\$&");
}
|
|
16438
|
+
/**
 * Produce the bound parameters for a multi-field LIKE search.
 *
 * For every term, the escaped `%term%` pattern is repeated once per searched
 * field.
 *
 * @param {string[]} terms - Search terms.
 * @param {number} fieldsPerTerm - Number of fields each term is matched against.
 * @returns {string[]} Flat parameter list, grouped by term.
 */
function likeParams(terms, fieldsPerTerm) {
  const params = [];
  for (const term of terms) {
    const pattern = `%${escapeLikeTerm(term)}%`;
    params.push(...Array.from({ length: fieldsPerTerm }).fill(pattern));
  }
  return params;
}
|
|
16441
|
+
/**
 * Convert an FTS rank plus result position into a keyword score in [0, 1].
 *
 * The rank comes from `bm25(chunks_fts)`, where a more negative value means a
 * better match (rows are fetched `ORDER BY rank ASC`). The previous formula,
 * `1 / (1 + |rank|)`, therefore rewarded weaker matches and could score a
 * later row above an earlier one. The negated rank is now squashed with
 * `r / (1 + r)` so the score rises with match quality; the positional score
 * remains as a floor and as the fallback when the rank is unusable.
 *
 * @param {number} rank - BM25 rank (negative; lower is better). Non-finite or
 *   positive values contribute nothing.
 * @param {number} index - Zero-based position of the row in rank order.
 * @returns {number} Score rounded to 6 decimals.
 */
function scoreFromRank(rank, index) {
  const relevance = Number.isFinite(rank) ? Math.max(0, -rank) : 0;
  const rankScore = relevance / (1 + relevance);
  const orderScore = 1 / (1 + index);
  return roundScore(Math.max(rankScore, orderScore));
}

/**
 * Score a catalog row by the fraction of query terms found in its text.
 *
 * @param {string} haystack - Lowercased text to search in.
 * @param {string[]} terms - Lowercased query terms.
 * @returns {number} 0 when nothing matches (or there are no terms); otherwise
 *   `0.35 + 0.5 * matchedFraction`, capped at 0.85 and rounded to 6 decimals.
 */
function catalogScore(haystack, terms) {
  if (terms.length === 0) {
    return 0;
  }
  const matched = terms.filter((term) => haystack.includes(term)).length;
  if (matched === 0) {
    return 0;
  }
  return roundScore(Math.min(0.85, 0.35 + (matched / terms.length) * 0.5));
}

/**
 * Map a similarity in [-1, 1] onto [0, 1].
 *
 * @param {number} score - Raw similarity from the vector search.
 * @returns {number} `(score + 1) / 2` clamped to [0, 1] and rounded to 6
 *   decimals; 0 when the input is not a number, so NaN never reaches the
 *   result sort comparator.
 */
function semanticScore(score) {
  const shifted = (score + 1) / 2;
  if (Number.isNaN(shifted)) {
    return 0;
  }
  return roundScore(Math.max(0, Math.min(1, shifted)));
}

/**
 * Round a score to 6 decimal places.
 *
 * @param {number} score - Value to round.
 * @returns {number} The rounded value.
 */
function roundScore(score) {
  return Number(score.toFixed(6));
}
|
|
16460
|
+
/**
 * Blend the per-channel scores of a result into one ranking score.
 *
 * Weights: keyword 0.55, semantic 0.4, catalog 0.35, plus a 0.05 boost when
 * the result carries a citation with a `chunk_id`. The total is capped at 1
 * and rounded to 6 decimals.
 *
 * @param {{keyword?: number, semantic?: number, catalog?: number}} scores
 * @param {{chunk_id?: string} | null | undefined} citation
 * @returns {number} Combined score in [0, 1].
 */
function combinedScore(scores, citation) {
  const keywordPart = (scores.keyword ?? 0) * 0.55;
  const semanticPart = (scores.semantic ?? 0) * 0.4;
  const catalogPart = (scores.catalog ?? 0) * 0.35;
  const boost = citation?.chunk_id ? 0.05 : 0;
  const total = keywordPart + semanticPart + catalogPart + boost;
  return roundScore(Math.min(1, total));
}
|
|
16467
|
+
/**
 * Read an already-recorded provenance object out of parsed metadata.
 *
 * @param {object} metadata - Parsed metadata object.
 * @returns {object | null} `metadata.provenance` when it is a non-array
 *   object, otherwise `null`.
 */
function existingProvenance(metadata) {
  const { provenance } = metadata;
  if (!provenance || typeof provenance !== "object" || Array.isArray(provenance)) {
    return null;
  }
  return provenance;
}
|
|
16471
|
+
/**
 * Resolve the provenance record for a chunk row.
 *
 * Order of preference:
 *   1. a `provenance` object already stored in the chunk's metadata JSON;
 *   2. a record built with `sourceProvenance` from the joined source columns,
 *      falling back to same-named metadata fields when a column is null.
 *
 * @param {object} row - Chunk row as selected by `selectFtsChunks`.
 * @returns {object | null} The provenance, or `null` when the row has neither
 *   a `source_revision_id` nor a `source_uri`.
 */
function provenanceForChunk2(row) {
  const metadata = parseJsonObject3(row.chunk_metadata_json);
  const recorded = existingProvenance(metadata);
  if (recorded) {
    return recorded;
  }
  const hasSourceLink = Boolean(row.source_revision_id) || Boolean(row.source_uri);
  if (!hasSourceLink) {
    return null;
  }
  const textField = (key) => metadataString3(metadata, [key]);
  const numberField = (key) => metadataNumber3(metadata, [key]);
  return sourceProvenance({
    source_ref: textField("source_ref"),
    source_uri: row.source_uri ?? textField("source_uri"),
    source_kind: row.source_kind ?? textField("source_kind"),
    source_revision_id: row.source_revision_id,
    revision: row.revision ?? textField("revision"),
    hash: row.hash ?? textField("hash"),
    chunk_id: row.chunk_id,
    start_offset: row.start_offset ?? numberField("start_offset"),
    end_offset: row.end_offset ?? numberField("end_offset"),
    status: textField("status"),
    resolver: "open-files-read-only"
  });
}
|
|
16492
|
+
/**
 * Run the full-text query against `chunks_fts` and return matching chunks
 * joined with their source revision, source and wiki page (when present).
 *
 * Rows are ordered by `bm25(chunks_fts)` ascending and exposed as `rank`.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string | null} ftsQuery - MATCH expression; falsy short-circuits to `[]`.
 * @param {number} limit - Maximum number of rows.
 * @returns {object[]} Matching rows (empty when there is no query).
 */
function selectFtsChunks(db, ftsQuery, limit) {
  if (!ftsQuery) {
    return [];
  }
  const statement = db.query(`SELECT
      chunks_fts.chunk_id,
      c.kind AS chunk_kind,
      c.wiki_page_id,
      c.text,
      c.token_count,
      c.start_offset,
      c.end_offset,
      c.metadata_json AS chunk_metadata_json,
      c.source_revision_id,
      sr.revision,
      sr.hash,
      s.uri AS source_uri,
      s.kind AS source_kind,
      s.title AS source_title,
      wp.path AS wiki_path,
      wp.title AS wiki_title,
      wp.artifact_uri AS wiki_artifact_uri,
      wp.content_hash AS wiki_content_hash,
      wp.status AS wiki_status,
      wp.metadata_json AS wiki_metadata_json,
      bm25(chunks_fts) AS rank
    FROM chunks_fts
    JOIN chunks c ON c.id = chunks_fts.chunk_id
    LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
    LEFT JOIN sources s ON s.id = sr.source_id
    LEFT JOIN wiki_pages wp ON wp.id = c.wiki_page_id
    WHERE chunks_fts MATCH ?
    ORDER BY rank ASC
    LIMIT ?`);
  return statement.all(ftsQuery, limit);
}
|
|
16526
|
+
/**
 * Build the WHERE fragment for a case-insensitive LIKE search over `fields`.
 *
 * One parenthesized group is emitted per term; within a group every field is
 * tested with `LIKE ? ESCAPE '\'`, and all groups are OR-ed. The fragment
 * expects `terms.length * fields.length` bound parameters.
 *
 * @param {string[]} fields - Column names to search.
 * @param {string[]} terms - Search terms (only the count is used here).
 * @returns {string} SQL fragment; `"1 = 0"` when there are no terms.
 */
function catalogWhere(fields, terms) {
  if (terms.length === 0) {
    return "1 = 0";
  }
  const fieldTests = fields.map((field) => `lower(COALESCE(${field}, '')) LIKE ? ESCAPE '\\'`);
  const termGroup = `(${fieldTests.join(" OR ")})`;
  return terms.map(() => termGroup).join(" OR ");
}
|
|
16532
|
+
/**
 * Find active wiki pages whose path, title, artifact URI or metadata JSON
 * contains any of the search terms, most recently updated first.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string[]} terms - Lowercased search terms.
 * @param {number} limit - Maximum number of rows.
 * @returns {object[]} Matching `wiki_pages` rows.
 */
function selectWikiPages(db, terms, limit) {
  const fields = ["path", "title", "artifact_uri", "metadata_json"];
  const where = catalogWhere(fields, terms);
  const statement = db.query(`SELECT id, path, title, artifact_uri, content_hash, status, metadata_json
    FROM wiki_pages
    WHERE status = 'active' AND (${where})
    ORDER BY updated_at DESC
    LIMIT ?`);
  const params = likeParams(terms, fields.length);
  return statement.all(...params, limit);
}
|
|
16540
|
+
/**
 * Find knowledge-index catalog rows whose kind, name, shard key, artifact URI
 * or metadata JSON contains any of the search terms, most recently updated
 * first.
 *
 * @param {object} db - Database handle exposing `query(sql).all(...params)`.
 * @param {string[]} terms - Lowercased search terms.
 * @param {number} limit - Maximum number of rows.
 * @returns {object[]} Matching `knowledge_indexes` rows.
 */
function selectKnowledgeIndexes(db, terms, limit) {
  const fields = ["kind", "name", "shard_key", "artifact_uri", "metadata_json"];
  const where = catalogWhere(fields, terms);
  const statement = db.query(`SELECT id, kind, name, artifact_uri, shard_key, metadata_json
    FROM knowledge_indexes
    WHERE ${where}
    ORDER BY updated_at DESC
    LIMIT ?`);
  const params = likeParams(terms, fields.length);
  return statement.all(...params, limit);
}
|
|
16548
|
+
/**
 * Convert a full-text chunk row into a structured search result.
 *
 * A row with a `wiki_page_id` becomes a `wiki_chunk` (with artifact details
 * taken from the joined wiki page); any other row becomes a `source_chunk`.
 * Source fields prefer the joined columns and fall back to the chunk's
 * metadata JSON.
 *
 * @param {object} row - Row returned by `selectFtsChunks`.
 * @param {number} keywordScore - Keyword-channel score for this row.
 * @returns {object} Result with `score` computed via `combinedScore`.
 */
function chunkResult(row, keywordScore) {
  const metadata = parseJsonObject3(row.chunk_metadata_json);
  const provenance = provenanceForChunk2(row);
  const fromMetadata = (key) => metadataString3(metadata, [key]);
  const sourceRef = fromMetadata("source_ref");
  const sourceUri = row.source_uri ?? fromMetadata("source_uri");
  const isWiki = Boolean(row.wiki_page_id);

  let source = null;
  if (sourceUri || sourceRef) {
    source = {
      uri: sourceUri,
      ref: sourceRef,
      kind: row.source_kind ?? fromMetadata("source_kind"),
      revision: row.revision ?? fromMetadata("revision"),
      hash: row.hash ?? fromMetadata("hash")
    };
  }

  const citation = {
    chunk_id: row.chunk_id,
    start_offset: row.start_offset,
    end_offset: row.end_offset
  };

  let artifact = null;
  if (isWiki) {
    artifact = {
      uri: row.wiki_artifact_uri,
      path: row.wiki_path,
      hash: row.wiki_content_hash,
      shard_key: row.wiki_path
    };
  }

  const result = {
    kind: isWiki ? "wiki_chunk" : "source_chunk",
    id: row.chunk_id,
    title: isWiki ? row.wiki_title : row.source_title,
    text: row.text,
    score: 0,
    scores: { keyword: keywordScore },
    source,
    citation,
    artifact,
    provenance,
    reasons: ["keyword_match"]
  };
  result.score = combinedScore(result.scores, result.citation);
  return result;
}
|
|
16585
|
+
/**
 * Convert a `wiki_pages` catalog row into a structured search result.
 *
 * The catalog score is computed over the lowercased concatenation of path,
 * title, artifact URI and raw metadata JSON.
 *
 * @param {object} row - Row returned by `selectWikiPages`.
 * @param {string[]} terms - Lowercased query terms.
 * @returns {object} A `wiki_page` result with no text, source or citation.
 */
function wikiPageResult(row, terms) {
  const metadata = parseJsonObject3(row.metadata_json);
  const haystack = `${row.path} ${row.title} ${row.artifact_uri ?? ""} ${row.metadata_json}`.toLowerCase();
  const scores = { catalog: catalogScore(haystack, terms) };
  const artifact = {
    uri: row.artifact_uri,
    path: row.path,
    hash: row.content_hash,
    shard_key: row.path
  };
  const result = {
    kind: "wiki_page",
    id: row.id,
    title: row.title,
    text: null,
    score: 0,
    scores,
    source: null,
    citation: null,
    artifact,
    provenance: existingProvenance(metadata),
    reasons: ["wiki_catalog_match"]
  };
  result.score = combinedScore(result.scores, result.citation);
  return result;
}
|
|
16609
|
+
/**
 * Convert a `knowledge_indexes` catalog row into a structured search result.
 *
 * The catalog score is computed over the lowercased concatenation of kind,
 * name, shard key, artifact URI and raw metadata JSON. Artifact `path` and
 * `hash` are read from the metadata keys `artifact_key` and `content_hash`.
 *
 * @param {object} row - Row returned by `selectKnowledgeIndexes`.
 * @param {string[]} terms - Lowercased query terms.
 * @returns {object} A `knowledge_index` result with no text, source or citation.
 */
function indexResult(row, terms) {
  const metadata = parseJsonObject3(row.metadata_json);
  const haystack = `${row.kind} ${row.name} ${row.shard_key ?? ""} ${row.artifact_uri ?? ""} ${row.metadata_json}`.toLowerCase();
  const scores = { catalog: catalogScore(haystack, terms) };
  const artifact = {
    uri: row.artifact_uri,
    path: metadataString3(metadata, ["artifact_key"]),
    hash: metadataString3(metadata, ["content_hash"]),
    shard_key: row.shard_key
  };
  const result = {
    kind: "knowledge_index",
    id: row.id,
    title: row.name,
    text: null,
    score: 0,
    scores,
    source: null,
    citation: null,
    artifact,
    provenance: existingProvenance(metadata),
    reasons: ["index_catalog_match"]
  };
  result.score = combinedScore(result.scores, result.citation);
  return result;
}
|
|
16633
|
+
/**
 * Add `entry` to the result map, folding it into an existing result that has
 * the same `kind:id` key.
 *
 * When merging: each score channel keeps the larger value (a channel whose
 * best value is 0 is stored as `undefined`), reasons are unioned, descriptive
 * fields keep the existing value unless it is null/undefined, and the combined
 * score is recomputed.
 *
 * @param {Map<string, object>} results - Accumulator keyed by `kind:id`; mutated.
 * @param {object} entry - Result to insert or merge.
 * @returns {void}
 */
function mergeResult(results, entry) {
  const key = `${entry.kind}:${entry.id}`;
  const existing = results.get(key);
  if (!existing) {
    results.set(key, entry);
    return;
  }
  const best = (channel) => Math.max(existing.scores[channel] ?? 0, entry.scores[channel] ?? 0) || undefined;
  existing.scores = {
    keyword: best("keyword"),
    semantic: best("semantic"),
    catalog: best("catalog")
  };
  existing.reasons = unique([...existing.reasons, ...entry.reasons]);
  for (const field of ["text", "title", "source", "citation", "artifact", "provenance"]) {
    existing[field] = existing[field] ?? entry[field];
  }
  existing.score = combinedScore(existing.scores, existing.citation);
}
|
|
16654
|
+
/**
 * Sort results in place by descending score.
 *
 * Ties are broken by kind (source chunk, wiki chunk, wiki page, knowledge
 * index) and then by `id` using `localeCompare`.
 *
 * @param {object[]} results - Results to sort; the array is mutated.
 * @returns {object[]} The same array, sorted.
 */
function sortResults(results) {
  const kindPriority = {
    source_chunk: 0,
    wiki_chunk: 1,
    wiki_page: 2,
    knowledge_index: 3
  };
  const compare = (left, right) => {
    if (right.score !== left.score) {
      return right.score - left.score;
    }
    const byKind = kindPriority[left.kind] - kindPriority[right.kind];
    return byKind || left.id.localeCompare(right.id);
  };
  return results.sort(compare);
}
|
|
16667
|
+
/**
 * Run a hybrid search: FTS keyword matches over chunks, LIKE matches over the
 * wiki-page and knowledge-index catalogs, and (optionally) vector results,
 * merged into one ranked list.
 *
 * Semantic search is enabled when `options.semantic` or `options.fake` is
 * `true`, or when `options.modelRef` is set. A failure in the semantic step is
 * reported in `warnings` and does not fail the whole search.
 *
 * @param {object} options
 * @param {string} options.query - User query; must contain non-whitespace text.
 * @param {string} options.dbPath - Knowledge database path (migrated, then opened).
 * @param {number} [options.limit=10] - Requested result count. Truncated to an
 *   integer and clamped to 1..100; values that are not finite numbers fall
 *   back to 10.
 * @param {boolean} [options.semantic]
 * @param {boolean} [options.fake]
 * @param {string} [options.modelRef]
 * @param {number} [options.dimensions]
 * @param {object} [options.config]
 * @param {object} [options.env]
 * @param {number} [options.batchSize]
 * @param {number} [options.maxParallelCalls]
 * @returns {Promise<object>} Query echo, effective limit, enabled modes,
 *   semantic provider details, per-channel counts, warnings and results.
 * @throws {Error} When the trimmed query is empty.
 */
async function hybridSearch(options) {
  const query = options.query.trim();
  if (!query) {
    throw new Error("Search query is required.");
  }

  // The limit feeds SQL LIMIT clauses (directly and multiplied), which need an
  // integer: a fractional value such as 7.5 would bind LIMIT 22.5, and NaN
  // would silently yield an empty result slice.
  const parsedLimit = Number(options.limit ?? 10);
  const requestedLimit = Number.isFinite(parsedLimit) ? Math.trunc(parsedLimit) : 10;
  const limit = Math.max(1, Math.min(requestedLimit, 100));

  const terms = queryTerms(query);
  const ftsQuery = ftsQueryForTerms(terms);
  const semanticEnabled = options.semantic === true || options.fake === true || Boolean(options.modelRef);
  const warnings = [];
  let semanticProvider = null;
  let semanticModel = null;
  let semanticDimensions = null;
  let keywordCount = 0;
  let catalogCount = 0;
  let semanticCount = 0;
  const merged = new Map();

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    // Over-fetch chunk candidates so merging and re-ranking has room to work.
    const ftsRows = selectFtsChunks(db, ftsQuery, Math.max(limit * 3, 20));
    keywordCount = ftsRows.length;
    ftsRows.forEach((row, index) => mergeResult(merged, chunkResult(row, scoreFromRank(row.rank, index))));

    const wikiRows = selectWikiPages(db, terms, Math.max(limit, 10));
    const indexRows = selectKnowledgeIndexes(db, terms, Math.max(limit, 10));
    catalogCount = wikiRows.length + indexRows.length;
    wikiRows.forEach((row) => mergeResult(merged, wikiPageResult(row, terms)));
    indexRows.forEach((row) => mergeResult(merged, indexResult(row, terms)));
  } finally {
    db.close();
  }

  if (semanticEnabled) {
    try {
      const semantic = await searchVectorIndex({
        dbPath: options.dbPath,
        query,
        limit: Math.max(limit * 3, 20),
        config: options.config,
        env: options.env,
        modelRef: options.modelRef,
        dimensions: options.dimensions,
        fake: options.fake,
        batchSize: options.batchSize,
        maxParallelCalls: options.maxParallelCalls
      });
      semanticProvider = semantic.provider;
      semanticModel = semantic.model;
      semanticDimensions = semantic.dimensions;
      semanticCount = semantic.results.length;
      for (const row of semantic.results) {
        const result = {
          kind: "source_chunk",
          id: row.chunk_id,
          title: null,
          text: row.text,
          score: 0,
          scores: { semantic: semanticScore(row.score) },
          source: {
            uri: row.source_uri,
            ref: row.source_ref,
            kind: row.provenance?.source_kind ?? null,
            revision: row.revision,
            hash: row.hash
          },
          citation: {
            chunk_id: row.chunk_id,
            start_offset: row.provenance?.start_offset ?? null,
            end_offset: row.provenance?.end_offset ?? null
          },
          artifact: null,
          provenance: row.provenance,
          reasons: ["semantic_match"]
        };
        result.score = combinedScore(result.scores, result.citation);
        mergeResult(merged, result);
      }
    } catch (error48) {
      // Best-effort: keyword and catalog results are still returned.
      warnings.push(`semantic_search_failed: ${error48 instanceof Error ? error48.message : String(error48)}`);
    }
  }

  const results = sortResults(Array.from(merged.values())).slice(0, limit);
  return {
    query,
    limit,
    mode: {
      keyword: true,
      catalog: true,
      semantic: semanticEnabled
    },
    semantic_provider: semanticProvider,
    semantic_model: semanticModel,
    semantic_dimensions: semanticDimensions,
    counts: {
      keyword_results: keywordCount,
      catalog_results: catalogCount,
      semantic_results: semanticCount,
      merged_results: results.length
    },
    warnings,
    results
  };
}
|
|
16768
|
+
|
|
16396
16769
|
// src/storage-contract.ts
|
|
16397
16770
|
import { createHash as createHash6, randomUUID as randomUUID4 } from "crypto";
|
|
16398
16771
|
var GENERATED_ARTIFACTS = [
|
|
@@ -16575,6 +16948,10 @@ function todayParts(now) {
|
|
|
16575
16948
|
function stableId4(prefix, value) {
|
|
16576
16949
|
return `${prefix}_${createHash7("sha256").update(value).digest("hex").slice(0, 20)}`;
|
|
16577
16950
|
}
|
|
16951
|
+
/**
 * Estimate a token count from whitespace-separated words.
 *
 * @param {string} text - Text to measure.
 * @returns {number} `ceil(words * 1.25)`, never less than 1.
 */
function estimateTokenCount2(text) {
  const pieces = text.trim().split(/\s+/);
  const wordCount = pieces.filter((piece) => piece !== "").length;
  const estimate = Math.ceil(wordCount * 1.25);
  return Math.max(1, estimate);
}
|
|
16578
16955
|
function agentSchemaTemplate() {
|
|
16579
16956
|
return `# Knowledge Agent Schema v1
|
|
16580
16957
|
|
|
@@ -16685,6 +17062,33 @@ function provenanceFor(artifact) {
|
|
|
16685
17062
|
artifact_key: artifact.key
|
|
16686
17063
|
});
|
|
16687
17064
|
}
|
|
17065
|
+
/**
 * Replace the searchable chunk for a wiki page.
 *
 * Any chunks previously stored for `pageId` are removed from `chunks_fts` and
 * `chunks`; then a single chunk (kind `"wiki"`, ordinal 0, covering the whole
 * body) is inserted into both tables. The chunk id is derived from the page id
 * and the artifact hash (or its URI when no hash is present).
 *
 * @param {object} db - Database handle exposing `query(sql).all(...)` and `run(sql, params)`.
 * @param {string} pageId - Id of the wiki page the chunk belongs to.
 * @param {string} title - Title written to the FTS row.
 * @param {object} artifact - Artifact descriptor; `key`, `uri` and optional `hash` are read.
 * @param {string} body - Page text stored as the chunk text.
 * @param {string} now - Value stored in `created_at`.
 * @returns {void}
 */
function recordWikiChunk(db, pageId, title, artifact, body, now) {
  const provenance = provenanceFor(artifact);
  const chunkId = stableId4("chk", `${pageId}\x00${artifact.hash ?? artifact.uri}`);

  // Drop FTS rows first: they are located through the chunk ids removed next.
  const staleChunks = db.query("SELECT id FROM chunks WHERE wiki_page_id = ?").all(pageId);
  for (const stale of staleChunks) {
    db.run("DELETE FROM chunks_fts WHERE chunk_id = ?", [stale.id]);
  }
  db.run("DELETE FROM chunks WHERE wiki_page_id = ?", [pageId]);

  const tokenCount = estimateTokenCount2(body);
  const metadataJson = JSON.stringify({
    artifact_key: artifact.key,
    artifact_uri: artifact.uri,
    content_hash: artifact.hash ?? null,
    provenance
  });
  const chunkParams = [chunkId, pageId, "wiki", 0, body, tokenCount, 0, body.length, metadataJson, now];
  db.run(`INSERT INTO chunks (id, wiki_page_id, kind, ordinal, text, token_count, start_offset, end_offset, metadata_json, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, chunkParams);
  db.run("INSERT INTO chunks_fts (chunk_id, text, title, source_uri) VALUES (?, ?, ?, ?)", [chunkId, body, title, artifact.uri]);
}
|
|
16688
17092
|
function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
16689
17093
|
const timestamp = now.toISOString();
|
|
16690
17094
|
const rootIndex = artifacts.find((artifact) => artifact.key.endsWith("indexes/root.md"));
|
|
@@ -16711,6 +17115,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
16711
17115
|
]);
|
|
16712
17116
|
}
|
|
16713
17117
|
if (wikiReadme) {
|
|
17118
|
+
const wikiPageId = stableId4("wiki", "wiki/README.md");
|
|
16714
17119
|
db.run(`INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
|
|
16715
17120
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
16716
17121
|
ON CONFLICT(path) DO UPDATE SET
|
|
@@ -16720,7 +17125,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
16720
17125
|
status = excluded.status,
|
|
16721
17126
|
metadata_json = excluded.metadata_json,
|
|
16722
17127
|
updated_at = excluded.updated_at`, [
|
|
16723
|
-
|
|
17128
|
+
wikiPageId,
|
|
16724
17129
|
"wiki/README.md",
|
|
16725
17130
|
"Wiki",
|
|
16726
17131
|
wikiReadme.uri,
|
|
@@ -16733,6 +17138,7 @@ function recordWikiLayoutCatalog(db, artifacts, now = new Date) {
|
|
|
16733
17138
|
timestamp,
|
|
16734
17139
|
timestamp
|
|
16735
17140
|
]);
|
|
17141
|
+
recordWikiChunk(db, wikiPageId, "Wiki", wikiReadme, wikiReadmeTemplate(), timestamp);
|
|
16736
17142
|
}
|
|
16737
17143
|
}
|
|
16738
17144
|
|
|
@@ -16881,6 +17287,14 @@ class KnowledgeService {
|
|
|
16881
17287
|
config: this.config()
|
|
16882
17288
|
});
|
|
16883
17289
|
}
|
|
17290
|
+
async search(options) {
|
|
17291
|
+
const workspace = this.ensureWorkspace();
|
|
17292
|
+
return hybridSearch({
|
|
17293
|
+
...options,
|
|
17294
|
+
dbPath: workspace.knowledgeDbPath,
|
|
17295
|
+
config: this.config()
|
|
17296
|
+
});
|
|
17297
|
+
}
|
|
16884
17298
|
}
|
|
16885
17299
|
function createKnowledgeService(options = {}) {
|
|
16886
17300
|
return new KnowledgeService(options);
|
|
@@ -17030,6 +17444,22 @@ function buildServer() {
|
|
|
17030
17444
|
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
17031
17445
|
}
|
|
17032
17446
|
});
|
|
17447
|
+
registerTool(server, "ok_search", "Hybrid knowledge search", "Search source chunks, generated wiki pages, sharded indexes, and optional semantic vectors", {
|
|
17448
|
+
scope: scopeField,
|
|
17449
|
+
query: exports_external.string().describe("Search query"),
|
|
17450
|
+
limit: exports_external.number().optional().describe("Maximum results"),
|
|
17451
|
+
semantic: exports_external.boolean().optional().describe("Include vector semantic results"),
|
|
17452
|
+
model: exports_external.string().optional().describe("Embedding model ref, default openai:text-embedding-3-small"),
|
|
17453
|
+
dimensions: exports_external.number().optional().describe("Embedding dimensions for deterministic fake mode"),
|
|
17454
|
+
fake: exports_external.boolean().optional().describe("Use deterministic fake embeddings for local tests")
|
|
17455
|
+
}, async ({ scope, query, limit, semantic, model, dimensions, fake }) => {
|
|
17456
|
+
const service = createKnowledgeService({ scope });
|
|
17457
|
+
try {
|
|
17458
|
+
return jsonText({ ok: true, ...await service.search({ query, limit, semantic, modelRef: model, dimensions, fake }) });
|
|
17459
|
+
} catch (error48) {
|
|
17460
|
+
return errorText(error48 instanceof Error ? error48.message : String(error48));
|
|
17461
|
+
}
|
|
17462
|
+
});
|
|
17033
17463
|
registerTool(server, "ok_add", "Add a knowledge item", "Add a new item to the knowledge store", {
|
|
17034
17464
|
title: exports_external.string().describe("Item title"),
|
|
17035
17465
|
content: exports_external.string().describe("Item content/body"),
|