@hasna/knowledge 0.2.27 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/bin/open-knowledge-mcp.js +15 -7
- package/bin/open-knowledge.js +17 -17
- package/dist/agent.d.ts +35 -0
- package/dist/artifact-store.d.ts +63 -0
- package/dist/auth.d.ts +35 -0
- package/dist/embeddings.d.ts +77 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.js +5709 -0
- package/dist/knowledge-db.d.ts +27 -0
- package/dist/manifest-ingest.d.ts +35 -0
- package/dist/outbox-consume.d.ts +25 -0
- package/dist/provenance.d.ts +50 -0
- package/dist/providers.d.ts +89 -0
- package/dist/reindex.d.ts +37 -0
- package/dist/remote-client.d.ts +108 -0
- package/dist/retrieval.d.ts +71 -0
- package/dist/safety.d.ts +70 -0
- package/dist/sdk.d.ts +72 -0
- package/dist/search.d.ts +65 -0
- package/dist/service.d.ts +117 -0
- package/dist/source-ingest.d.ts +18 -0
- package/dist/source-ref.d.ts +30 -0
- package/dist/source-resolver.d.ts +92 -0
- package/dist/storage-contract.d.ts +106 -0
- package/dist/web-search.d.ts +40 -0
- package/dist/wiki-compiler.d.ts +67 -0
- package/dist/wiki-layout.d.ts +23 -0
- package/dist/workspace.d.ts +111 -0
- package/package.json +15 -7
- package/src/agent.ts +0 -367
- package/src/artifact-store.ts +0 -184
- package/src/auth.ts +0 -123
- package/src/cli.ts +0 -1184
- package/src/embeddings.ts +0 -516
- package/src/knowledge-db.ts +0 -354
- package/src/manifest-ingest.ts +0 -515
- package/src/mcp-http.js +0 -110
- package/src/mcp.js +0 -1503
- package/src/outbox-consume.ts +0 -463
- package/src/provenance.ts +0 -93
- package/src/providers.ts +0 -308
- package/src/reindex.ts +0 -260
- package/src/remote-client.ts +0 -268
- package/src/retrieval.ts +0 -326
- package/src/safety.ts +0 -265
- package/src/schema.js +0 -25
- package/src/search.ts +0 -510
- package/src/service.ts +0 -443
- package/src/source-ingest.ts +0 -268
- package/src/source-ref.ts +0 -104
- package/src/source-resolver.ts +0 -436
- package/src/storage-contract.ts +0 -346
- package/src/store.ts +0 -113
- package/src/web-search.ts +0 -330
- package/src/wiki-compiler.ts +0 -711
- package/src/wiki-layout.ts +0 -251
- package/src/workspace.ts +0 -251
package/src/search.ts
DELETED
|
@@ -1,510 +0,0 @@
|
|
|
1
|
-
import type { Database } from 'bun:sqlite';
|
|
2
|
-
import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
|
|
3
|
-
import { searchVectorIndex, type EmbeddingRuntimeOptions } from './embeddings';
|
|
4
|
-
import { sourceProvenance, type GeneratedArtifactProvenance, type KnowledgeProvenance } from './provenance';
|
|
5
|
-
import type { KnowledgeConfig } from './workspace';
|
|
6
|
-
|
|
7
|
-
/** Discriminates the kinds of entries a hybrid search can return. */
export type SearchResultKind =
  | 'source_chunk'
  | 'wiki_chunk'
  | 'wiki_page'
  | 'knowledge_index';

/** Provenance payload carried by a search entry: source-level or generated-artifact provenance. */
export type SearchProvenance = KnowledgeProvenance | GeneratedArtifactProvenance;
|
|
9
|
-
|
|
10
|
-
/**
 * Input accepted by `hybridSearch`.
 *
 * Inherits the embedding runtime options, which are forwarded to the vector
 * search when semantic search is enabled.
 */
export interface HybridSearchOptions extends EmbeddingRuntimeOptions {
  /** Path of the knowledge database that is migrated and opened for the search. */
  dbPath: string;
  /** Raw query text; it is trimmed before use and must not be empty. */
  query: string;
  /** Maximum number of merged results to return (defaults to 10, clamped to 1–100). */
  limit?: number;
  /** Pass `true` to run the semantic (vector) search in addition to keyword and catalog search. */
  semantic?: boolean;
  /** Optional workspace configuration, passed through to the vector search. */
  config?: KnowledgeConfig;
}
|
|
17
|
-
|
|
18
|
-
/** Envelope returned by `hybridSearch`: the effective request, per-strategy counts, and ranked entries. */
export interface HybridSearchResult {
  /** The trimmed query that was searched for. */
  query: string;
  /** The effective result limit after defaulting and clamping. */
  limit: number;
  /** Which search strategies were requested; keyword and catalog search always run. */
  mode: {
    keyword: true;
    catalog: true;
    semantic: boolean;
  };
  /** Provider reported by the vector search, or `null` when it did not run or failed. */
  semantic_provider: string | null;
  /** Model reported by the vector search, or `null` when it did not run or failed. */
  semantic_model: string | null;
  /** Embedding dimensions reported by the vector search, or `null` when it did not run or failed. */
  semantic_dimensions: number | null;
  /** Row counts per strategy, plus the number of entries actually returned. */
  counts: {
    keyword_results: number;
    catalog_results: number;
    semantic_results: number;
    merged_results: number;
  };
  /** Non-fatal problems, e.g. a `semantic_search_failed: …` message. */
  warnings: string[];
  /** Merged entries, sorted by score and truncated to `limit`. */
  results: HybridSearchEntry[];
}
|
|
38
|
-
|
|
39
|
-
/** A single merged search hit together with its scoring breakdown and origin details. */
export interface HybridSearchEntry {
  /** What the hit refers to (chunk, wiki page, or knowledge index). */
  kind: SearchResultKind;
  /** Identifier of the chunk, wiki page, or index row. */
  id: string;
  /** Display title, when one is known. */
  title: string | null;
  /** Chunk text; `null` for catalog-only hits (wiki pages and indexes). */
  text: string | null;
  /** Combined score derived from `scores` and the citation boost. */
  score: number;
  /** Per-strategy scores; a key is absent when that strategy did not match. */
  scores: {
    keyword?: number;
    semantic?: number;
    catalog?: number;
  };
  /** Originating source of a chunk, or `null` when none is known. */
  source: {
    uri: string | null;
    ref: string | null;
    kind: string | null;
    revision: string | null;
    hash: string | null;
  } | null;
  /** Chunk-level citation; `null` for catalog-only hits. */
  citation: {
    chunk_id: string | null;
    start_offset: number | null;
    end_offset: number | null;
  } | null;
  /** Generated artifact (wiki page or index) the hit belongs to, if any. */
  artifact: {
    uri: string | null;
    path: string | null;
    hash: string | null;
    shard_key: string | null;
  } | null;
  /** Provenance record for the hit, when available. */
  provenance: SearchProvenance | null;
  /** Tags explaining why the entry matched, e.g. `keyword_match` or `semantic_match`. */
  reasons: string[];
}
|
|
71
|
-
|
|
72
|
-
/**
 * Row shape produced by the full-text chunk query in `selectFtsChunks`.
 *
 * Source and wiki columns come from LEFT JOINs, so they are `null` when the
 * chunk has no matching source revision or wiki page.
 */
interface FtsChunkRow {
  // Columns of the matched chunk itself.
  chunk_id: string;
  chunk_kind: string;
  wiki_page_id: string | null;
  text: string;
  token_count: number | null;
  start_offset: number | null;
  end_offset: number | null;
  chunk_metadata_json: string;

  // Source revision and source columns.
  source_revision_id: string | null;
  revision: string | null;
  hash: string | null;
  source_uri: string | null;
  source_kind: string | null;
  source_title: string | null;

  // Wiki page columns.
  wiki_path: string | null;
  wiki_title: string | null;
  wiki_artifact_uri: string | null;
  wiki_content_hash: string | null;
  wiki_status: string | null;
  wiki_metadata_json: string | null;

  /** Value of `bm25(chunks_fts)` for this row; the query orders by it ascending. */
  rank: number;
}
|
|
95
|
-
|
|
96
|
-
/** Columns selected from `wiki_pages` by the catalog search in `selectWikiPages`. */
interface WikiPageRow {
  id: string;
  path: string;
  title: string;
  artifact_uri: string | null;
  content_hash: string | null;
  status: string;
  /** Raw JSON text; parsed with `parseJsonObject` when a result is built. */
  metadata_json: string;
}
|
|
105
|
-
|
|
106
|
-
/** Columns selected from `knowledge_indexes` by the catalog search in `selectKnowledgeIndexes`. */
interface IndexRow {
  id: string;
  kind: string;
  name: string;
  artifact_uri: string | null;
  shard_key: string | null;
  /** Raw JSON text; parsed with `parseJsonObject` when a result is built. */
  metadata_json: string;
}
|
|
114
|
-
|
|
115
|
-
/**
 * Decodes a JSON string and returns it only when it is a plain (non-array) object.
 *
 * @param value - JSON text, or a nullish/empty value.
 * @returns The decoded object, or `{}` when the input is empty, is not valid
 *   JSON, or decodes to anything other than a non-null, non-array object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> {
  if (value === null || value === undefined || value === '') return {};
  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    // Malformed JSON is treated the same as "no metadata".
    return {};
  }
  const isPlainObject = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  return isPlainObject ? (decoded as Record<string, unknown>) : {};
}
|
|
124
|
-
|
|
125
|
-
/**
 * Looks up the first non-empty string stored under any of the given keys.
 *
 * @param metadata - Parsed metadata object.
 * @param keys - Candidate keys, checked in order.
 * @returns The first value that is a string of length > 0, or `null` if none qualifies.
 */
function metadataString(metadata: Record<string, unknown>, keys: string[]): string | null {
  const hit = keys
    .map((key) => metadata[key])
    .find((candidate) => typeof candidate === 'string' && candidate.length > 0);
  return typeof hit === 'string' ? hit : null;
}
|
|
132
|
-
|
|
133
|
-
/**
 * Looks up the first finite number stored under any of the given keys.
 *
 * @param metadata - Parsed metadata object.
 * @param keys - Candidate keys, checked in order.
 * @returns The first value that is a finite number, or `null` if none qualifies
 *   (`NaN`, `Infinity` and non-number values are skipped).
 */
function metadataNumber(metadata: Record<string, unknown>, keys: string[]): number | null {
  const hit = keys
    .map((key) => metadata[key])
    .find((candidate) => typeof candidate === 'number' && Number.isFinite(candidate));
  return typeof hit === 'number' ? hit : null;
}
|
|
140
|
-
|
|
141
|
-
/**
 * Removes duplicates from an array, keeping the first occurrence of each value
 * in its original position (values are compared with `Set` semantics).
 */
function unique<T>(values: T[]): T[] {
  const seen = new Set<T>(values);
  return Array.from(seen.values());
}
|
|
144
|
-
|
|
145
|
-
/**
 * Splits a free-text query into normalized search terms.
 *
 * The query is NFKC-normalized and lower-cased, then every run of Unicode
 * letters, digits or underscores becomes a term. Duplicates are dropped and
 * at most the first 16 distinct terms are kept.
 *
 * @param query - Raw query text.
 * @returns Distinct terms in order of first appearance (possibly empty).
 */
function queryTerms(query: string): string[] {
  const normalized = query.normalize('NFKC').toLowerCase();
  const tokens: string[] = normalized.match(/[\p{L}\p{N}_]+/gu) ?? [];
  const nonEmpty = tokens.filter((token) => token.length > 0);
  return unique(nonEmpty).slice(0, 16);
}
|
|
152
|
-
|
|
153
|
-
/**
 * Builds the full-text MATCH expression for a list of terms.
 *
 * Each term becomes a prefix query (`term*`) and the terms are combined with `OR`.
 *
 * @param terms - Normalized search terms.
 * @returns The MATCH expression, or `null` when there are no terms.
 */
function ftsQueryForTerms(terms: string[]): string | null {
  if (terms.length === 0) return null;
  const prefixClauses: string[] = [];
  for (const term of terms) {
    prefixClauses.push(term + '*');
  }
  return prefixClauses.join(' OR ');
}
|
|
157
|
-
|
|
158
|
-
/**
 * Escapes the characters that are special inside a SQL `LIKE` pattern
 * (`\`, `%` and `_`) by prefixing each with a backslash.
 */
function escapeLikeTerm(term: string): string {
  // `$1` re-inserts the matched character after the escaping backslash.
  return term.replace(/([\\%_])/g, '\\$1');
}
|
|
161
|
-
|
|
162
|
-
/**
 * Produces the bound parameters for a catalog `LIKE` search.
 *
 * Every term yields the pattern `%term%` (with LIKE wildcards escaped),
 * repeated once per searched field, in term order.
 *
 * @param terms - Normalized search terms.
 * @param fieldsPerTerm - Number of `LIKE ?` placeholders generated per term.
 */
function likeParams(terms: string[], fieldsPerTerm: number): string[] {
  return terms.flatMap((term) => {
    const pattern = '%' + escapeLikeTerm(term) + '%';
    return Array.from({ length: fieldsPerTerm }, () => pattern);
  });
}
|
|
165
|
-
|
|
166
|
-
/**
 * Converts an FTS `bm25()` rank plus the row's position into a keyword score in [0, 1].
 *
 * SQLite's `bm25()` is "smaller is better": stronger matches have more
 * negative values. The rank score therefore grows with the magnitude of a
 * negative rank (`r / (1 + r)` with `r = -rank`), so stronger matches score
 * higher. Non-finite or non-negative ranks contribute 0.
 *
 * The position score `1 / (1 + index)` acts as a floor, so the first row
 * always scores 1 and the result never increases with position for rows that
 * arrive in rank order.
 *
 * @param rank - `bm25()` value of the row.
 * @param index - Zero-based position of the row in the rank-ordered result set.
 * @returns The larger of the rank score and the position score, rounded to 6 decimals.
 */
function scoreFromRank(rank: number, index: number): number {
  // Negate because better matches are more negative; clamp so odd positive ranks do not score.
  const relevance = Number.isFinite(rank) ? Math.max(0, -rank) : 0;
  const rankScore = relevance / (1 + relevance);
  const orderScore = 1 / (1 + index);
  return roundScore(Math.max(rankScore, orderScore));
}
|
|
171
|
-
|
|
172
|
-
/**
 * Scores a catalog entry by the fraction of query terms found in its text.
 *
 * @param haystack - Text to search; callers pass it lower-cased.
 * @param terms - Normalized search terms.
 * @returns 0 when there are no terms or none match; otherwise
 *   `0.35 + 0.5 * (matched / total)`, capped at 0.85 and rounded to 6 decimals.
 */
function catalogScore(haystack: string, terms: string[]): number {
  if (terms.length === 0) return 0;
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) hits += 1;
  }
  if (hits === 0) return 0;
  const weighted = 0.35 + (hits / terms.length) * 0.5;
  return roundScore(Math.min(0.85, weighted));
}
|
|
178
|
-
|
|
179
|
-
/**
 * Maps a similarity value from [-1, 1] onto [0, 1] via `(score + 1) / 2`.
 *
 * Out-of-range values are clamped. A `NaN` input yields 0, so that it cannot
 * turn the combined score into `NaN` and make result sorting inconsistent.
 *
 * @param score - Raw similarity reported by the vector search.
 * @returns The normalized score rounded to 6 decimals.
 */
function semanticScore(score: number): number {
  if (Number.isNaN(score)) return 0;
  const shifted = (score + 1) / 2;
  return roundScore(Math.max(0, Math.min(1, shifted)));
}
|
|
182
|
-
|
|
183
|
-
/** Rounds a score to at most six decimal places. */
function roundScore(score: number): number {
  const fixed = score.toFixed(6);
  return +fixed;
}
|
|
186
|
-
|
|
187
|
-
/**
 * Blends the per-strategy scores into a single ranking score.
 *
 * Weights: keyword 0.55, semantic 0.4, catalog 0.35, plus a 0.05 boost when
 * the entry has a citation with a chunk id. Missing scores count as 0 and the
 * total is capped at 1.
 *
 * @param scores - Per-strategy scores of the entry.
 * @param citation - The entry's citation, if any.
 * @returns The blended score rounded to 6 decimals.
 */
function combinedScore(scores: HybridSearchEntry['scores'], citation: HybridSearchEntry['citation']): number {
  const keywordPart = (scores.keyword ?? 0) * 0.55;
  const semanticPart = (scores.semantic ?? 0) * 0.4;
  const catalogPart = (scores.catalog ?? 0) * 0.35;
  const citationBoost = Boolean(citation?.chunk_id) ? 0.05 : 0;
  const total = keywordPart + semanticPart + catalogPart + citationBoost;
  return roundScore(Math.min(1, total));
}
|
|
194
|
-
|
|
195
|
-
/**
 * Returns the `provenance` value stored in a metadata object when it is a
 * non-null, non-array object; otherwise `null`.
 *
 * The value is cast without checking its fields.
 */
function existingProvenance(metadata: Record<string, unknown>): SearchProvenance | null {
  const candidate = metadata.provenance;
  if (!candidate || typeof candidate !== 'object' || Array.isArray(candidate)) {
    return null;
  }
  return candidate as SearchProvenance;
}
|
|
199
|
-
|
|
200
|
-
/**
 * Resolves the provenance of a chunk row.
 *
 * Provenance already recorded in the chunk metadata wins. Otherwise, if the
 * row is linked to a source revision or source URI, a source provenance
 * record is built from the row columns, falling back to same-named chunk
 * metadata fields where a column is null.
 *
 * @returns The provenance, or `null` when nothing is recorded and the row has
 *   neither a source revision id nor a source URI.
 */
function provenanceForChunk(row: FtsChunkRow): SearchProvenance | null {
  const chunkMetadata = parseJsonObject(row.chunk_metadata_json);

  const recorded = existingProvenance(chunkMetadata);
  if (recorded !== null) return recorded;

  const hasSourceLink = Boolean(row.source_revision_id) || Boolean(row.source_uri);
  if (!hasSourceLink) return null;

  const textField = (key: string): string | null => metadataString(chunkMetadata, [key]);
  const numberField = (key: string): number | null => metadataNumber(chunkMetadata, [key]);

  return sourceProvenance({
    source_ref: textField('source_ref'),
    source_uri: row.source_uri ?? textField('source_uri'),
    source_kind: row.source_kind ?? textField('source_kind'),
    source_revision_id: row.source_revision_id,
    revision: row.revision ?? textField('revision'),
    hash: row.hash ?? textField('hash'),
    chunk_id: row.chunk_id,
    start_offset: row.start_offset ?? numberField('start_offset'),
    end_offset: row.end_offset ?? numberField('end_offset'),
    status: textField('status'),
    resolver: 'open-files-read-only',
  });
}
|
|
219
|
-
|
|
220
|
-
/**
 * Runs the full-text search over `chunks_fts` and returns the matching chunks
 * joined with their source revision, source and wiki page (where present).
 *
 * @param db - Open knowledge database.
 * @param ftsQuery - MATCH expression; `null` or empty means "no keyword search".
 * @param limit - Maximum number of rows to return.
 * @returns Rows ordered by ascending `bm25` rank, or `[]` when there is no query.
 */
function selectFtsChunks(db: Database, ftsQuery: string | null, limit: number): FtsChunkRow[] {
  if (ftsQuery === null || ftsQuery === '') return [];

  const sql = `SELECT
      chunks_fts.chunk_id,
      c.kind AS chunk_kind,
      c.wiki_page_id,
      c.text,
      c.token_count,
      c.start_offset,
      c.end_offset,
      c.metadata_json AS chunk_metadata_json,
      c.source_revision_id,
      sr.revision,
      sr.hash,
      s.uri AS source_uri,
      s.kind AS source_kind,
      s.title AS source_title,
      wp.path AS wiki_path,
      wp.title AS wiki_title,
      wp.artifact_uri AS wiki_artifact_uri,
      wp.content_hash AS wiki_content_hash,
      wp.status AS wiki_status,
      wp.metadata_json AS wiki_metadata_json,
      bm25(chunks_fts) AS rank
    FROM chunks_fts
    JOIN chunks c ON c.id = chunks_fts.chunk_id
    LEFT JOIN source_revisions sr ON sr.id = c.source_revision_id
    LEFT JOIN sources s ON s.id = sr.source_id
    LEFT JOIN wiki_pages wp ON wp.id = c.wiki_page_id
    WHERE chunks_fts MATCH ?
    ORDER BY rank ASC
    LIMIT ?`;

  const statement = db.query<FtsChunkRow, [string, number]>(sql);
  return statement.all(ftsQuery, limit);
}
|
|
255
|
-
|
|
256
|
-
/**
 * Builds the WHERE fragment for a catalog `LIKE` search.
 *
 * For every term it emits one parenthesized group that ORs a
 * case-insensitive `LIKE ?` test over each field; the groups are ORed
 * together. The placeholders line up with the output of `likeParams`.
 *
 * @param fields - Column names to search (interpolated as-is, so they must be trusted).
 * @param terms - Search terms; only their count matters here.
 * @returns The SQL fragment, or `1 = 0` (match nothing) when there are no terms.
 */
function catalogWhere(fields: string[], terms: string[]): string {
  if (terms.length === 0) return '1 = 0';
  // The group does not depend on the term itself, so build it once and repeat it.
  const fieldTests = fields.map((field) => `lower(COALESCE(${field}, '')) LIKE ? ESCAPE '\\'`);
  const perTermGroup = '(' + fieldTests.join(' OR ') + ')';
  return terms.map(() => perTermGroup).join(' OR ');
}
|
|
261
|
-
|
|
262
|
-
/**
 * Finds active wiki pages whose path, title, artifact URI or metadata JSON
 * contains any of the search terms.
 *
 * @param db - Open knowledge database.
 * @param terms - Normalized search terms; an empty list matches nothing.
 * @param limit - Maximum number of rows to return.
 * @returns Matching pages, most recently updated first.
 */
function selectWikiPages(db: Database, terms: string[], limit: number): WikiPageRow[] {
  const searchedColumns = ['path', 'title', 'artifact_uri', 'metadata_json'];
  const sql = `SELECT id, path, title, artifact_uri, content_hash, status, metadata_json
    FROM wiki_pages
    WHERE status = 'active' AND (${catalogWhere(searchedColumns, terms)})
    ORDER BY updated_at DESC
    LIMIT ?`;
  const patterns = likeParams(terms, searchedColumns.length);
  const statement = db.query<WikiPageRow, [...string[], number]>(sql);
  return statement.all(...patterns, limit);
}
|
|
272
|
-
|
|
273
|
-
/**
 * Finds knowledge indexes whose kind, name, shard key, artifact URI or
 * metadata JSON contains any of the search terms.
 *
 * @param db - Open knowledge database.
 * @param terms - Normalized search terms; an empty list matches nothing.
 * @param limit - Maximum number of rows to return.
 * @returns Matching index rows, most recently updated first.
 */
function selectKnowledgeIndexes(db: Database, terms: string[], limit: number): IndexRow[] {
  const searchedColumns = ['kind', 'name', 'shard_key', 'artifact_uri', 'metadata_json'];
  const sql = `SELECT id, kind, name, artifact_uri, shard_key, metadata_json
    FROM knowledge_indexes
    WHERE ${catalogWhere(searchedColumns, terms)}
    ORDER BY updated_at DESC
    LIMIT ?`;
  const patterns = likeParams(terms, searchedColumns.length);
  const statement = db.query<IndexRow, [...string[], number]>(sql);
  return statement.all(...patterns, limit);
}
|
|
283
|
-
|
|
284
|
-
/**
 * Converts a full-text chunk row into a search entry.
 *
 * The entry is a `wiki_chunk` when the row has a wiki page id, otherwise a
 * `source_chunk`. Source fields prefer the joined columns and fall back to
 * same-named chunk metadata fields. Citation offsets use the same fallback
 * as `provenanceForChunk`, so the citation and the derived provenance agree
 * when offsets are only present in the metadata.
 *
 * @param row - Row returned by the full-text query.
 * @param keywordScore - Keyword score already computed for the row.
 * @returns The entry with its combined score filled in.
 */
function chunkResult(row: FtsChunkRow, keywordScore: number): HybridSearchEntry {
  const metadata = parseJsonObject(row.chunk_metadata_json);
  const provenance = provenanceForChunk(row);
  const sourceRef = metadataString(metadata, ['source_ref']);
  const sourceUri = row.source_uri ?? metadataString(metadata, ['source_uri']);
  const isWiki = Boolean(row.wiki_page_id);

  const scores: HybridSearchEntry['scores'] = { keyword: keywordScore };
  const citation: HybridSearchEntry['citation'] = {
    chunk_id: row.chunk_id,
    // Fall back to metadata offsets, mirroring provenanceForChunk.
    start_offset: row.start_offset ?? metadataNumber(metadata, ['start_offset']),
    end_offset: row.end_offset ?? metadataNumber(metadata, ['end_offset']),
  };

  return {
    kind: isWiki ? 'wiki_chunk' : 'source_chunk',
    id: row.chunk_id,
    title: isWiki ? row.wiki_title : row.source_title,
    text: row.text,
    score: combinedScore(scores, citation),
    scores,
    // Only report a source when there is something to identify it by.
    source: sourceUri || sourceRef ? {
      uri: sourceUri,
      ref: sourceRef,
      kind: row.source_kind ?? metadataString(metadata, ['source_kind']),
      revision: row.revision ?? metadataString(metadata, ['revision']),
      hash: row.hash ?? metadataString(metadata, ['hash']),
    } : null,
    citation,
    artifact: isWiki ? {
      uri: row.wiki_artifact_uri,
      path: row.wiki_path,
      hash: row.wiki_content_hash,
      shard_key: row.wiki_path,
    } : null,
    provenance,
    reasons: ['keyword_match'],
  };
}
|
|
321
|
-
|
|
322
|
-
/**
 * Converts a wiki page catalog row into a search entry.
 *
 * The catalog score is computed over the lower-cased concatenation of the
 * page's path, title, artifact URI and raw metadata JSON. The entry has no
 * text, source or citation; its artifact describes the page itself.
 *
 * @param row - Row returned by the wiki catalog query.
 * @param terms - Normalized search terms used for scoring.
 */
function wikiPageResult(row: WikiPageRow, terms: string[]): HybridSearchEntry {
  const pageMetadata = parseJsonObject(row.metadata_json);
  const haystack = `${row.path} ${row.title} ${row.artifact_uri ?? ''} ${row.metadata_json}`.toLowerCase();
  const scores: HybridSearchEntry['scores'] = { catalog: catalogScore(haystack, terms) };
  const citation: HybridSearchEntry['citation'] = null;

  return {
    kind: 'wiki_page',
    id: row.id,
    title: row.title,
    text: null,
    score: combinedScore(scores, citation),
    scores,
    source: null,
    citation,
    artifact: {
      uri: row.artifact_uri,
      path: row.path,
      hash: row.content_hash,
      shard_key: row.path,
    },
    provenance: existingProvenance(pageMetadata),
    reasons: ['wiki_catalog_match'],
  };
}
|
|
346
|
-
|
|
347
|
-
/**
 * Converts a knowledge index catalog row into a search entry.
 *
 * The catalog score is computed over the lower-cased concatenation of the
 * row's kind, name, shard key, artifact URI and raw metadata JSON. The
 * artifact path and hash are read from the `artifact_key` and `content_hash`
 * metadata fields.
 *
 * @param row - Row returned by the index catalog query.
 * @param terms - Normalized search terms used for scoring.
 */
function indexResult(row: IndexRow, terms: string[]): HybridSearchEntry {
  const indexMetadata = parseJsonObject(row.metadata_json);
  const haystack = `${row.kind} ${row.name} ${row.shard_key ?? ''} ${row.artifact_uri ?? ''} ${row.metadata_json}`.toLowerCase();
  const scores: HybridSearchEntry['scores'] = { catalog: catalogScore(haystack, terms) };
  const citation: HybridSearchEntry['citation'] = null;

  return {
    kind: 'knowledge_index',
    id: row.id,
    title: row.name,
    text: null,
    score: combinedScore(scores, citation),
    scores,
    source: null,
    citation,
    artifact: {
      uri: row.artifact_uri,
      path: metadataString(indexMetadata, ['artifact_key']),
      hash: metadataString(indexMetadata, ['content_hash']),
      shard_key: row.shard_key,
    },
    provenance: existingProvenance(indexMetadata),
    reasons: ['index_catalog_match'],
  };
}
|
|
371
|
-
|
|
372
|
-
/**
 * Adds an entry to the result map, merging it into an existing entry with the
 * same `kind:id` key.
 *
 * On a merge, each per-strategy score becomes the larger of the two (a score
 * of 0 is dropped), reasons are unioned, fields that are null/undefined on
 * the existing entry are filled from the new one, and the combined score is
 * recomputed. The existing entry is mutated in place.
 *
 * @param results - Map of merged entries keyed by `kind:id`.
 * @param entry - Entry to insert or fold in.
 */
function mergeResult(results: Map<string, HybridSearchEntry>, entry: HybridSearchEntry): void {
  const key = `${entry.kind}:${entry.id}`;
  const current = results.get(key);
  if (current === undefined) {
    results.set(key, entry);
    return;
  }

  // `|| undefined` removes a strategy that scored 0 on both sides.
  const strongest = (left: number | undefined, right: number | undefined): number | undefined =>
    Math.max(left ?? 0, right ?? 0) || undefined;

  current.scores = {
    keyword: strongest(current.scores.keyword, entry.scores.keyword),
    semantic: strongest(current.scores.semantic, entry.scores.semantic),
    catalog: strongest(current.scores.catalog, entry.scores.catalog),
  };
  current.reasons = unique([...current.reasons, ...entry.reasons]);

  if (current.text == null) current.text = entry.text;
  if (current.title == null) current.title = entry.title;
  if (current.source == null) current.source = entry.source;
  if (current.citation == null) current.citation = entry.citation;
  if (current.artifact == null) current.artifact = entry.artifact;
  if (current.provenance == null) current.provenance = entry.provenance;

  current.score = combinedScore(current.scores, current.citation);
}
|
|
393
|
-
|
|
394
|
-
/**
 * Returns the entries ordered for presentation.
 *
 * Order: higher score first; ties go to source chunks, then wiki chunks,
 * wiki pages and knowledge indexes; remaining ties are ordered by id.
 *
 * The input array is not modified; a sorted copy is returned.
 *
 * @param results - Entries to order.
 * @returns A new array with the entries in ranking order.
 */
function sortResults(results: HybridSearchEntry[]): HybridSearchEntry[] {
  const kindOrder: Record<SearchResultKind, number> = {
    source_chunk: 0,
    wiki_chunk: 1,
    wiki_page: 2,
    knowledge_index: 3,
  };
  // Copy first: Array.prototype.sort sorts in place and would reorder the caller's array.
  return [...results].sort((a, b) => {
    if (b.score !== a.score) return b.score - a.score;
    return kindOrder[a.kind] - kindOrder[b.kind] || a.id.localeCompare(b.id);
  });
}
|
|
406
|
-
|
|
407
|
-
/**
 * Searches the knowledge database with keyword (FTS), catalog (LIKE) and,
 * optionally, semantic (vector) search, and merges the hits into one ranked list.
 *
 * Steps:
 * 1. Trim the query and derive search terms and the FTS expression.
 * 2. Migrate and open the database, run the keyword and catalog queries, and
 *    close the database again.
 * 3. If semantic search is enabled (`semantic === true`, `fake === true`, or a
 *    `modelRef` is given), run the vector search. A failure there is recorded
 *    in `warnings` and does not fail the whole search.
 * 4. Merge hits by `kind:id`, sort, and truncate to the limit.
 *
 * The limit defaults to 10, is truncated to an integer and clamped to 1–100;
 * a `NaN` limit falls back to 10, so neither SQL `LIMIT` nor the final slice
 * ever receives a non-integer.
 *
 * @param options - Query, database path, limit and embedding options.
 * @returns The ranked results plus per-strategy counts and warnings.
 * @throws Error if the trimmed query is empty. Errors from the database
 *   migration or from the keyword/catalog queries are not caught here.
 */
export async function hybridSearch(options: HybridSearchOptions): Promise<HybridSearchResult> {
  const query = options.query.trim();
  if (!query) throw new Error('Search query is required.');

  const requestedLimit = options.limit ?? 10;
  const limit = Number.isNaN(requestedLimit)
    ? 10
    : Math.max(1, Math.min(Math.trunc(requestedLimit), 100));

  const terms = queryTerms(query);
  const ftsQuery = ftsQueryForTerms(terms);
  const semanticEnabled = options.semantic === true || options.fake === true || Boolean(options.modelRef);
  const warnings: string[] = [];
  let semanticProvider: string | null = null;
  let semanticModel: string | null = null;
  let semanticDimensions: number | null = null;
  let keywordCount = 0;
  let catalogCount = 0;
  let semanticCount = 0;
  const merged = new Map<string, HybridSearchEntry>();

  migrateKnowledgeDb(options.dbPath);
  const db = openKnowledgeDb(options.dbPath);
  try {
    // Over-fetch keyword hits so merging and re-ranking have candidates to work with.
    const ftsRows = selectFtsChunks(db, ftsQuery, Math.max(limit * 3, 20));
    keywordCount = ftsRows.length;
    ftsRows.forEach((row, index) => mergeResult(merged, chunkResult(row, scoreFromRank(row.rank, index))));

    const wikiRows = selectWikiPages(db, terms, Math.max(limit, 10));
    const indexRows = selectKnowledgeIndexes(db, terms, Math.max(limit, 10));
    catalogCount = wikiRows.length + indexRows.length;
    wikiRows.forEach((row) => mergeResult(merged, wikiPageResult(row, terms)));
    indexRows.forEach((row) => mergeResult(merged, indexResult(row, terms)));
  } finally {
    db.close();
  }

  if (semanticEnabled) {
    try {
      const semantic = await searchVectorIndex({
        dbPath: options.dbPath,
        query,
        limit: Math.max(limit * 3, 20),
        config: options.config,
        env: options.env,
        modelRef: options.modelRef,
        dimensions: options.dimensions,
        fake: options.fake,
        batchSize: options.batchSize,
        maxParallelCalls: options.maxParallelCalls,
      });
      semanticProvider = semantic.provider;
      semanticModel = semantic.model;
      semanticDimensions = semantic.dimensions;
      semanticCount = semantic.results.length;
      for (const row of semantic.results) {
        // A chunk the keyword search already classified as a wiki chunk must reuse that
        // kind; otherwise the merge key differs and the same chunk is listed twice.
        // NOTE(review): semantic-only hits are still labelled source_chunk because the
        // vector result does not expose the chunk kind here — confirm.
        const kind: SearchResultKind = merged.has(`wiki_chunk:${row.chunk_id}`) ? 'wiki_chunk' : 'source_chunk';
        const result: HybridSearchEntry = {
          kind,
          id: row.chunk_id,
          title: null,
          text: row.text,
          score: 0,
          scores: { semantic: semanticScore(row.score) },
          source: {
            uri: row.source_uri,
            ref: row.source_ref,
            kind: row.provenance?.source_kind ?? null,
            revision: row.revision,
            hash: row.hash,
          },
          citation: {
            chunk_id: row.chunk_id,
            start_offset: row.provenance?.start_offset ?? null,
            end_offset: row.provenance?.end_offset ?? null,
          },
          artifact: null,
          provenance: row.provenance,
          reasons: ['semantic_match'],
        };
        result.score = combinedScore(result.scores, result.citation);
        mergeResult(merged, result);
      }
    } catch (error) {
      // Semantic search is best-effort: keep keyword and catalog results and report the failure.
      warnings.push(`semantic_search_failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  const results = sortResults(Array.from(merged.values())).slice(0, limit);
  return {
    query,
    limit,
    mode: {
      keyword: true,
      catalog: true,
      semantic: semanticEnabled,
    },
    semantic_provider: semanticProvider,
    semantic_model: semanticModel,
    semantic_dimensions: semanticDimensions,
    counts: {
      keyword_results: keywordCount,
      catalog_results: catalogCount,
      semantic_results: semanticCount,
      // Counted after truncation, i.e. the number of entries actually returned.
      merged_results: results.length,
    },
    warnings,
    results,
  };
}
|