@voidwire/lore 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +64 -12
- package/index.ts +3 -0
- package/lib/realtime.ts +3 -1
- package/lib/search.ts +2 -1
- package/lib/semantic.ts +136 -0
- package/package.json +1 -1
package/cli.ts
CHANGED
|
@@ -38,10 +38,12 @@ import {
|
|
|
38
38
|
captureObservation,
|
|
39
39
|
indexAndEmbed,
|
|
40
40
|
semanticSearch,
|
|
41
|
+
hybridSearch,
|
|
41
42
|
formatBriefSearch,
|
|
42
43
|
hasEmbeddings,
|
|
43
44
|
SOURCES,
|
|
44
45
|
type SearchResult,
|
|
46
|
+
type HybridResult,
|
|
45
47
|
type ListResult,
|
|
46
48
|
type ListEntry,
|
|
47
49
|
type Source,
|
|
@@ -89,7 +91,14 @@ function parseArgs(args: string[]): Map<string, string> {
|
|
|
89
91
|
}
|
|
90
92
|
|
|
91
93
|
// Boolean flags that don't take values
|
|
92
|
-
const BOOLEAN_FLAGS = new Set([
|
|
94
|
+
// Switch-style flags: their presence alone carries meaning, so the argument
// that follows them is never consumed as a value.
const BOOLEAN_FLAGS = new Set<string>(
  ["help", "sources", "domains", "exact", "semantic", "brief"],
);
|
|
93
102
|
|
|
94
103
|
function getPositionalArgs(args: string[]): string[] {
|
|
95
104
|
const result: string[] = [];
|
|
@@ -157,6 +166,7 @@ async function handleSearch(args: string[]): Promise<void> {
|
|
|
157
166
|
const parsed = parseArgs(args);
|
|
158
167
|
const positional = getPositionalArgs(args);
|
|
159
168
|
const exact = hasFlag(args, "exact");
|
|
169
|
+
const semanticOnly = hasFlag(args, "semantic");
|
|
160
170
|
|
|
161
171
|
// Handle --sources flag
|
|
162
172
|
if (hasFlag(args, "sources")) {
|
|
@@ -258,33 +268,70 @@ async function handleSearch(args: string[]): Promise<void> {
|
|
|
258
268
|
return;
|
|
259
269
|
}
|
|
260
270
|
|
|
261
|
-
//
|
|
271
|
+
// Check embeddings for semantic/hybrid modes
|
|
262
272
|
if (!hasEmbeddings()) {
|
|
263
273
|
fail("No embeddings found. Run lore-embed-all first.", 2);
|
|
264
274
|
}
|
|
265
275
|
|
|
266
276
|
const brief = hasFlag(args, "brief");
|
|
267
277
|
|
|
278
|
+
// Semantic-only path (explicit --semantic)
|
|
279
|
+
if (semanticOnly) {
|
|
280
|
+
try {
|
|
281
|
+
const results = await semanticSearch(query, { source, limit, project });
|
|
282
|
+
|
|
283
|
+
if (brief) {
|
|
284
|
+
console.log(formatBriefSearch(results));
|
|
285
|
+
} else {
|
|
286
|
+
output({
|
|
287
|
+
success: true,
|
|
288
|
+
results,
|
|
289
|
+
count: results.length,
|
|
290
|
+
mode: "semantic",
|
|
291
|
+
});
|
|
292
|
+
}
|
|
293
|
+
console.error(
|
|
294
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
|
|
295
|
+
);
|
|
296
|
+
process.exit(0);
|
|
297
|
+
} catch (error) {
|
|
298
|
+
const message = error instanceof Error ? error.message : "Unknown error";
|
|
299
|
+
fail(`Semantic search failed: ${message}`, 2);
|
|
300
|
+
}
|
|
301
|
+
return;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
// Hybrid path (default) - combines vector + keyword
|
|
268
305
|
try {
|
|
269
|
-
const results = await
|
|
306
|
+
const results = await hybridSearch(query, {
|
|
307
|
+
source,
|
|
308
|
+
limit,
|
|
309
|
+
project,
|
|
310
|
+
since,
|
|
311
|
+
});
|
|
270
312
|
|
|
271
313
|
if (brief) {
|
|
272
|
-
|
|
314
|
+
// Format hybrid results for brief output (reuse semantic formatter)
|
|
315
|
+
const asSemanticResults = results.map((r) => ({
|
|
316
|
+
...r,
|
|
317
|
+
distance: 1 - r.score, // Convert score back to distance-like for formatter
|
|
318
|
+
}));
|
|
319
|
+
console.log(formatBriefSearch(asSemanticResults));
|
|
273
320
|
} else {
|
|
274
321
|
output({
|
|
275
322
|
success: true,
|
|
276
323
|
results,
|
|
277
324
|
count: results.length,
|
|
278
|
-
mode: "
|
|
325
|
+
mode: "hybrid",
|
|
279
326
|
});
|
|
280
327
|
}
|
|
281
328
|
console.error(
|
|
282
|
-
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (
|
|
329
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (hybrid)`,
|
|
283
330
|
);
|
|
284
331
|
process.exit(0);
|
|
285
332
|
} catch (error) {
|
|
286
333
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
287
|
-
fail(`
|
|
334
|
+
fail(`Hybrid search failed: ${message}`, 2);
|
|
288
335
|
}
|
|
289
336
|
}
|
|
290
337
|
|
|
@@ -832,13 +879,17 @@ function showSearchHelp(): void {
|
|
|
832
879
|
lore search - Search indexed knowledge
|
|
833
880
|
|
|
834
881
|
Usage:
|
|
835
|
-
lore search <query> Search all sources
|
|
882
|
+
lore search <query> Search all sources (hybrid by default)
|
|
836
883
|
lore search <source> <query> Search specific source
|
|
837
884
|
|
|
885
|
+
Search Modes:
|
|
886
|
+
(default) Hybrid search (vector + keyword merged, 0.7/0.3 weighting)
|
|
887
|
+
--exact FTS5 keyword search only
|
|
888
|
+
--semantic Vector search only
|
|
889
|
+
|
|
838
890
|
Options:
|
|
839
|
-
--exact Use FTS5 text search (bypasses semantic search)
|
|
840
891
|
--limit <n> Maximum results (default: 20)
|
|
841
|
-
--project <name> Filter results by project
|
|
892
|
+
--project <name> Filter results by project/topic
|
|
842
893
|
--brief Compact output (titles only)
|
|
843
894
|
--since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
844
895
|
--help Show this help
|
|
@@ -867,11 +918,12 @@ See also:
|
|
|
867
918
|
lore sources List all sources with entry counts
|
|
868
919
|
|
|
869
920
|
Examples:
|
|
870
|
-
lore search "authentication"
|
|
921
|
+
lore search "authentication" # hybrid (default)
|
|
922
|
+
lore search --exact "def process_data" # keyword only
|
|
923
|
+
lore search --semantic "login flow concepts" # vector only
|
|
871
924
|
lore search blogs "typescript patterns"
|
|
872
925
|
lore search commits --since this-week "refactor"
|
|
873
926
|
lore search "authentication" --project=momentum --limit 5
|
|
874
|
-
lore search --exact "def process_data"
|
|
875
927
|
lore search prismis "kubernetes security"
|
|
876
928
|
lore search atuin "docker build"
|
|
877
929
|
`);
|
package/index.ts
CHANGED
|
@@ -87,11 +87,14 @@ export {
|
|
|
87
87
|
// Semantic search
|
|
88
88
|
export {
|
|
89
89
|
semanticSearch,
|
|
90
|
+
hybridSearch,
|
|
90
91
|
formatBriefSearch,
|
|
91
92
|
embedQuery,
|
|
92
93
|
hasEmbeddings,
|
|
93
94
|
type SemanticResult,
|
|
94
95
|
type SemanticSearchOptions,
|
|
96
|
+
type HybridResult,
|
|
97
|
+
type HybridSearchOptions,
|
|
95
98
|
} from "./lib/semantic";
|
|
96
99
|
|
|
97
100
|
// Real-time indexing
|
package/lib/realtime.ts
CHANGED
|
@@ -161,11 +161,13 @@ function buildMetadata(event: CaptureEvent): string {
|
|
|
161
161
|
const timestamp = event.timestamp;
|
|
162
162
|
const date = timestamp ? timestamp.substring(0, 10) : "";
|
|
163
163
|
|
|
164
|
+
const content = getContentForEmbedding(event);
|
|
164
165
|
const metadata: Record<string, unknown> = {
|
|
165
166
|
topic: data.topic || "general",
|
|
166
167
|
timestamp,
|
|
167
168
|
date,
|
|
168
|
-
content
|
|
169
|
+
content,
|
|
170
|
+
content_hash: hashContent(content),
|
|
169
171
|
};
|
|
170
172
|
|
|
171
173
|
// Add type-specific fields
|
package/lib/search.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { homedir } from "os";
|
|
|
10
10
|
import { existsSync } from "fs";
|
|
11
11
|
|
|
12
12
|
export interface SearchResult {
|
|
13
|
+
rowid: number;
|
|
13
14
|
source: string;
|
|
14
15
|
title: string;
|
|
15
16
|
content: string;
|
|
@@ -82,7 +83,7 @@ export function search(
|
|
|
82
83
|
params.push(limit);
|
|
83
84
|
|
|
84
85
|
const sql = `
|
|
85
|
-
SELECT source, title, snippet(search, 2, '→', '←', '...', 32) as content, metadata, rank
|
|
86
|
+
SELECT rowid, source, title, snippet(search, 2, '→', '←', '...', 32) as content, metadata, rank
|
|
86
87
|
FROM search
|
|
87
88
|
WHERE ${conditions.join(" AND ")}
|
|
88
89
|
ORDER BY rank
|
package/lib/semantic.ts
CHANGED
|
@@ -10,8 +10,10 @@ import { Database } from "bun:sqlite";
|
|
|
10
10
|
import { existsSync } from "fs";
|
|
11
11
|
import { pipeline } from "@huggingface/transformers";
|
|
12
12
|
import { getDatabasePath, openDatabase } from "./db.js";
|
|
13
|
+
import { search as keywordSearch, type SearchResult } from "./search.js";
|
|
13
14
|
|
|
14
15
|
export interface SemanticResult {
|
|
16
|
+
rowid: number;
|
|
15
17
|
source: string;
|
|
16
18
|
title: string;
|
|
17
19
|
content: string;
|
|
@@ -230,6 +232,7 @@ export async function semanticSearch(
|
|
|
230
232
|
|
|
231
233
|
sql = `
|
|
232
234
|
SELECT
|
|
235
|
+
s.rowid,
|
|
233
236
|
s.source,
|
|
234
237
|
s.title,
|
|
235
238
|
s.content,
|
|
@@ -252,6 +255,139 @@ export async function semanticSearch(
|
|
|
252
255
|
}
|
|
253
256
|
}
|
|
254
257
|
|
|
258
|
+
/**
 * One hit produced by hybrid search: the entry itself plus the fused
 * relevance score and the two component scores it was computed from.
 */
export interface HybridResult {
  /** Row identifier; the key on which vector and keyword hits are merged */
  rowid: number;
  /** Source the entry was found in */
  source: string;
  /** Entry title */
  title: string;
  /** Entry text; overwritten with the keyword-search content when a keyword hit exists */
  content: string;
  /** Metadata string copied from the underlying search result */
  metadata: string;
  /** Fused score: vector weight * vectorScore + text weight * textScore */
  score: number;
  /** Vector-similarity component; 0 when only the keyword search matched */
  vectorScore: number;
  /** Keyword-match component; 0 when only the vector search matched */
  textScore: number;
}
|
|
271
|
+
|
|
272
|
+
/**
 * Optional settings for hybridSearch. Every field may be omitted.
 */
export interface HybridSearchOptions {
  /** Restrict both the vector and the keyword search to a single source */
  source?: string;
  /** Maximum number of fused results to return (hybridSearch defaults to 20) */
  limit?: number;
  /** Project filter; hybridSearch forwards it to the vector search */
  project?: string;
  /** Date filter; hybridSearch forwards it to the keyword search */
  since?: string;
  /** Weight of the vector score in the fused score (hybridSearch defaults to 0.7) */
  vectorWeight?: number;
  /** Weight of the keyword score in the fused score (hybridSearch defaults to 0.3) */
  textWeight?: number;
}
|
|
280
|
+
|
|
281
|
+
/**
 * Normalize an FTS5 BM25 rank to a 0-1 score (higher = better match).
 *
 * FTS5 ranks are negative, and more negative means a better match, so the
 * score is derived from the magnitude of the negative part only:
 * score = 1 - 1 / (1 + max(0, -rank)).
 *
 *   rank = -15  → score ≈ 0.94
 *   rank = -1   → score = 0.50
 *   rank = -0.1 → score ≈ 0.09
 *
 * @param rank - BM25 rank as returned by the keyword search
 * @returns Score in [0, 1]; 0 for a zero, positive, or NaN rank
 */
function bm25RankToScore(rank: number): number {
  // NaN would otherwise flow into the fused score and make sorting unreliable.
  if (Number.isNaN(rank)) {
    return 0;
  }
  // Only the negative side counts as relevance. A positive rank must not be
  // rewarded the way Math.abs() would reward it.
  const magnitude = Math.max(0, -rank);
  return 1 - 1 / (1 + magnitude);
}
|
|
293
|
+
|
|
294
|
+
/**
 * Normalize a vector distance to a 0-1 score (higher = better match).
 *
 * Assumes a cosine distance in the range 0-2 (0 = identical, 2 = opposite),
 * mapped linearly: distance 0 → score 1, distance 2 → score 0.
 *
 * @param distance - Distance reported by the vector search
 * @returns Score clamped to [0, 1]; 0 when the distance is NaN
 */
function distanceToScore(distance: number): number {
  // Math.max/Math.min propagate NaN, so handle it explicitly.
  if (Number.isNaN(distance)) {
    return 0;
  }
  // Clamp both ends: distances above 2 floor at 0, and slightly negative
  // distances (floating-point error) must not produce a score above 1.
  return Math.min(1, Math.max(0, 1 - distance / 2));
}
|
|
302
|
+
|
|
303
|
+
/**
 * Perform hybrid search combining vector and keyword results.
 *
 * Issues a vector search and a keyword search for the same query, merges the
 * hits by rowid, and ranks them by a weighted sum of the two normalized
 * scores. A hit found by only one of the searches scores 0 for the other
 * component.
 *
 * NOTE(review): `project` is forwarded only to the vector search and `since`
 * only to the keyword search, so hits that reach the merge through the other
 * search are not subject to that filter. Confirm whether the underlying
 * searches can accept the missing option.
 *
 * @param query - Search query
 * @param options - Search options including optional weight tuning
 * @returns Up to `limit` results sorted by fused score, best first
 *          (0.7 vector + 0.3 keyword by default)
 * @throws Whatever either underlying search throws or rejects with
 */
export async function hybridSearch(
  query: string,
  options: HybridSearchOptions = {},
): Promise<HybridResult[]> {
  const vectorWeight = options.vectorWeight ?? 0.7;
  const textWeight = options.textWeight ?? 0.3;

  // Sanitize the limit: a negative value would make slice(0, limit) return
  // "all but the last n" rows, and NaN would reach both underlying searches.
  const requestedLimit = options.limit ?? 20;
  const limit = Number.isFinite(requestedLimit)
    ? Math.max(0, Math.floor(requestedLimit))
    : 20;

  // Fetch more results from each search to ensure good merge coverage
  const fetchLimit = Math.max(limit * 2, 50);

  // The keyword search is invoked inside an async wrapper so that an
  // exception it throws becomes a rejection observed by Promise.all. Calling
  // it bare while building the array would abort before Promise.all attaches
  // handlers to the already-started vector search, leaving that promise
  // unobserved.
  const [vectorResults, keywordResults] = await Promise.all([
    semanticSearch(query, {
      source: options.source,
      limit: fetchLimit,
      project: options.project,
    }),
    (async () =>
      keywordSearch(query, {
        source: options.source,
        limit: fetchLimit,
        since: options.since,
      }))(),
  ]);

  // Merge by rowid
  const merged = new Map<number, HybridResult>();

  // Seed the map with vector hits; their keyword component starts at 0.
  for (const r of vectorResults) {
    const vectorScore = distanceToScore(r.distance);
    merged.set(r.rowid, {
      rowid: r.rowid,
      source: r.source,
      title: r.title,
      content: r.content,
      metadata: r.metadata,
      vectorScore,
      textScore: 0,
      score: vectorWeight * vectorScore,
    });
  }

  // Fold in keyword hits, fusing scores where both searches found the row.
  for (const r of keywordResults) {
    const textScore = bm25RankToScore(r.rank);
    const existing = merged.get(r.rowid);

    if (existing) {
      existing.textScore = textScore;
      existing.score =
        vectorWeight * existing.vectorScore + textWeight * textScore;
      // Use keyword content (has snippets with highlights)
      existing.content = r.content;
    } else {
      // Keyword-only hit: no vector component
      merged.set(r.rowid, {
        rowid: r.rowid,
        source: r.source,
        title: r.title,
        content: r.content,
        metadata: r.metadata,
        vectorScore: 0,
        textScore,
        score: textWeight * textScore,
      });
    }
  }

  // Sort by fused score (descending) and limit
  return Array.from(merged.values())
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
|
|
390
|
+
|
|
255
391
|
/**
|
|
256
392
|
* Extract project from result metadata
|
|
257
393
|
*/
|