@voidwire/lore 1.7.3 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/about.ts CHANGED
@@ -24,7 +24,7 @@ export interface AboutResult {
24
24
  /**
25
25
  * Sources to query for project knowledge
26
26
  * Each source has a different field for project mapping (handled by list.ts)
27
- * Note: "insights" will be added when task 2.1 is complete
27
+ * Covers commits, captures, flux, teachings, and sessions
28
28
  */
29
29
  const ABOUT_SOURCES: Source[] = [
30
30
  "commits",
package/lib/indexers/index.ts CHANGED
@@ -6,12 +6,10 @@
6
6
  */
7
7
 
8
8
  import type { IndexerFunction } from "../indexer";
9
- import { indexEvents } from "./events";
10
9
  import { indexReadmes } from "./readmes";
11
10
  import { indexDevelopment } from "./development";
12
11
  import { indexCaptures } from "./captures";
13
12
  import { indexTeachings } from "./teachings";
14
- import { indexInsights } from "./insights";
15
13
  import { indexObservations } from "./observations";
16
14
  import { indexExplorations } from "./explorations";
17
15
  import { indexSessions } from "./sessions";
@@ -22,12 +20,10 @@ import { indexBlogs } from "./blogs";
22
20
  import { indexPersonal } from "./personal";
23
21
 
24
22
  export const indexers: Record<string, IndexerFunction> = {
25
- events: indexEvents,
26
23
  readmes: indexReadmes,
27
24
  development: indexDevelopment,
28
25
  captures: indexCaptures,
29
26
  teachings: indexTeachings,
30
- insights: indexInsights,
31
27
  observations: indexObservations,
32
28
  explorations: indexExplorations,
33
29
  sessions: indexSessions,
package/lib/list.ts CHANGED
@@ -13,7 +13,6 @@ import { getDatabasePath } from "./db.js";
13
13
  export type Source =
14
14
  | "development"
15
15
  | "flux"
16
- | "events"
17
16
  | "blogs"
18
17
  | "commits"
19
18
  | "explorations"
@@ -28,13 +27,11 @@ export type Source =
28
27
  | "habits"
29
28
  | "teachings"
30
29
  | "sessions"
31
- | "insights"
32
30
  | "observations";
33
31
 
34
32
  export const SOURCES: Source[] = [
35
33
  "development",
36
34
  "flux",
37
- "events",
38
35
  "blogs",
39
36
  "commits",
40
37
  "explorations",
@@ -49,7 +46,6 @@ export const SOURCES: Source[] = [
49
46
  "habits",
50
47
  "teachings",
51
48
  "sessions",
52
- "insights",
53
49
  "observations",
54
50
  ];
55
51
 
package/lib/projects.ts CHANGED
@@ -13,7 +13,6 @@ const PROJECT_SOURCES = [
13
13
  "commits",
14
14
  "sessions",
15
15
  "flux",
16
- "insights",
17
16
  "captures",
18
17
  "teachings",
19
18
  "observations",
package/lib/semantic.ts CHANGED
@@ -1,54 +1,19 @@
1
1
  /**
2
- * lib/semantic.ts - Semantic search via local embeddings
2
+ * lib/semantic.ts - Semantic search via embeddings
3
3
  *
4
- * Query embedding using @huggingface/transformers with nomic-embed-text-v1.5.
4
+ * Embedding via llm-core's embed() requires embed server running.
5
+ * No in-process fallback. Start with: llm embed-server start
5
6
  * KNN search against sqlite-vec virtual table.
6
7
  * Uses Bun's built-in SQLite with sqlite-vec extension.
7
8
  */
8
9
 
9
10
  import { Database } from "bun:sqlite";
10
11
  import { existsSync } from "fs";
11
- import { pipeline } from "@huggingface/transformers";
12
+ import { embed } from "@voidwire/llm-core";
12
13
  import { getDatabasePath, openDatabase } from "./db.js";
13
14
  import { search as keywordSearch, type SearchResult } from "./search.js";
14
15
  import { getConfig } from "./config.js";
15
16
 
16
- // ─── Embedding Server (server-first, in-process fallback) ────────────────────
17
-
18
- const EMBED_SERVER = process.env.EMBED_SERVER_URL || "http://localhost:8090";
19
-
20
- /**
21
- * Try the persistent embedding server first (warm: ~9ms vs 244ms in-process).
22
- * Returns null on any failure — caller falls back to in-process.
23
- */
24
- async function serverEmbed(
25
- text: string,
26
- prefix: string,
27
- ): Promise<number[] | null> {
28
- try {
29
- const resp = await fetch(`${EMBED_SERVER}/embed`, {
30
- method: "POST",
31
- headers: { "Content-Type": "application/json" },
32
- body: JSON.stringify({ text, prefix }),
33
- signal: AbortSignal.timeout(500),
34
- });
35
- if (!resp.ok) return null;
36
- const data = (await resp.json()) as {
37
- embedding?: number[];
38
- dims?: number;
39
- };
40
- if (
41
- !Array.isArray(data.embedding) ||
42
- data.embedding.length !== EMBEDDING_DIM
43
- ) {
44
- return null;
45
- }
46
- return data.embedding;
47
- } catch {
48
- return null; // Server not running or timed out — fall back silently
49
- }
50
- }
51
-
52
17
  // ─── Types ───────────────────────────────────────────────────────────────────
53
18
 
54
19
  export interface SemanticResult {
@@ -72,148 +37,37 @@ export interface SemanticSearchOptions {
72
37
 
73
38
  const { model: MODEL_NAME, dimensions: EMBEDDING_DIM } = getConfig().embedding;
74
39
 
75
- interface EmbeddingPipeline {
76
- (
77
- text: string,
78
- options?: { pooling?: string; normalize?: boolean },
79
- ): Promise<{
80
- data: Float32Array;
81
- }>;
82
- }
83
-
84
- // Cache the pipeline to avoid reloading on every query
85
- let cachedPipeline: EmbeddingPipeline | null = null;
86
-
87
- /**
88
- * Get or create the embedding pipeline
89
- * Pipeline is cached after first load for performance
90
- */
91
- async function getEmbeddingPipeline(): Promise<EmbeddingPipeline> {
92
- if (cachedPipeline) {
93
- return cachedPipeline;
94
- }
95
-
96
- try {
97
- const p = await pipeline("feature-extraction", MODEL_NAME, {
98
- dtype: "fp32",
99
- });
100
- cachedPipeline = p as unknown as EmbeddingPipeline;
101
- return cachedPipeline;
102
- } catch (error) {
103
- const message = error instanceof Error ? error.message : String(error);
104
- throw new Error(
105
- `Failed to load embedding model: ${message}\n` +
106
- `Note: First run downloads ~500MB model to ~/.cache/huggingface/hub`,
107
- );
108
- }
109
- }
110
-
111
40
  /**
112
- * Embed a query string using local transformers.js model
113
- * Uses "search_query: " prefix as required by nomic-embed-text
41
+ * Embed a query string via the embed server
42
+ * Uses "search_query" prefix as required by nomic-embed-text
114
43
  * @returns 768-dimensional embedding vector
115
44
  */
116
45
  export async function embedQuery(query: string): Promise<number[]> {
117
- // Try persistent server first (~9ms warm vs 244ms in-process)
118
- const serverResult = await serverEmbed(query, "search_query");
119
- if (serverResult) return serverResult;
120
-
121
- // Fall back to in-process model loading
122
- const embedder = await getEmbeddingPipeline();
123
-
124
- // nomic model requires "search_query: " prefix for queries
125
- // (FastEmbed uses "search_document: " prefix during indexing)
126
- const prefixedQuery = `search_query: ${query}`;
127
- const output = await embedder(prefixedQuery, {
128
- pooling: "mean",
129
- normalize: true,
130
- });
131
-
132
- // Output is a Tensor, convert to array
133
- const embedding = Array.from(output.data as Float32Array);
134
-
135
- if (embedding.length !== EMBEDDING_DIM) {
136
- throw new Error(
137
- `Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
138
- );
139
- }
140
-
141
- return embedding;
46
+ const result = await embed({ text: query, prefix: "search_query" });
47
+ return result.embedding;
142
48
  }
143
49
 
144
50
  /**
145
- * Embed a document string using local transformers.js model
146
- * Uses "search_document: " prefix as required by nomic-embed-text
51
+ * Embed a document string via the embed server
52
+ * Uses "search_document" prefix as required by nomic-embed-text
147
53
  * @returns 768-dimensional embedding vector
148
54
  */
149
55
  export async function embedDocument(text: string): Promise<number[]> {
150
- // Try persistent server first (~9ms warm vs 244ms in-process)
151
- const serverResult = await serverEmbed(text, "search_document");
152
- if (serverResult) return serverResult;
153
-
154
- // Fall back to in-process model loading
155
- const embedder = await getEmbeddingPipeline();
156
-
157
- const prefixedText = `search_document: ${text}`;
158
- const output = await embedder(prefixedText, {
159
- pooling: "mean",
160
- normalize: true,
161
- });
162
-
163
- const embedding = Array.from(output.data as Float32Array);
164
-
165
- if (embedding.length !== EMBEDDING_DIM) {
166
- throw new Error(
167
- `Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
168
- );
169
- }
170
-
171
- return embedding;
56
+ const result = await embed({ text, prefix: "search_document" });
57
+ return result.embedding;
172
58
  }
173
59
 
174
60
  /**
175
- * Batch embed multiple documents
176
- * More efficient than individual calls when embedding several documents
61
+ * Batch embed multiple documents via the embed server
177
62
  * @returns array of 768-dimensional embedding vectors
178
63
  */
179
64
  export async function embedDocuments(texts: string[]): Promise<number[][]> {
180
65
  if (texts.length === 0) return [];
181
-
182
- const results: number[][] = [];
183
-
184
- // Try persistent server first for each document
185
- let serverAvailable = true;
186
- for (const text of texts) {
187
- if (serverAvailable) {
188
- const serverResult = await serverEmbed(text, "search_document");
189
- if (serverResult) {
190
- results.push(serverResult);
191
- continue;
192
- }
193
- // Server failed — stop trying and fall back for remaining
194
- serverAvailable = false;
195
- }
196
-
197
- // Fall back to in-process
198
- const embedder = await getEmbeddingPipeline();
199
- const prefixedText = `search_document: ${text}`;
200
- const output = await embedder(prefixedText, {
201
- pooling: "mean",
202
- normalize: true,
203
- });
204
-
205
- const embedding = Array.from(output.data as Float32Array);
206
-
207
- if (embedding.length !== EMBEDDING_DIM) {
208
- throw new Error(
209
- `Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
210
- );
211
- }
212
-
213
- results.push(embedding);
214
- }
215
-
216
- return results;
66
+ return Promise.all(
67
+ texts.map((t) =>
68
+ embed({ text: t, prefix: "search_document" }).then((r) => r.embedding),
69
+ ),
70
+ );
217
71
  }
218
72
 
219
73
  /**
package/lib/source-map.ts CHANGED
@@ -19,7 +19,9 @@ export function getSourceForEvent(event: CaptureEvent): string {
19
19
  case "observation":
20
20
  return "observations";
21
21
  case "insight":
22
- return "insights";
22
+ throw new Error(
23
+ "getSourceForEvent: insight events should not be indexed — they go to log.jsonl only",
24
+ );
23
25
  case "task":
24
26
  return "flux";
25
27
  case "note":
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "1.7.3",
3
+ "version": "1.8.0",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
@@ -19,6 +19,11 @@
19
19
  "README.md",
20
20
  "LICENSE"
21
21
  ],
22
+ "scripts": {
23
+ "build": "tsc --noEmit false --outDir dist --declaration",
24
+ "typecheck": "tsc --noEmit",
25
+ "test": "bun test"
26
+ },
22
27
  "keywords": [
23
28
  "knowledge",
24
29
  "search",
@@ -43,16 +48,10 @@
43
48
  "bun": ">=1.0.0"
44
49
  },
45
50
  "dependencies": {
46
- "@huggingface/transformers": "^3.2.6",
47
51
  "@iarna/toml": "^2.2.5",
48
- "@voidwire/llm-core": "^0.3.1"
52
+ "@voidwire/llm-core": "0.4.0"
49
53
  },
50
54
  "devDependencies": {
51
55
  "bun-types": "1.3.5"
52
- },
53
- "scripts": {
54
- "build": "tsc --noEmit false --outDir dist --declaration",
55
- "typecheck": "tsc --noEmit",
56
- "test": "bun test"
57
56
  }
58
- }
57
+ }
package/lib/indexers/events.ts DELETED
@@ -1,65 +0,0 @@
1
- /**
2
- * lib/indexers/events.ts - Events indexer
3
- *
4
- * Aggregates development events from log.jsonl by project.
5
- * Each project gets one entry with all event lines.
6
- *
7
- * Source: events
8
- * Topic: project name
9
- * Type: (empty)
10
- * Timestamp: last event timestamp per project
11
- */
12
-
13
- import { readFileSync } from "fs";
14
- import { checkPath, type IndexerContext } from "../indexer";
15
-
16
- export async function indexEvents(ctx: IndexerContext): Promise<void> {
17
- const logPath = `${ctx.config.paths.data}/log.jsonl`;
18
- if (
19
- !checkPath(
20
- "events",
21
- "log.jsonl",
22
- logPath,
23
- "populated by Sable session hooks",
24
- )
25
- )
26
- return;
27
-
28
- const lines = readFileSync(logPath, "utf-8").split("\n").filter(Boolean);
29
- const projectData = new Map<
30
- string,
31
- { lines: string[]; lastTimestamp: string }
32
- >();
33
-
34
- for (const line of lines) {
35
- try {
36
- const event = JSON.parse(line);
37
- const project = event.data?.topic || "general";
38
- if (!projectData.has(project)) {
39
- projectData.set(project, { lines: [], lastTimestamp: "" });
40
- }
41
- const data = projectData.get(project)!;
42
- data.lines.push(
43
- `[${event.timestamp}] ${event.event}: ${event.type || ""}`,
44
- );
45
- if (event.timestamp) {
46
- data.lastTimestamp = event.timestamp;
47
- }
48
- } catch {
49
- // Skip malformed JSON
50
- continue;
51
- }
52
- }
53
-
54
- for (const [project, data] of projectData) {
55
- const content = data.lines.join("\n");
56
-
57
- ctx.insert({
58
- source: "events",
59
- title: `Development events: ${project}`,
60
- content,
61
- topic: project,
62
- timestamp: data.lastTimestamp,
63
- });
64
- }
65
- }
package/lib/indexers/insights.ts DELETED
@@ -1,58 +0,0 @@
1
- /**
2
- * lib/indexers/insights.ts - Insights indexer
3
- *
4
- * Reads log.jsonl and indexes insight summary captures.
5
- * Filters for event=captured AND type=insight AND data.subtype=summary.
6
- *
7
- * Source: insights
8
- * Topic: data.topic or "assistant"
9
- * Type: summary (fixed)
10
- * Timestamp: event timestamp
11
- */
12
-
13
- import { readFileSync } from "fs";
14
- import { checkPath, type IndexerContext } from "../indexer";
15
-
16
- export async function indexInsights(ctx: IndexerContext): Promise<void> {
17
- const logPath = `${ctx.config.paths.data}/log.jsonl`;
18
- if (
19
- !checkPath(
20
- "insights",
21
- "log.jsonl",
22
- logPath,
23
- "populated by Sable session hooks",
24
- )
25
- )
26
- return;
27
-
28
- const lines = readFileSync(logPath, "utf-8").split("\n").filter(Boolean);
29
-
30
- for (const line of lines) {
31
- try {
32
- const event = JSON.parse(line);
33
- if (event.event !== "captured" || event.type !== "insight") continue;
34
- if (event.data?.subtype !== "summary") continue;
35
-
36
- const topic = event.data?.topic || "assistant";
37
- const content = event.data?.content || "";
38
- const sessionId = event.data?.session_id;
39
-
40
- if (!content) continue;
41
-
42
- const metadata: Record<string, unknown> = {};
43
- if (sessionId) metadata.session_id = sessionId;
44
-
45
- ctx.insert({
46
- source: "insights",
47
- title: topic,
48
- content,
49
- topic,
50
- type: "summary",
51
- timestamp: event.timestamp,
52
- metadata: Object.keys(metadata).length > 0 ? metadata : undefined,
53
- });
54
- } catch (e) {
55
- continue;
56
- }
57
- }
58
- }