@voidwire/lore 1.7.3 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/about.ts CHANGED
@@ -24,7 +24,7 @@ export interface AboutResult {
24
24
  /**
25
25
  * Sources to query for project knowledge
26
26
  * Each source has a different field for project mapping (handled by list.ts)
27
- * Note: "insights" will be added when task 2.1 is complete
27
+ * Covers commits, captures, flux, teachings, and sessions
28
28
  */
29
29
  const ABOUT_SOURCES: Source[] = [
30
30
  "commits",
package/lib/indexers/index.ts CHANGED
@@ -6,12 +6,10 @@
6
6
  */
7
7
 
8
8
  import type { IndexerFunction } from "../indexer";
9
- import { indexEvents } from "./events";
10
9
  import { indexReadmes } from "./readmes";
11
10
  import { indexDevelopment } from "./development";
12
11
  import { indexCaptures } from "./captures";
13
12
  import { indexTeachings } from "./teachings";
14
- import { indexInsights } from "./insights";
15
13
  import { indexObservations } from "./observations";
16
14
  import { indexExplorations } from "./explorations";
17
15
  import { indexSessions } from "./sessions";
@@ -22,12 +20,10 @@ import { indexBlogs } from "./blogs";
22
20
  import { indexPersonal } from "./personal";
23
21
 
24
22
  export const indexers: Record<string, IndexerFunction> = {
25
- events: indexEvents,
26
23
  readmes: indexReadmes,
27
24
  development: indexDevelopment,
28
25
  captures: indexCaptures,
29
26
  teachings: indexTeachings,
30
- insights: indexInsights,
31
27
  observations: indexObservations,
32
28
  explorations: indexExplorations,
33
29
  sessions: indexSessions,
package/lib/list.ts CHANGED
@@ -13,7 +13,6 @@ import { getDatabasePath } from "./db.js";
13
13
  export type Source =
14
14
  | "development"
15
15
  | "flux"
16
- | "events"
17
16
  | "blogs"
18
17
  | "commits"
19
18
  | "explorations"
@@ -28,13 +27,11 @@ export type Source =
28
27
  | "habits"
29
28
  | "teachings"
30
29
  | "sessions"
31
- | "insights"
32
30
  | "observations";
33
31
 
34
32
  export const SOURCES: Source[] = [
35
33
  "development",
36
34
  "flux",
37
- "events",
38
35
  "blogs",
39
36
  "commits",
40
37
  "explorations",
@@ -49,7 +46,6 @@ export const SOURCES: Source[] = [
49
46
  "habits",
50
47
  "teachings",
51
48
  "sessions",
52
- "insights",
53
49
  "observations",
54
50
  ];
55
51
 
package/lib/projects.ts CHANGED
@@ -13,7 +13,6 @@ const PROJECT_SOURCES = [
13
13
  "commits",
14
14
  "sessions",
15
15
  "flux",
16
- "insights",
17
16
  "captures",
18
17
  "teachings",
19
18
  "observations",
package/lib/semantic.ts CHANGED
@@ -1,54 +1,19 @@
1
1
  /**
2
- * lib/semantic.ts - Semantic search via local embeddings
2
+ * lib/semantic.ts - Semantic search via embeddings
3
3
  *
4
- * Query embedding using @huggingface/transformers with nomic-embed-text-v1.5.
4
+ * Embedding via llm-core's embed() requires embed server running.
5
+ * No in-process fallback. Start with: llm embed-server start
5
6
  * KNN search against sqlite-vec virtual table.
6
7
  * Uses Bun's built-in SQLite with sqlite-vec extension.
7
8
  */
8
9
 
9
10
  import { Database } from "bun:sqlite";
10
11
  import { existsSync } from "fs";
11
- import { pipeline } from "@huggingface/transformers";
12
+ import { embed } from "@voidwire/llm-core";
12
13
  import { getDatabasePath, openDatabase } from "./db.js";
13
14
  import { search as keywordSearch, type SearchResult } from "./search.js";
14
15
  import { getConfig } from "./config.js";
15
16
 
16
- // ─── Embedding Server (server-first, in-process fallback) ────────────────────
17
-
18
- const EMBED_SERVER = process.env.EMBED_SERVER_URL || "http://localhost:8090";
19
-
20
- /**
21
- * Try the persistent embedding server first (warm: ~9ms vs 244ms in-process).
22
- * Returns null on any failure — caller falls back to in-process.
23
- */
24
- async function serverEmbed(
25
- text: string,
26
- prefix: string,
27
- ): Promise<number[] | null> {
28
- try {
29
- const resp = await fetch(`${EMBED_SERVER}/embed`, {
30
- method: "POST",
31
- headers: { "Content-Type": "application/json" },
32
- body: JSON.stringify({ text, prefix }),
33
- signal: AbortSignal.timeout(500),
34
- });
35
- if (!resp.ok) return null;
36
- const data = (await resp.json()) as {
37
- embedding?: number[];
38
- dims?: number;
39
- };
40
- if (
41
- !Array.isArray(data.embedding) ||
42
- data.embedding.length !== EMBEDDING_DIM
43
- ) {
44
- return null;
45
- }
46
- return data.embedding;
47
- } catch {
48
- return null; // Server not running or timed out — fall back silently
49
- }
50
- }
51
-
52
17
  // ─── Types ───────────────────────────────────────────────────────────────────
53
18
 
54
19
  export interface SemanticResult {
@@ -72,148 +37,37 @@ export interface SemanticSearchOptions {
72
37
 
73
38
  const { model: MODEL_NAME, dimensions: EMBEDDING_DIM } = getConfig().embedding;
74
39
 
75
- interface EmbeddingPipeline {
76
- (
77
- text: string,
78
- options?: { pooling?: string; normalize?: boolean },
79
- ): Promise<{
80
- data: Float32Array;
81
- }>;
82
- }
83
-
84
- // Cache the pipeline to avoid reloading on every query
85
- let cachedPipeline: EmbeddingPipeline | null = null;
86
-
87
- /**
88
- * Get or create the embedding pipeline
89
- * Pipeline is cached after first load for performance
90
- */
91
- async function getEmbeddingPipeline(): Promise<EmbeddingPipeline> {
92
- if (cachedPipeline) {
93
- return cachedPipeline;
94
- }
95
-
96
- try {
97
- const p = await pipeline("feature-extraction", MODEL_NAME, {
98
- dtype: "fp32",
99
- });
100
- cachedPipeline = p as unknown as EmbeddingPipeline;
101
- return cachedPipeline;
102
- } catch (error) {
103
- const message = error instanceof Error ? error.message : String(error);
104
- throw new Error(
105
- `Failed to load embedding model: ${message}\n` +
106
- `Note: First run downloads ~500MB model to ~/.cache/huggingface/hub`,
107
- );
108
- }
109
- }
110
-
111
40
  /**
112
- * Embed a query string using local transformers.js model
113
- * Uses "search_query: " prefix as required by nomic-embed-text
41
+ * Embed a query string via the embed server
42
+ * Uses "search_query" prefix as required by nomic-embed-text
114
43
  * @returns 768-dimensional embedding vector
115
44
  */
116
45
  export async function embedQuery(query: string): Promise<number[]> {
117
- // Try persistent server first (~9ms warm vs 244ms in-process)
118
- const serverResult = await serverEmbed(query, "search_query");
119
- if (serverResult) return serverResult;
120
-
121
- // Fall back to in-process model loading
122
- const embedder = await getEmbeddingPipeline();
123
-
124
- // nomic model requires "search_query: " prefix for queries
125
- // (FastEmbed uses "search_document: " prefix during indexing)
126
- const prefixedQuery = `search_query: ${query}`;
127
- const output = await embedder(prefixedQuery, {
128
- pooling: "mean",
129
- normalize: true,
130
- });
131
-
132
- // Output is a Tensor, convert to array
133
- const embedding = Array.from(output.data as Float32Array);
134
-
135
- if (embedding.length !== EMBEDDING_DIM) {
136
- throw new Error(
137
- `Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
138
- );
139
- }
140
-
141
- return embedding;
46
+ const result = await embed({ text: query, prefix: "search_query" });
47
+ return result.embedding;
142
48
  }
143
49
 
144
50
  /**
145
- * Embed a document string using local transformers.js model
146
- * Uses "search_document: " prefix as required by nomic-embed-text
51
+ * Embed a document string via the embed server
52
+ * Uses "search_document" prefix as required by nomic-embed-text
147
53
  * @returns 768-dimensional embedding vector
148
54
  */
149
55
  export async function embedDocument(text: string): Promise<number[]> {
150
- // Try persistent server first (~9ms warm vs 244ms in-process)
151
- const serverResult = await serverEmbed(text, "search_document");
152
- if (serverResult) return serverResult;
153
-
154
- // Fall back to in-process model loading
155
- const embedder = await getEmbeddingPipeline();
156
-
157
- const prefixedText = `search_document: ${text}`;
158
- const output = await embedder(prefixedText, {
159
- pooling: "mean",
160
- normalize: true,
161
- });
162
-
163
- const embedding = Array.from(output.data as Float32Array);
164
-
165
- if (embedding.length !== EMBEDDING_DIM) {
166
- throw new Error(
167
- `Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
168
- );
169
- }
170
-
171
- return embedding;
56
+ const result = await embed({ text, prefix: "search_document" });
57
+ return result.embedding;
172
58
  }
173
59
 
174
60
  /**
175
- * Batch embed multiple documents
176
- * More efficient than individual calls when embedding several documents
61
+ * Batch embed multiple documents via the embed server
177
62
  * @returns array of 768-dimensional embedding vectors
178
63
  */
179
64
  export async function embedDocuments(texts: string[]): Promise<number[][]> {
180
65
  if (texts.length === 0) return [];
181
-
182
- const results: number[][] = [];
183
-
184
- // Try persistent server first for each document
185
- let serverAvailable = true;
186
- for (const text of texts) {
187
- if (serverAvailable) {
188
- const serverResult = await serverEmbed(text, "search_document");
189
- if (serverResult) {
190
- results.push(serverResult);
191
- continue;
192
- }
193
- // Server failed — stop trying and fall back for remaining
194
- serverAvailable = false;
195
- }
196
-
197
- // Fall back to in-process
198
- const embedder = await getEmbeddingPipeline();
199
- const prefixedText = `search_document: ${text}`;
200
- const output = await embedder(prefixedText, {
201
- pooling: "mean",
202
- normalize: true,
203
- });
204
-
205
- const embedding = Array.from(output.data as Float32Array);
206
-
207
- if (embedding.length !== EMBEDDING_DIM) {
208
- throw new Error(
209
- `Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
210
- );
211
- }
212
-
213
- results.push(embedding);
214
- }
215
-
216
- return results;
66
+ return Promise.all(
67
+ texts.map((t) =>
68
+ embed({ text: t, prefix: "search_document" }).then((r) => r.embedding),
69
+ ),
70
+ );
217
71
  }
218
72
 
219
73
  /**
package/lib/source-map.ts CHANGED
@@ -19,7 +19,9 @@ export function getSourceForEvent(event: CaptureEvent): string {
19
19
  case "observation":
20
20
  return "observations";
21
21
  case "insight":
22
- return "insights";
22
+ throw new Error(
23
+ "getSourceForEvent: insight events should not be indexed — they go to log.jsonl only",
24
+ );
23
25
  case "task":
24
26
  return "flux";
25
27
  case "note":
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "1.7.3",
3
+ "version": "1.8.0",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
@@ -19,6 +19,11 @@
19
19
  "README.md",
20
20
  "LICENSE"
21
21
  ],
22
+ "scripts": {
23
+ "build": "tsc --noEmit false --outDir dist --declaration",
24
+ "typecheck": "tsc --noEmit",
25
+ "test": "bun test"
26
+ },
22
27
  "keywords": [
23
28
  "knowledge",
24
29
  "search",
@@ -43,16 +48,10 @@
43
48
  "bun": ">=1.0.0"
44
49
  },
45
50
  "dependencies": {
46
- "@huggingface/transformers": "^3.2.6",
47
51
  "@iarna/toml": "^2.2.5",
48
- "@voidwire/llm-core": "^0.3.1"
52
+ "@voidwire/llm-core": "0.4.0"
49
53
  },
50
54
  "devDependencies": {
51
55
  "bun-types": "1.3.5"
52
- },
53
- "scripts": {
54
- "build": "tsc --noEmit false --outDir dist --declaration",
55
- "typecheck": "tsc --noEmit",
56
- "test": "bun test"
57
56
  }
58
- }
57
+ }
package/lib/indexers/events.ts DELETED
@@ -1,65 +0,0 @@
1
- /**
2
- * lib/indexers/events.ts - Events indexer
3
- *
4
- * Aggregates development events from log.jsonl by project.
5
- * Each project gets one entry with all event lines.
6
- *
7
- * Source: events
8
- * Topic: project name
9
- * Type: (empty)
10
- * Timestamp: last event timestamp per project
11
- */
12
-
13
- import { readFileSync } from "fs";
14
- import { checkPath, type IndexerContext } from "../indexer";
15
-
16
- export async function indexEvents(ctx: IndexerContext): Promise<void> {
17
- const logPath = `${ctx.config.paths.data}/log.jsonl`;
18
- if (
19
- !checkPath(
20
- "events",
21
- "log.jsonl",
22
- logPath,
23
- "populated by Sable session hooks",
24
- )
25
- )
26
- return;
27
-
28
- const lines = readFileSync(logPath, "utf-8").split("\n").filter(Boolean);
29
- const projectData = new Map<
30
- string,
31
- { lines: string[]; lastTimestamp: string }
32
- >();
33
-
34
- for (const line of lines) {
35
- try {
36
- const event = JSON.parse(line);
37
- const project = event.data?.topic || "general";
38
- if (!projectData.has(project)) {
39
- projectData.set(project, { lines: [], lastTimestamp: "" });
40
- }
41
- const data = projectData.get(project)!;
42
- data.lines.push(
43
- `[${event.timestamp}] ${event.event}: ${event.type || ""}`,
44
- );
45
- if (event.timestamp) {
46
- data.lastTimestamp = event.timestamp;
47
- }
48
- } catch {
49
- // Skip malformed JSON
50
- continue;
51
- }
52
- }
53
-
54
- for (const [project, data] of projectData) {
55
- const content = data.lines.join("\n");
56
-
57
- ctx.insert({
58
- source: "events",
59
- title: `Development events: ${project}`,
60
- content,
61
- topic: project,
62
- timestamp: data.lastTimestamp,
63
- });
64
- }
65
- }
package/lib/indexers/insights.ts DELETED
@@ -1,58 +0,0 @@
1
- /**
2
- * lib/indexers/insights.ts - Insights indexer
3
- *
4
- * Reads log.jsonl and indexes insight summary captures.
5
- * Filters for event=captured AND type=insight AND data.subtype=summary.
6
- *
7
- * Source: insights
8
- * Topic: data.topic or "assistant"
9
- * Type: summary (fixed)
10
- * Timestamp: event timestamp
11
- */
12
-
13
- import { readFileSync } from "fs";
14
- import { checkPath, type IndexerContext } from "../indexer";
15
-
16
- export async function indexInsights(ctx: IndexerContext): Promise<void> {
17
- const logPath = `${ctx.config.paths.data}/log.jsonl`;
18
- if (
19
- !checkPath(
20
- "insights",
21
- "log.jsonl",
22
- logPath,
23
- "populated by Sable session hooks",
24
- )
25
- )
26
- return;
27
-
28
- const lines = readFileSync(logPath, "utf-8").split("\n").filter(Boolean);
29
-
30
- for (const line of lines) {
31
- try {
32
- const event = JSON.parse(line);
33
- if (event.event !== "captured" || event.type !== "insight") continue;
34
- if (event.data?.subtype !== "summary") continue;
35
-
36
- const topic = event.data?.topic || "assistant";
37
- const content = event.data?.content || "";
38
- const sessionId = event.data?.session_id;
39
-
40
- if (!content) continue;
41
-
42
- const metadata: Record<string, unknown> = {};
43
- if (sessionId) metadata.session_id = sessionId;
44
-
45
- ctx.insert({
46
- source: "insights",
47
- title: topic,
48
- content,
49
- topic,
50
- type: "summary",
51
- timestamp: event.timestamp,
52
- metadata: Object.keys(metadata).length > 0 ? metadata : undefined,
53
- });
54
- } catch (e) {
55
- continue;
56
- }
57
- }
58
- }