npm - @loreai/core - Versions diffs - 0.16.0 → 0.17.1 - Mend

@loreai/core 0.16.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (155)

package/README.md +11 -0
package/dist/bun/agents-file.d.ts +13 -1
package/dist/bun/agents-file.d.ts.map +1 -1
package/dist/bun/config.d.ts +20 -1
package/dist/bun/config.d.ts.map +1 -1
package/dist/bun/data.d.ts +174 -0
package/dist/bun/data.d.ts.map +1 -0
package/dist/bun/db.d.ts +65 -0
package/dist/bun/db.d.ts.map +1 -1
package/dist/bun/distillation.d.ts +49 -6
package/dist/bun/distillation.d.ts.map +1 -1
package/dist/bun/embedding-vendor.d.ts +66 -0
package/dist/bun/embedding-vendor.d.ts.map +1 -0
package/dist/bun/embedding-worker-types.d.ts +66 -0
package/dist/bun/embedding-worker-types.d.ts.map +1 -0
package/dist/bun/embedding-worker.d.ts +16 -0
package/dist/bun/embedding-worker.d.ts.map +1 -0
package/dist/bun/embedding-worker.js +100 -0
package/dist/bun/embedding-worker.js.map +7 -0
package/dist/bun/embedding.d.ts +91 -8
package/dist/bun/embedding.d.ts.map +1 -1
package/dist/bun/git.d.ts +47 -0
package/dist/bun/git.d.ts.map +1 -0
package/dist/bun/gradient.d.ts +19 -1
package/dist/bun/gradient.d.ts.map +1 -1
package/dist/bun/index.d.ts +9 -6
package/dist/bun/index.d.ts.map +1 -1
package/dist/bun/index.js +13029 -10885
package/dist/bun/index.js.map +4 -4
package/dist/bun/lat-reader.d.ts +1 -1
package/dist/bun/lat-reader.d.ts.map +1 -1
package/dist/bun/ltm.d.ts.map +1 -1
package/dist/bun/markdown.d.ts +11 -0
package/dist/bun/markdown.d.ts.map +1 -1
package/dist/bun/prompt.d.ts +1 -1
package/dist/bun/prompt.d.ts.map +1 -1
package/dist/bun/recall.d.ts +53 -0
package/dist/bun/recall.d.ts.map +1 -1
package/dist/bun/search.d.ts +29 -0
package/dist/bun/search.d.ts.map +1 -1
package/dist/bun/temporal.d.ts +2 -0
package/dist/bun/temporal.d.ts.map +1 -1
package/dist/bun/types.d.ts +15 -0
package/dist/bun/types.d.ts.map +1 -1
package/dist/bun/worker-model.d.ts +12 -9
package/dist/bun/worker-model.d.ts.map +1 -1
package/dist/node/agents-file.d.ts +13 -1
package/dist/node/agents-file.d.ts.map +1 -1
package/dist/node/config.d.ts +20 -1
package/dist/node/config.d.ts.map +1 -1
package/dist/node/data.d.ts +174 -0
package/dist/node/data.d.ts.map +1 -0
package/dist/node/db.d.ts +65 -0
package/dist/node/db.d.ts.map +1 -1
package/dist/node/distillation.d.ts +49 -6
package/dist/node/distillation.d.ts.map +1 -1
package/dist/node/embedding-vendor.d.ts +66 -0
package/dist/node/embedding-vendor.d.ts.map +1 -0
package/dist/node/embedding-worker-types.d.ts +66 -0
package/dist/node/embedding-worker-types.d.ts.map +1 -0
package/dist/node/embedding-worker.d.ts +16 -0
package/dist/node/embedding-worker.d.ts.map +1 -0
package/dist/node/embedding-worker.js +100 -0
package/dist/node/embedding-worker.js.map +7 -0
package/dist/node/embedding.d.ts +91 -8
package/dist/node/embedding.d.ts.map +1 -1
package/dist/node/git.d.ts +47 -0
package/dist/node/git.d.ts.map +1 -0
package/dist/node/gradient.d.ts +19 -1
package/dist/node/gradient.d.ts.map +1 -1
package/dist/node/index.d.ts +9 -6
package/dist/node/index.d.ts.map +1 -1
package/dist/node/index.js +13029 -10885
package/dist/node/index.js.map +4 -4
package/dist/node/lat-reader.d.ts +1 -1
package/dist/node/lat-reader.d.ts.map +1 -1
package/dist/node/ltm.d.ts.map +1 -1
package/dist/node/markdown.d.ts +11 -0
package/dist/node/markdown.d.ts.map +1 -1
package/dist/node/prompt.d.ts +1 -1
package/dist/node/prompt.d.ts.map +1 -1
package/dist/node/recall.d.ts +53 -0
package/dist/node/recall.d.ts.map +1 -1
package/dist/node/search.d.ts +29 -0
package/dist/node/search.d.ts.map +1 -1
package/dist/node/temporal.d.ts +2 -0
package/dist/node/temporal.d.ts.map +1 -1
package/dist/node/types.d.ts +15 -0
package/dist/node/types.d.ts.map +1 -1
package/dist/node/worker-model.d.ts +12 -9
package/dist/node/worker-model.d.ts.map +1 -1
package/dist/types/agents-file.d.ts +13 -1
package/dist/types/agents-file.d.ts.map +1 -1
package/dist/types/config.d.ts +20 -1
package/dist/types/config.d.ts.map +1 -1
package/dist/types/data.d.ts +174 -0
package/dist/types/data.d.ts.map +1 -0
package/dist/types/db.d.ts +65 -0
package/dist/types/db.d.ts.map +1 -1
package/dist/types/distillation.d.ts +49 -6
package/dist/types/distillation.d.ts.map +1 -1
package/dist/types/embedding-vendor.d.ts +66 -0
package/dist/types/embedding-vendor.d.ts.map +1 -0
package/dist/types/embedding-worker-types.d.ts +66 -0
package/dist/types/embedding-worker-types.d.ts.map +1 -0
package/dist/types/embedding-worker.d.ts +16 -0
package/dist/types/embedding-worker.d.ts.map +1 -0
package/dist/types/embedding.d.ts +91 -8
package/dist/types/embedding.d.ts.map +1 -1
package/dist/types/git.d.ts +47 -0
package/dist/types/git.d.ts.map +1 -0
package/dist/types/gradient.d.ts +19 -1
package/dist/types/gradient.d.ts.map +1 -1
package/dist/types/index.d.ts +9 -6
package/dist/types/index.d.ts.map +1 -1
package/dist/types/lat-reader.d.ts +1 -1
package/dist/types/lat-reader.d.ts.map +1 -1
package/dist/types/ltm.d.ts.map +1 -1
package/dist/types/markdown.d.ts +11 -0
package/dist/types/markdown.d.ts.map +1 -1
package/dist/types/prompt.d.ts +1 -1
package/dist/types/prompt.d.ts.map +1 -1
package/dist/types/recall.d.ts +53 -0
package/dist/types/recall.d.ts.map +1 -1
package/dist/types/search.d.ts +29 -0
package/dist/types/search.d.ts.map +1 -1
package/dist/types/temporal.d.ts +2 -0
package/dist/types/temporal.d.ts.map +1 -1
package/dist/types/types.d.ts +15 -0
package/dist/types/types.d.ts.map +1 -1
package/dist/types/worker-model.d.ts +12 -9
package/dist/types/worker-model.d.ts.map +1 -1
package/package.json +5 -2
package/src/agents-file.ts +87 -4
package/src/config.ts +68 -5
package/src/curator.ts +2 -2
package/src/data.ts +768 -0
package/src/db.ts +386 -7
package/src/distillation.ts +178 -35
package/src/embedding-vendor.ts +102 -0
package/src/embedding-worker-types.ts +82 -0
package/src/embedding-worker.ts +185 -0
package/src/embedding.ts +607 -61
package/src/git.ts +144 -0
package/src/gradient.ts +174 -17
package/src/index.ts +20 -0
package/src/lat-reader.ts +5 -11
package/src/ltm.ts +17 -44
package/src/markdown.ts +15 -0
package/src/prompt.ts +1 -2
package/src/recall.ts +401 -70
package/src/search.ts +71 -1
package/src/temporal.ts +42 -35
package/src/types.ts +15 -0
package/src/worker-model.ts +14 -9

package/src/distillation.ts CHANGED

@@ -12,7 +12,7 @@ import {
   RECURSIVE_SYSTEM,
   recursiveUser,
 } from "./prompt";
-import { needsUrgentDistillation, toolStripAnnotation } from "./gradient";
+import { toolStripAnnotation } from "./gradient";
 import { workerSessionIDs } from "./worker";
 import type { LLMClient } from "./types";
@@ -40,29 +40,91 @@ export function compressionRatio(
   return distilledTokens / Math.sqrt(sourceTokens);
 }
+/**
+ * Maximum allowed expansion for distillation output.
+ *
+ * Tiny segments can't meaningfully compress — distillation adds metadata
+ * (timestamps, importance markers, cross-references) that necessarily
+ * exceeds the source. Allow generous expansion for small segments while
+ * still enforcing compression on large ones.
+ *
+ * @returns Maximum allowed distilled tokens for a given source token count.
+ */
+export function maxAllowedExpansion(sourceTokens: number): number {
+  if (sourceTokens < 100) return sourceTokens * 5; // tiny: 8→40 is fine
+  if (sourceTokens < 500) return sourceTokens * 2; // small: 2x headroom
+  return sourceTokens; // large: must compress
+}
 /**
  * Segment detection: group related messages into distillation-sized chunks.
  *
- * When the message count exceeds `maxSegment`, prefers splitting at the
+ * When the total token count exceeds `maxTokens`, prefers splitting at the
  * largest inter-message time gap (if it's ≥ 3× the median gap) to respect
- * natural conversation boundaries. Falls back to count-based splitting at
- * `maxSegment` when timestamps are uniform.
+ * natural conversation boundaries. Falls back to token-boundary splitting
+ * when timestamps are uniform.
  *
- * Trailing segments with < 3 messages are merged into the previous segment
- * to avoid tiny distillation inputs with too little context.
+ * Trailing segments whose token sum is below {@link MIN_SEGMENT_TOKENS}
+ * are merged into the previous segment to avoid tiny distillation inputs
+ * with too little context.
  *
  * Exported for testing; `run()` is the production caller.
  */
 export function detectSegments(
   messages: TemporalMessage[],
-  maxSegment: number,
+  maxTokens: number,
 ): TemporalMessage[][] {
-  if (messages.length <= maxSegment) return [messages];
-  return splitSegments(messages, maxSegment);
+  const totalTokens = messages.reduce((s, m) => s + m.tokens, 0);
+  if (totalTokens <= maxTokens) return [messages];
+  return splitSegments(messages, maxTokens);
+}
+/**
+ * Compute the max_tokens budget for a worker LLM call.
+ *
+ * @param inputTokens  Estimated source token count
+ * @param ratio        Compression ratio (0.0–1.0) — output ≈ ratio × input
+ * @param floor        Minimum output tokens
+ * @param cap          Maximum output tokens
+ */
+export function workerTokenBudget(
+  inputTokens: number,
+  ratio: number,
+  floor: number,
+  cap: number,
+): number {
+  return Math.max(floor, Math.min(Math.ceil(inputTokens * ratio), cap));
+}
+/**
+ * Compute the max_tokens budget for gen-0 distillation of raw messages.
+ *
+ * Uses a √N-based formula (8 × √N) instead of a linear ratio so that the
+ * budget grows sub-linearly with input size. This naturally constrains the
+ * LLM to produce output at ~R ≈ 2–4 (the square-root boundary) and avoids
+ * expansion on small segments where a linear 0.25 ratio + 1024 floor gave
+ * the model far too much room.
+ *
+ * The multiplier (8) gives ~4× headroom above the R=2.0 target, accounting
+ * for the detailed observation format (emoji markers, timestamps, entity
+ * tags, exact numbers) required by the distillation prompt.
+ *
+ * @param sourceTokens  Estimated source token count from raw messages
+ * @returns             Token budget clamped to [256, 4096]
+ */
+export function distillTokenBudget(sourceTokens: number): number {
+  const MULTIPLIER = 8;
+  const FLOOR = 256;
+  const CAP = 4096;
+  return Math.max(FLOOR, Math.min(Math.ceil(MULTIPLIER * Math.sqrt(sourceTokens)), CAP));
 }
-/** Minimum segment size — segments smaller than this get merged. */
-const MIN_SEGMENT = 3;
+/**
+ * Minimum segment token count — trailing segments smaller than this get
+ * merged into the previous segment during splitting to avoid producing
+ * segments too small to compress meaningfully.
+ */
+const MIN_SEGMENT_TOKENS = 64;
 /**
  * Multiplier for the median gap threshold: a time gap must be at least
@@ -70,26 +132,35 @@ const MIN_SEGMENT = 3;
  */
 const GAP_THRESHOLD_MULTIPLIER = 3;
+/** Sum tokens for a slice of messages. */
+function sliceTokens(messages: TemporalMessage[], start: number, end: number): number {
+  let sum = 0;
+  for (let i = start; i < end; i++) sum += messages[i].tokens;
+  return sum;
+}
 function splitSegments(
   messages: TemporalMessage[],
-  maxSegment: number,
+  maxTokens: number,
 ): TemporalMessage[][] {
-  if (messages.length <= maxSegment) return [messages];
+  const totalTokens = messages.reduce((s, m) => s + m.tokens, 0);
+  if (totalTokens <= maxTokens) return [messages];
   // Find the split point: prefer the largest time gap if it's significant
-  const splitIdx = findSplitIndex(messages, maxSegment);
+  const splitIdx = findSplitIndex(messages, maxTokens);
   const left = messages.slice(0, splitIdx);
   const right = messages.slice(splitIdx);
   // Recurse on both halves
-  const result = splitSegments(left, maxSegment);
+  const result = splitSegments(left, maxTokens);
-  if (right.length < MIN_SEGMENT) {
+  const rightTokens = right.reduce((s, m) => s + m.tokens, 0);
+  if (rightTokens < MIN_SEGMENT_TOKENS) {
     // Merge tiny trailing segment into the last segment
     result[result.length - 1].push(...right);
   } else {
-    result.push(...splitSegments(right, maxSegment));
+    result.push(...splitSegments(right, maxTokens));
   }
   return result;
@@ -99,12 +170,13 @@ function splitSegments(
  * Choose where to split an oversized message array.
  *
  * If there's a time gap ≥ 3× the median gap AND it falls within a range
- * that would produce segments of at least MIN_SEGMENT size, use it.
- * Otherwise fall back to the count-based boundary at `maxSegment`.
+ * that would produce segments of at least MIN_SEGMENT_TOKENS on each side,
+ * use it. Otherwise fall back to the token-boundary split point (the index
+ * where cumulative tokens first exceed `maxTokens`).
  */
 function findSplitIndex(
   messages: TemporalMessage[],
-  maxSegment: number,
+  maxTokens: number,
 ): number {
   // Compute consecutive time gaps
   const gaps: Array<{ index: number; gap: number }> = [];
@@ -115,19 +187,35 @@ function findSplitIndex(
     });
   }
-  if (gaps.length === 0) return maxSegment;
+  // Compute the token-boundary fallback: first index where cumulative tokens exceed maxTokens
+  let cumulative = 0;
+  let tokenBoundary = messages.length; // fallback if all messages fit (shouldn't happen)
+  for (let i = 0; i < messages.length; i++) {
+    cumulative += messages[i].tokens;
+    if (cumulative > maxTokens) {
+      // Split so left half has indices [0, i), right half starts at i.
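+      // Clamp to 1 so the left half is never empty. The right half is
+      // non-empty only when there are ≥ 2 messages: for a single oversized
+      // message this yields 1 === messages.length (empty right half, left
+      // half unchanged) — NOTE(review): confirm callers handle that case.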
+      tokenBoundary = Math.max(1, i);
+      break;
+    }
+  }
+  if (gaps.length === 0) return tokenBoundary;
   // Find median gap
   const sortedGaps = gaps.map((g) => g.gap).sort((a, b) => a - b);
   const medianGap = sortedGaps[Math.floor(sortedGaps.length / 2)];
-  // Find the largest gap that would produce viable segments (≥ MIN_SEGMENT on each side)
+  // Find the largest gap that would produce viable segments
+  // (≥ MIN_SEGMENT_TOKENS on each side)
   let bestGap = { index: -1, gap: 0 };
   for (const g of gaps) {
+    const leftTokens = sliceTokens(messages, 0, g.index);
+    const rightTokens = sliceTokens(messages, g.index, messages.length);
     if (
       g.gap > bestGap.gap &&
-      g.index >= MIN_SEGMENT &&
-      messages.length - g.index >= MIN_SEGMENT
+      leftTokens >= MIN_SEGMENT_TOKENS &&
+      rightTokens >= MIN_SEGMENT_TOKENS
     ) {
       bestGap = g;
     }
@@ -138,8 +226,8 @@ function findSplitIndex(
     return bestGap.index;
   }
-  // Fall back to count-based splitting
-  return maxSegment;
+  // Fall back to token-boundary splitting
+  return tokenBoundary;
 }
 function formatTime(ms: number): string {
@@ -387,6 +475,7 @@ function storeDistillation(input: {
   generation: number;
   rCompression?: number;
   cNorm?: number;
+  callType?: "batch" | "direct";
 }): string {
   const pid = ensureProject(input.projectPath);
   const id = crypto.randomUUID();
@@ -394,8 +483,8 @@ function storeDistillation(input: {
   const tokens = Math.ceil(input.observations.length / 3);
   db()
     .query(
-      `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
-       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+      `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm, call_type)
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
     )
     .run(
       id,
@@ -410,13 +499,14 @@ function storeDistillation(input: {
       Date.now(),
       input.rCompression ?? null,
       input.cNorm ?? null,
+      input.callType ?? null,
     );
   return id;
 }
 // Count non-archived gen-0 distillations — these are the ones awaiting
 // meta-distillation. Archived gen-0 entries have already been consolidated.
-function gen0Count(projectPath: string, sessionID: string): number {
+export function gen0Count(projectPath: string, sessionID: string): number {
   const pid = ensureProject(projectPath);
   return (
     db()
@@ -539,6 +629,9 @@ export async function run(input: {
    *  where the caller is blocking on the result. Background/idle distillation
    *  should leave this false to benefit from batch API 50% cost savings. */
   urgent?: boolean;
+  /** Whether the LLM call will use batch or direct pricing. Recorded on the
+   *  distillation row for accurate historical cost estimates. */
+  callType?: "batch" | "direct";
 }): Promise<{ rounds: number; distilled: number }> {
   // Reset orphaned messages (marked distilled by a deleted/migrated distillation)
   const orphans = resetOrphans(input.projectPath, input.sessionID);
@@ -564,8 +657,22 @@ export async function run(input: {
       break;
     if (pending.length > 0) {
-      const segments = detectSegments(pending, cfg.distillation.maxSegment);
+      const segments = detectSegments(pending, cfg.distillation.maxSegmentTokens);
       for (const segment of segments) {
+        const segTokens = segment.reduce((s, m) => s + m.tokens, 0);
+        if (segTokens < cfg.distillation.minSegmentTokens) {
+          if (input.force) {
+            // Absorb: mark distilled without LLM call to avoid blocking
+            // the caller on useless work. Messages remain searchable via
+            // BM25/vector recall on the temporal table.
+            temporal.markDistilled(segment.map((m) => m.id));
+            log.info(
+              `absorb tiny segment: ${segment.length} msgs, ${segTokens} tokens (below min ${cfg.distillation.minSegmentTokens})`,
+            );
+          }
+          // else: leave undistilled to accumulate with future messages
+          continue;
+        }
         const result = await distillSegment({
           llm: input.llm,
           projectPath: input.projectPath,
@@ -573,6 +680,7 @@ export async function run(input: {
           messages: segment,
           model: input.model,
           urgent: input.urgent,
+          callType: input.callType,
         });
         if (result) {
           distilled += segment.length;
@@ -595,12 +703,15 @@ export async function run(input: {
         sessionID: input.sessionID,
         model: input.model,
         urgent: input.urgent,
+        callType: input.callType,
       });
       rounds++;
     }
-    // Check if we still need urgent distillation
-    if (!needsUrgentDistillation()) break;
+    // Continue looping only when explicitly forced (urgent/overflow recovery).
+    // Previously re-polled needsUrgentDistillation() here, but that consumed
+    // the per-session flag and raced with the caller that already checked it.
+    if (!input.force) break;
   }
   return { rounds, distilled };
@@ -613,6 +724,7 @@ async function distillSegment(input: {
   messages: TemporalMessage[];
   model?: { providerID: string; modelID: string };
   urgent?: boolean;
+  callType?: "batch" | "direct";
 }): Promise<DistillationResult | null> {
   const prior = latestObservations(input.projectPath, input.sessionID);
   const text = messagesToText(input.messages);
@@ -632,10 +744,12 @@ async function distillSegment(input: {
   });
   const model = input.model ?? config().model;
+  const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
+  const maxTokens = distillTokenBudget(sourceTokens);
   const responseText = await input.llm.prompt(
     DISTILLATION_SYSTEM,
     userContent,
-    { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
+    { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID, maxTokens },
   );
   if (!responseText) return null;
@@ -644,10 +758,24 @@ async function distillSegment(input: {
   // Compute context health metrics before storing.
   const distilledTokens = Math.ceil(result.observations.length / 3);
-  const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
   const rComp = compressionRatio(distilledTokens, sourceTokens);
   const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
+  // Expansion guard: discard distillation output that exceeds the allowed
+  // expansion limit. Tiny segments (< 100 tokens) get generous headroom
+  // because distillation necessarily adds metadata; large segments must
+  // actually compress. Still marks source messages as distilled to prevent
+  // infinite retry loops — they remain searchable via BM25/vector recall.
+  const expansionLimit = maxAllowedExpansion(sourceTokens);
+  if (distilledTokens > expansionLimit) {
+    temporal.markDistilled(input.messages.map((m) => m.id));
+    log.warn(
+      `distill expansion discarded: ${input.messages.length} msgs, ` +
+        `${sourceTokens}→${distilledTokens} tokens (exceeds ${expansionLimit} limit)`,
+    );
+    return null;
+  }
   const distillId = storeDistillation({
     projectPath: input.projectPath,
     sessionID: input.sessionID,
@@ -656,6 +784,7 @@ async function distillSegment(input: {
     generation: 0,
     rCompression: rComp,
     cNorm,
+    callType: input.callType,
   });
   temporal.markDistilled(input.messages.map((m) => m.id));
@@ -665,6 +794,16 @@ async function distillSegment(input: {
       `R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`,
   );
+  // Soft quality warning: R < 1.0 means the distillation is below the √N
+  // boundary, suggesting potentially lossy compression. Stored for
+  // monitoring — not a hard gate.
+  if (rComp < 1.0) {
+    log.warn(
+      `distill quality low: R=${rComp.toFixed(2)} (<1.0) on ${input.messages.length} msgs, ` +
+        `${sourceTokens}→${distilledTokens} tokens — may have lost detail`,
+    );
+  }
   // Fire-and-forget: embed the distillation for vector search
   if (embedding.isAvailable()) {
     embedding.embedDistillation(distillId, result.observations);
@@ -705,6 +844,7 @@ export async function metaDistill(input: {
   sessionID: string;
   model?: { providerID: string; modelID: string };
   urgent?: boolean;
+  callType?: "batch" | "direct";
 }): Promise<DistillationResult | null> {
   const existing = loadGen0(input.projectPath, input.sessionID);
@@ -729,10 +869,12 @@ export async function metaDistill(input: {
   const userContent = recursiveUser(existing, priorMeta?.observations);
   const model = input.model ?? config().model;
+  const inputTokens = Math.ceil(userContent.length / 3);
+  const maxTokens = workerTokenBudget(inputTokens, 0.25, 1024, 8192);
   const responseText = await input.llm.prompt(
     RECURSIVE_SYSTEM,
     userContent,
-    { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
+    { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID, maxTokens },
   );
   if (!responseText) return null;
@@ -767,6 +909,7 @@ export async function metaDistill(input: {
       observations: result.observations,
       sourceIDs: allSourceIDs,
       generation: maxGen + 1,
+      callType: input.callType,
     });
     // Archive the gen-0 distillations that were merged into gen-1+.
     // They remain searchable via BM25 recall but are excluded from the

package/src/embedding-vendor.ts ADDED

@@ -0,0 +1,102 @@
+/**
+ * Vendored bge-small registration for the standalone Lore binary.
+ *
+ * The Bun-compiled `lore` binary uses `bun build --compile` to bundle
+ * `fastembed` + `onnxruntime-node` + `@anush008/tokenizers-<platform>`
+ * directly into the executable — including the platform-specific
+ * `.node` addons which Bun embeds and dlopens from `$bunfs` at runtime.
+ *
+ * Two pieces don't fit into Bun's automatic bundling and need our help:
+ *
+ *  1. **Side-load shared libraries**. `onnxruntime_binding.node` does a
+ *     runtime `dlopen("libonnxruntime.so.1")` (or the .dylib / .dll
+ *     equivalent) for the actual ONNX Runtime computation library. Bun
+ *     doesn't follow this kind of dependency. The binary's wrapper
+ *     pre-loads these libs via `bun:ffi` *before* fastembed evaluates,
+ *     so when the addon's dlopen fires it finds the cached handle.
+ *
+ *  2. **Model weights + tokenizer**. fastembed downloads from the HF
+ *     Hub on first use; we want zero network on first run. The wrapper
+ *     embeds the bge-small INT8 files as Bun assets, writes them to a
+ *     real disk dir on first run, and sets `globalThis.__LORE_VENDOR_MODEL__`
+ *     to that path. This module exposes that registration to the
+ *     LocalProvider so it can hand the path to fastembed's CUSTOM-mode
+ *     init (`modelAbsoluteDirPath` + `modelName`).
+ *
+ * In npm-mode usage from `@loreai/opencode` / `@loreai/pi` the global
+ * is unset and `vendorModelInfo()` returns `null`, so the LocalProvider
+ * falls through to fastembed's default Qdrant repo + cache.
+ */
+// ---------------------------------------------------------------------------
+// Vendor registration (set by the binary wrapper, read here)
+// ---------------------------------------------------------------------------
+/** What the binary wrapper writes to globalThis after extracting model files. */
+export interface VendorRegistration {
+  /** Absolute path to the dir containing the bge-small files
+   *  (config.json, tokenizer.json, model_quantized.onnx, …). Pass to
+   *  fastembed as `modelAbsoluteDirPath` in CUSTOM init. */
+  modelAbsoluteDirPath: string;
+  /** Filename of the ONNX weights inside that dir. Pass to fastembed
+   *  as `modelName` in CUSTOM init. */
+  modelName: string;
+  /** Target identifier the binary was built for, e.g. "linux-x64".
+   *  Diagnostic only — the runtime doesn't branch on it. */
+  target: string;
+  /** Lore CLI version that produced the binary. Diagnostic only. */
+  version: string;
+}
+const REGISTRATION_KEY = "__LORE_VENDOR_MODEL__";
+/** Read the vendor registration written by the binary wrapper, if any. */
+function getRegistration(): VendorRegistration | null {
+  const g = globalThis as unknown as Record<string, VendorRegistration | undefined>;
+  return g[REGISTRATION_KEY] ?? null;
+}
+/** Test-only: programmatically set/clear the registration to exercise
+ *  both binary-mode and npm-mode code paths without spinning up a real
+ *  compiled binary. */
+export function _setVendorRegistration(reg: VendorRegistration | null): void {
+  const g = globalThis as unknown as Record<string, VendorRegistration | undefined>;
+  if (reg) g[REGISTRATION_KEY] = reg;
+  else delete g[REGISTRATION_KEY];
+}
+// ---------------------------------------------------------------------------
+// Public entry
+// ---------------------------------------------------------------------------
+/** Subset of the registration fastembed needs. Stripped of the
+ *  diagnostic fields so the LocalProvider has exactly what it should
+ *  hand to `FlagEmbedding.init`. */
+export interface VendorModelInfo {
+  modelAbsoluteDirPath: string;
+  modelName: string;
+}
+/**
+ * Resolve the bundled-model arguments for fastembed CUSTOM init. Returns
+ * `null` when no vendor is registered (npm-mode), so the caller can fall
+ * through to fastembed's default cacheDir/HF Hub flow.
+ */
+export function vendorModelInfo(): VendorModelInfo | null {
+  const reg = getRegistration();
+  if (!reg) return null;
+  return {
+    modelAbsoluteDirPath: reg.modelAbsoluteDirPath,
+    modelName: reg.modelName,
+  };
+}
+/** True iff this process is running inside a vendored Lore binary. */
+export function isVendoredBinary(): boolean {
+  return getRegistration() !== null;
+}
+/** The full registration, for diagnostics (`lore --print-vendor-info`). */
+export function vendorRegistration(): VendorRegistration | null {
+  return getRegistration();
+}

package/src/embedding-worker-types.ts ADDED

@@ -0,0 +1,82 @@
+/**
+ * Shared message types for the embedding worker thread.
+ *
+ * The embedding worker (`embedding-worker.ts`) runs fastembed/ONNX inference
+ * in a separate `node:worker_threads` Worker so the main thread's event loop
+ * stays free during inference. This file defines the message protocol between
+ * the main thread (`LocalProvider` in `embedding.ts`) and the worker.
+ *
+ * Imported by both sides — keep this file free of runtime dependencies.
+ */
+// ---------------------------------------------------------------------------
+// Main thread → Worker
+// ---------------------------------------------------------------------------
+/** Request an embedding batch. */
+export interface EmbedRequest {
+  type: "embed";
+  /** Monotonic request ID for correlating responses. */
+  id: number;
+  /** Texts to embed. */
+  texts: string[];
+  /** "document" for storage, "query" for search. */
+  inputType: "document" | "query";
+  /** "high" = recall queries (jump the queue), "normal" = backfill. */
+  priority: "high" | "normal";
+}
+/** Ask the worker to exit cleanly. */
+export interface ShutdownRequest {
+  type: "shutdown";
+}
+export type WorkerInbound = EmbedRequest | ShutdownRequest;
+// ---------------------------------------------------------------------------
+// Worker → Main thread
+// ---------------------------------------------------------------------------
+/** Embedding result — vectors are Float32Array[], sent via structured clone. */
+export interface EmbedResult {
+  type: "result";
+  /** Matches the request ID. */
+  id: number;
+  /** One Float32Array per input text. Sent via structured clone
+   *  (Bun preserves Float32Array identity across threads). */
+  vectors: Float32Array[];
+}
+/** A single embed request failed (ONNX error, etc.). */
+export interface EmbedError {
+  type: "error";
+  /** Matches the request ID. */
+  id: number;
+  /** Human-readable error message. */
+  error: string;
+}
+/** Model initialization failed inside the worker. All pending and future
+ *  requests should be rejected — the worker is unusable. */
+export interface InitError {
+  type: "init-error";
+  /** Human-readable error message. */
+  error: string;
+}
+export type WorkerOutbound = EmbedResult | EmbedError | InitError;
+// ---------------------------------------------------------------------------
+// workerData contract
+// ---------------------------------------------------------------------------
+/** Passed to the worker via `workerData` at construction time. */
+export interface WorkerInitData {
+  /** fastembed model name, e.g. "BGESmallENV15". */
+  modelName: string;
+  /** Vendored model info for binary mode, or null for npm mode.
+   *  Mirrors the `globalThis.__LORE_VENDOR_MODEL__` registration which
+   *  only exists on the main thread — passed explicitly so the worker
+   *  can hand it to `FlagEmbedding.init()`. */
+  vendorModel: { modelAbsoluteDirPath: string; modelName: string } | null;
+}