@loreai/core 0.16.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/README.md +11 -0
  2. package/dist/bun/agents-file.d.ts +13 -1
  3. package/dist/bun/agents-file.d.ts.map +1 -1
  4. package/dist/bun/config.d.ts +20 -1
  5. package/dist/bun/config.d.ts.map +1 -1
  6. package/dist/bun/data.d.ts +174 -0
  7. package/dist/bun/data.d.ts.map +1 -0
  8. package/dist/bun/db.d.ts +65 -0
  9. package/dist/bun/db.d.ts.map +1 -1
  10. package/dist/bun/distillation.d.ts +49 -6
  11. package/dist/bun/distillation.d.ts.map +1 -1
  12. package/dist/bun/embedding-vendor.d.ts +66 -0
  13. package/dist/bun/embedding-vendor.d.ts.map +1 -0
  14. package/dist/bun/embedding-worker-types.d.ts +66 -0
  15. package/dist/bun/embedding-worker-types.d.ts.map +1 -0
  16. package/dist/bun/embedding-worker.d.ts +16 -0
  17. package/dist/bun/embedding-worker.d.ts.map +1 -0
  18. package/dist/bun/embedding-worker.js +100 -0
  19. package/dist/bun/embedding-worker.js.map +7 -0
  20. package/dist/bun/embedding.d.ts +91 -8
  21. package/dist/bun/embedding.d.ts.map +1 -1
  22. package/dist/bun/git.d.ts +47 -0
  23. package/dist/bun/git.d.ts.map +1 -0
  24. package/dist/bun/gradient.d.ts +19 -1
  25. package/dist/bun/gradient.d.ts.map +1 -1
  26. package/dist/bun/index.d.ts +9 -6
  27. package/dist/bun/index.d.ts.map +1 -1
  28. package/dist/bun/index.js +13029 -10885
  29. package/dist/bun/index.js.map +4 -4
  30. package/dist/bun/lat-reader.d.ts +1 -1
  31. package/dist/bun/lat-reader.d.ts.map +1 -1
  32. package/dist/bun/ltm.d.ts.map +1 -1
  33. package/dist/bun/markdown.d.ts +11 -0
  34. package/dist/bun/markdown.d.ts.map +1 -1
  35. package/dist/bun/prompt.d.ts +1 -1
  36. package/dist/bun/prompt.d.ts.map +1 -1
  37. package/dist/bun/recall.d.ts +53 -0
  38. package/dist/bun/recall.d.ts.map +1 -1
  39. package/dist/bun/search.d.ts +29 -0
  40. package/dist/bun/search.d.ts.map +1 -1
  41. package/dist/bun/temporal.d.ts +2 -0
  42. package/dist/bun/temporal.d.ts.map +1 -1
  43. package/dist/bun/types.d.ts +15 -0
  44. package/dist/bun/types.d.ts.map +1 -1
  45. package/dist/bun/worker-model.d.ts +12 -9
  46. package/dist/bun/worker-model.d.ts.map +1 -1
  47. package/dist/node/agents-file.d.ts +13 -1
  48. package/dist/node/agents-file.d.ts.map +1 -1
  49. package/dist/node/config.d.ts +20 -1
  50. package/dist/node/config.d.ts.map +1 -1
  51. package/dist/node/data.d.ts +174 -0
  52. package/dist/node/data.d.ts.map +1 -0
  53. package/dist/node/db.d.ts +65 -0
  54. package/dist/node/db.d.ts.map +1 -1
  55. package/dist/node/distillation.d.ts +49 -6
  56. package/dist/node/distillation.d.ts.map +1 -1
  57. package/dist/node/embedding-vendor.d.ts +66 -0
  58. package/dist/node/embedding-vendor.d.ts.map +1 -0
  59. package/dist/node/embedding-worker-types.d.ts +66 -0
  60. package/dist/node/embedding-worker-types.d.ts.map +1 -0
  61. package/dist/node/embedding-worker.d.ts +16 -0
  62. package/dist/node/embedding-worker.d.ts.map +1 -0
  63. package/dist/node/embedding-worker.js +100 -0
  64. package/dist/node/embedding-worker.js.map +7 -0
  65. package/dist/node/embedding.d.ts +91 -8
  66. package/dist/node/embedding.d.ts.map +1 -1
  67. package/dist/node/git.d.ts +47 -0
  68. package/dist/node/git.d.ts.map +1 -0
  69. package/dist/node/gradient.d.ts +19 -1
  70. package/dist/node/gradient.d.ts.map +1 -1
  71. package/dist/node/index.d.ts +9 -6
  72. package/dist/node/index.d.ts.map +1 -1
  73. package/dist/node/index.js +13029 -10885
  74. package/dist/node/index.js.map +4 -4
  75. package/dist/node/lat-reader.d.ts +1 -1
  76. package/dist/node/lat-reader.d.ts.map +1 -1
  77. package/dist/node/ltm.d.ts.map +1 -1
  78. package/dist/node/markdown.d.ts +11 -0
  79. package/dist/node/markdown.d.ts.map +1 -1
  80. package/dist/node/prompt.d.ts +1 -1
  81. package/dist/node/prompt.d.ts.map +1 -1
  82. package/dist/node/recall.d.ts +53 -0
  83. package/dist/node/recall.d.ts.map +1 -1
  84. package/dist/node/search.d.ts +29 -0
  85. package/dist/node/search.d.ts.map +1 -1
  86. package/dist/node/temporal.d.ts +2 -0
  87. package/dist/node/temporal.d.ts.map +1 -1
  88. package/dist/node/types.d.ts +15 -0
  89. package/dist/node/types.d.ts.map +1 -1
  90. package/dist/node/worker-model.d.ts +12 -9
  91. package/dist/node/worker-model.d.ts.map +1 -1
  92. package/dist/types/agents-file.d.ts +13 -1
  93. package/dist/types/agents-file.d.ts.map +1 -1
  94. package/dist/types/config.d.ts +20 -1
  95. package/dist/types/config.d.ts.map +1 -1
  96. package/dist/types/data.d.ts +174 -0
  97. package/dist/types/data.d.ts.map +1 -0
  98. package/dist/types/db.d.ts +65 -0
  99. package/dist/types/db.d.ts.map +1 -1
  100. package/dist/types/distillation.d.ts +49 -6
  101. package/dist/types/distillation.d.ts.map +1 -1
  102. package/dist/types/embedding-vendor.d.ts +66 -0
  103. package/dist/types/embedding-vendor.d.ts.map +1 -0
  104. package/dist/types/embedding-worker-types.d.ts +66 -0
  105. package/dist/types/embedding-worker-types.d.ts.map +1 -0
  106. package/dist/types/embedding-worker.d.ts +16 -0
  107. package/dist/types/embedding-worker.d.ts.map +1 -0
  108. package/dist/types/embedding.d.ts +91 -8
  109. package/dist/types/embedding.d.ts.map +1 -1
  110. package/dist/types/git.d.ts +47 -0
  111. package/dist/types/git.d.ts.map +1 -0
  112. package/dist/types/gradient.d.ts +19 -1
  113. package/dist/types/gradient.d.ts.map +1 -1
  114. package/dist/types/index.d.ts +9 -6
  115. package/dist/types/index.d.ts.map +1 -1
  116. package/dist/types/lat-reader.d.ts +1 -1
  117. package/dist/types/lat-reader.d.ts.map +1 -1
  118. package/dist/types/ltm.d.ts.map +1 -1
  119. package/dist/types/markdown.d.ts +11 -0
  120. package/dist/types/markdown.d.ts.map +1 -1
  121. package/dist/types/prompt.d.ts +1 -1
  122. package/dist/types/prompt.d.ts.map +1 -1
  123. package/dist/types/recall.d.ts +53 -0
  124. package/dist/types/recall.d.ts.map +1 -1
  125. package/dist/types/search.d.ts +29 -0
  126. package/dist/types/search.d.ts.map +1 -1
  127. package/dist/types/temporal.d.ts +2 -0
  128. package/dist/types/temporal.d.ts.map +1 -1
  129. package/dist/types/types.d.ts +15 -0
  130. package/dist/types/types.d.ts.map +1 -1
  131. package/dist/types/worker-model.d.ts +12 -9
  132. package/dist/types/worker-model.d.ts.map +1 -1
  133. package/package.json +5 -2
  134. package/src/agents-file.ts +87 -4
  135. package/src/config.ts +68 -5
  136. package/src/curator.ts +2 -2
  137. package/src/data.ts +768 -0
  138. package/src/db.ts +386 -7
  139. package/src/distillation.ts +178 -35
  140. package/src/embedding-vendor.ts +102 -0
  141. package/src/embedding-worker-types.ts +82 -0
  142. package/src/embedding-worker.ts +185 -0
  143. package/src/embedding.ts +607 -61
  144. package/src/git.ts +144 -0
  145. package/src/gradient.ts +174 -17
  146. package/src/index.ts +20 -0
  147. package/src/lat-reader.ts +5 -11
  148. package/src/ltm.ts +17 -44
  149. package/src/markdown.ts +15 -0
  150. package/src/prompt.ts +1 -2
  151. package/src/recall.ts +401 -70
  152. package/src/search.ts +71 -1
  153. package/src/temporal.ts +42 -35
  154. package/src/types.ts +15 -0
  155. package/src/worker-model.ts +14 -9
@@ -12,7 +12,7 @@ import {
12
12
  RECURSIVE_SYSTEM,
13
13
  recursiveUser,
14
14
  } from "./prompt";
15
- import { needsUrgentDistillation, toolStripAnnotation } from "./gradient";
15
+ import { toolStripAnnotation } from "./gradient";
16
16
  import { workerSessionIDs } from "./worker";
17
17
  import type { LLMClient } from "./types";
18
18
 
@@ -40,29 +40,91 @@ export function compressionRatio(
40
40
  return distilledTokens / Math.sqrt(sourceTokens);
41
41
  }
42
42
 
43
+ /**
44
+ * Maximum allowed expansion for distillation output.
45
+ *
46
+ * Tiny segments can't meaningfully compress — distillation adds metadata
47
+ * (timestamps, importance markers, cross-references) that necessarily
48
+ * exceeds the source. Allow generous expansion for small segments while
49
+ * still enforcing compression on large ones.
50
+ *
51
+ * @returns Maximum allowed distilled tokens for a given source token count.
52
+ */
53
+ export function maxAllowedExpansion(sourceTokens: number): number {
54
+ if (sourceTokens < 100) return sourceTokens * 5; // tiny: 8→40 is fine
55
+ if (sourceTokens < 500) return sourceTokens * 2; // small: 2x headroom
56
+ return sourceTokens; // large: must compress
57
+ }
58
+
43
59
  /**
44
60
  * Segment detection: group related messages into distillation-sized chunks.
45
61
  *
46
- * When the message count exceeds `maxSegment`, prefers splitting at the
62
+ * When the total token count exceeds `maxTokens`, prefers splitting at the
47
63
  * largest inter-message time gap (if it's ≥ 3× the median gap) to respect
48
- * natural conversation boundaries. Falls back to count-based splitting at
49
- * `maxSegment` when timestamps are uniform.
64
+ * natural conversation boundaries. Falls back to token-boundary splitting
65
+ * when timestamps are uniform.
50
66
  *
51
- * Trailing segments with < 3 messages are merged into the previous segment
52
- * to avoid tiny distillation inputs with too little context.
67
+ * Trailing segments whose token sum is below {@link MIN_SEGMENT_TOKENS}
68
+ * are merged into the previous segment to avoid tiny distillation inputs
69
+ * with too little context.
53
70
  *
54
71
  * Exported for testing; `run()` is the production caller.
55
72
  */
56
73
  export function detectSegments(
57
74
  messages: TemporalMessage[],
58
- maxSegment: number,
75
+ maxTokens: number,
59
76
  ): TemporalMessage[][] {
60
- if (messages.length <= maxSegment) return [messages];
61
- return splitSegments(messages, maxSegment);
77
+ const totalTokens = messages.reduce((s, m) => s + m.tokens, 0);
78
+ if (totalTokens <= maxTokens) return [messages];
79
+ return splitSegments(messages, maxTokens);
80
+ }
81
+
82
+ /**
83
+ * Compute the max_tokens budget for a worker LLM call.
84
+ *
85
+ * @param inputTokens Estimated source token count
86
+ * @param ratio Compression ratio (0.0–1.0) — output ≈ ratio × input
87
+ * @param floor Minimum output tokens
88
+ * @param cap Maximum output tokens
89
+ */
90
+ export function workerTokenBudget(
91
+ inputTokens: number,
92
+ ratio: number,
93
+ floor: number,
94
+ cap: number,
95
+ ): number {
96
+ return Math.max(floor, Math.min(Math.ceil(inputTokens * ratio), cap));
97
+ }
98
+
99
+ /**
100
+ * Compute the max_tokens budget for gen-0 distillation of raw messages.
101
+ *
102
+ * Uses a √N-based formula (8 × √N) instead of a linear ratio so that the
103
+ * budget grows sub-linearly with input size. This naturally constrains the
104
+ * LLM to produce output at ~R ≈ 2–4 (the square-root boundary) and avoids
105
+ * expansion on small segments where a linear 0.25 ratio + 1024 floor gave
106
+ * the model far too much room.
107
+ *
108
+ * The multiplier (8) gives ~4× headroom above the R=2.0 target, accounting
109
+ * for the detailed observation format (emoji markers, timestamps, entity
110
+ * tags, exact numbers) required by the distillation prompt.
111
+ *
112
+ * @param sourceTokens Estimated source token count from raw messages
113
+ * @returns Token budget clamped to [256, 4096]
114
+ */
115
+ export function distillTokenBudget(sourceTokens: number): number {
116
+ const MULTIPLIER = 8;
117
+ const FLOOR = 256;
118
+ const CAP = 4096;
119
+ return Math.max(FLOOR, Math.min(Math.ceil(MULTIPLIER * Math.sqrt(sourceTokens)), CAP));
62
120
  }
63
121
 
64
- /** Minimum segment size — segments smaller than this get merged. */
65
- const MIN_SEGMENT = 3;
122
+ /**
123
+ * Minimum segment token count — trailing segments smaller than this get
124
+ * merged into the previous segment during splitting to avoid producing
125
+ * segments too small to compress meaningfully.
126
+ */
127
+ const MIN_SEGMENT_TOKENS = 64;
66
128
 
67
129
  /**
68
130
  * Multiplier for the median gap threshold: a time gap must be at least
@@ -70,26 +132,35 @@ const MIN_SEGMENT = 3;
70
132
  */
71
133
  const GAP_THRESHOLD_MULTIPLIER = 3;
72
134
 
135
+ /** Sum tokens for a slice of messages. */
136
+ function sliceTokens(messages: TemporalMessage[], start: number, end: number): number {
137
+ let sum = 0;
138
+ for (let i = start; i < end; i++) sum += messages[i].tokens;
139
+ return sum;
140
+ }
141
+
73
142
  function splitSegments(
74
143
  messages: TemporalMessage[],
75
- maxSegment: number,
144
+ maxTokens: number,
76
145
  ): TemporalMessage[][] {
77
- if (messages.length <= maxSegment) return [messages];
146
+ const totalTokens = messages.reduce((s, m) => s + m.tokens, 0);
147
+ if (totalTokens <= maxTokens) return [messages];
78
148
 
79
149
  // Find the split point: prefer the largest time gap if it's significant
80
- const splitIdx = findSplitIndex(messages, maxSegment);
150
+ const splitIdx = findSplitIndex(messages, maxTokens);
81
151
 
82
152
  const left = messages.slice(0, splitIdx);
83
153
  const right = messages.slice(splitIdx);
84
154
 
85
155
  // Recurse on both halves
86
- const result = splitSegments(left, maxSegment);
156
+ const result = splitSegments(left, maxTokens);
87
157
 
88
- if (right.length < MIN_SEGMENT) {
158
+ const rightTokens = right.reduce((s, m) => s + m.tokens, 0);
159
+ if (rightTokens < MIN_SEGMENT_TOKENS) {
89
160
  // Merge tiny trailing segment into the last segment
90
161
  result[result.length - 1].push(...right);
91
162
  } else {
92
- result.push(...splitSegments(right, maxSegment));
163
+ result.push(...splitSegments(right, maxTokens));
93
164
  }
94
165
 
95
166
  return result;
@@ -99,12 +170,13 @@ function splitSegments(
99
170
  * Choose where to split an oversized message array.
100
171
  *
101
172
  * If there's a time gap ≥ 3× the median gap AND it falls within a range
102
- * that would produce segments of at least MIN_SEGMENT size, use it.
103
- * Otherwise fall back to the count-based boundary at `maxSegment`.
173
+ * that would produce segments of at least MIN_SEGMENT_TOKENS on each side,
174
+ * use it. Otherwise fall back to the token-boundary split point (the index
175
+ * where cumulative tokens first exceed `maxTokens`).
104
176
  */
105
177
  function findSplitIndex(
106
178
  messages: TemporalMessage[],
107
- maxSegment: number,
179
+ maxTokens: number,
108
180
  ): number {
109
181
  // Compute consecutive time gaps
110
182
  const gaps: Array<{ index: number; gap: number }> = [];
@@ -115,19 +187,35 @@ function findSplitIndex(
115
187
  });
116
188
  }
117
189
 
118
- if (gaps.length === 0) return maxSegment;
190
+ // Compute the token-boundary fallback: first index where cumulative tokens exceed maxTokens
191
+ let cumulative = 0;
192
+ let tokenBoundary = messages.length; // fallback if all messages fit (shouldn't happen)
193
+ for (let i = 0; i < messages.length; i++) {
194
+ cumulative += messages[i].tokens;
195
+ if (cumulative > maxTokens) {
196
+ // Split so left half has indices [0, i), right half starts at i.
197
+ // Ensure at least 1 message on each side.
198
+ tokenBoundary = Math.max(1, i);
199
+ break;
200
+ }
201
+ }
202
+
203
+ if (gaps.length === 0) return tokenBoundary;
119
204
 
120
205
  // Find median gap
121
206
  const sortedGaps = gaps.map((g) => g.gap).sort((a, b) => a - b);
122
207
  const medianGap = sortedGaps[Math.floor(sortedGaps.length / 2)];
123
208
 
124
- // Find the largest gap that would produce viable segments (≥ MIN_SEGMENT on each side)
209
+ // Find the largest gap that would produce viable segments
210
+ // (≥ MIN_SEGMENT_TOKENS on each side)
125
211
  let bestGap = { index: -1, gap: 0 };
126
212
  for (const g of gaps) {
213
+ const leftTokens = sliceTokens(messages, 0, g.index);
214
+ const rightTokens = sliceTokens(messages, g.index, messages.length);
127
215
  if (
128
216
  g.gap > bestGap.gap &&
129
- g.index >= MIN_SEGMENT &&
130
- messages.length - g.index >= MIN_SEGMENT
217
+ leftTokens >= MIN_SEGMENT_TOKENS &&
218
+ rightTokens >= MIN_SEGMENT_TOKENS
131
219
  ) {
132
220
  bestGap = g;
133
221
  }
@@ -138,8 +226,8 @@ function findSplitIndex(
138
226
  return bestGap.index;
139
227
  }
140
228
 
141
- // Fall back to count-based splitting
142
- return maxSegment;
229
+ // Fall back to token-boundary splitting
230
+ return tokenBoundary;
143
231
  }
144
232
 
145
233
  function formatTime(ms: number): string {
@@ -387,6 +475,7 @@ function storeDistillation(input: {
387
475
  generation: number;
388
476
  rCompression?: number;
389
477
  cNorm?: number;
478
+ callType?: "batch" | "direct";
390
479
  }): string {
391
480
  const pid = ensureProject(input.projectPath);
392
481
  const id = crypto.randomUUID();
@@ -394,8 +483,8 @@ function storeDistillation(input: {
394
483
  const tokens = Math.ceil(input.observations.length / 3);
395
484
  db()
396
485
  .query(
397
- `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
398
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
486
+ `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm, call_type)
487
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
399
488
  )
400
489
  .run(
401
490
  id,
@@ -410,13 +499,14 @@ function storeDistillation(input: {
410
499
  Date.now(),
411
500
  input.rCompression ?? null,
412
501
  input.cNorm ?? null,
502
+ input.callType ?? null,
413
503
  );
414
504
  return id;
415
505
  }
416
506
 
417
507
  // Count non-archived gen-0 distillations — these are the ones awaiting
418
508
  // meta-distillation. Archived gen-0 entries have already been consolidated.
419
- function gen0Count(projectPath: string, sessionID: string): number {
509
+ export function gen0Count(projectPath: string, sessionID: string): number {
420
510
  const pid = ensureProject(projectPath);
421
511
  return (
422
512
  db()
@@ -539,6 +629,9 @@ export async function run(input: {
539
629
  * where the caller is blocking on the result. Background/idle distillation
540
630
  * should leave this false to benefit from batch API 50% cost savings. */
541
631
  urgent?: boolean;
632
+ /** Whether the LLM call will use batch or direct pricing. Recorded on the
633
+ * distillation row for accurate historical cost estimates. */
634
+ callType?: "batch" | "direct";
542
635
  }): Promise<{ rounds: number; distilled: number }> {
543
636
  // Reset orphaned messages (marked distilled by a deleted/migrated distillation)
544
637
  const orphans = resetOrphans(input.projectPath, input.sessionID);
@@ -564,8 +657,22 @@ export async function run(input: {
564
657
  break;
565
658
 
566
659
  if (pending.length > 0) {
567
- const segments = detectSegments(pending, cfg.distillation.maxSegment);
660
+ const segments = detectSegments(pending, cfg.distillation.maxSegmentTokens);
568
661
  for (const segment of segments) {
662
+ const segTokens = segment.reduce((s, m) => s + m.tokens, 0);
663
+ if (segTokens < cfg.distillation.minSegmentTokens) {
664
+ if (input.force) {
665
+ // Absorb: mark distilled without LLM call to avoid blocking
666
+ // the caller on useless work. Messages remain searchable via
667
+ // BM25/vector recall on the temporal table.
668
+ temporal.markDistilled(segment.map((m) => m.id));
669
+ log.info(
670
+ `absorb tiny segment: ${segment.length} msgs, ${segTokens} tokens (below min ${cfg.distillation.minSegmentTokens})`,
671
+ );
672
+ }
673
+ // else: leave undistilled to accumulate with future messages
674
+ continue;
675
+ }
569
676
  const result = await distillSegment({
570
677
  llm: input.llm,
571
678
  projectPath: input.projectPath,
@@ -573,6 +680,7 @@ export async function run(input: {
573
680
  messages: segment,
574
681
  model: input.model,
575
682
  urgent: input.urgent,
683
+ callType: input.callType,
576
684
  });
577
685
  if (result) {
578
686
  distilled += segment.length;
@@ -595,12 +703,15 @@ export async function run(input: {
595
703
  sessionID: input.sessionID,
596
704
  model: input.model,
597
705
  urgent: input.urgent,
706
+ callType: input.callType,
598
707
  });
599
708
  rounds++;
600
709
  }
601
710
 
602
- // Check if we still need urgent distillation
603
- if (!needsUrgentDistillation()) break;
711
+ // Continue looping only when explicitly forced (urgent/overflow recovery).
712
+ // Previously re-polled needsUrgentDistillation() here, but that consumed
713
+ // the per-session flag and raced with the caller that already checked it.
714
+ if (!input.force) break;
604
715
  }
605
716
 
606
717
  return { rounds, distilled };
@@ -613,6 +724,7 @@ async function distillSegment(input: {
613
724
  messages: TemporalMessage[];
614
725
  model?: { providerID: string; modelID: string };
615
726
  urgent?: boolean;
727
+ callType?: "batch" | "direct";
616
728
  }): Promise<DistillationResult | null> {
617
729
  const prior = latestObservations(input.projectPath, input.sessionID);
618
730
  const text = messagesToText(input.messages);
@@ -632,10 +744,12 @@ async function distillSegment(input: {
632
744
  });
633
745
 
634
746
  const model = input.model ?? config().model;
747
+ const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
748
+ const maxTokens = distillTokenBudget(sourceTokens);
635
749
  const responseText = await input.llm.prompt(
636
750
  DISTILLATION_SYSTEM,
637
751
  userContent,
638
- { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
752
+ { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID, maxTokens },
639
753
  );
640
754
  if (!responseText) return null;
641
755
 
@@ -644,10 +758,24 @@ async function distillSegment(input: {
644
758
 
645
759
  // Compute context health metrics before storing.
646
760
  const distilledTokens = Math.ceil(result.observations.length / 3);
647
- const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
648
761
  const rComp = compressionRatio(distilledTokens, sourceTokens);
649
762
  const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
650
763
 
764
+ // Expansion guard: discard distillation output that exceeds the allowed
765
+ // expansion limit. Tiny segments (< 100 tokens) get generous headroom
766
+ // because distillation necessarily adds metadata; large segments must
767
+ // actually compress. Still marks source messages as distilled to prevent
768
+ // infinite retry loops — they remain searchable via BM25/vector recall.
769
+ const expansionLimit = maxAllowedExpansion(sourceTokens);
770
+ if (distilledTokens > expansionLimit) {
771
+ temporal.markDistilled(input.messages.map((m) => m.id));
772
+ log.warn(
773
+ `distill expansion discarded: ${input.messages.length} msgs, ` +
774
+ `${sourceTokens}→${distilledTokens} tokens (exceeds ${expansionLimit} limit)`,
775
+ );
776
+ return null;
777
+ }
778
+
651
779
  const distillId = storeDistillation({
652
780
  projectPath: input.projectPath,
653
781
  sessionID: input.sessionID,
@@ -656,6 +784,7 @@ async function distillSegment(input: {
656
784
  generation: 0,
657
785
  rCompression: rComp,
658
786
  cNorm,
787
+ callType: input.callType,
659
788
  });
660
789
  temporal.markDistilled(input.messages.map((m) => m.id));
661
790
 
@@ -665,6 +794,16 @@ async function distillSegment(input: {
665
794
  `R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`,
666
795
  );
667
796
 
797
+ // Soft quality warning: R < 1.0 means the distillation is below the √N
798
+ // boundary, suggesting potentially lossy compression. Stored for
799
+ // monitoring — not a hard gate.
800
+ if (rComp < 1.0) {
801
+ log.warn(
802
+ `distill quality low: R=${rComp.toFixed(2)} (<1.0) on ${input.messages.length} msgs, ` +
803
+ `${sourceTokens}→${distilledTokens} tokens — may have lost detail`,
804
+ );
805
+ }
806
+
668
807
  // Fire-and-forget: embed the distillation for vector search
669
808
  if (embedding.isAvailable()) {
670
809
  embedding.embedDistillation(distillId, result.observations);
@@ -705,6 +844,7 @@ export async function metaDistill(input: {
705
844
  sessionID: string;
706
845
  model?: { providerID: string; modelID: string };
707
846
  urgent?: boolean;
847
+ callType?: "batch" | "direct";
708
848
  }): Promise<DistillationResult | null> {
709
849
  const existing = loadGen0(input.projectPath, input.sessionID);
710
850
 
@@ -729,10 +869,12 @@ export async function metaDistill(input: {
729
869
  const userContent = recursiveUser(existing, priorMeta?.observations);
730
870
 
731
871
  const model = input.model ?? config().model;
872
+ const inputTokens = Math.ceil(userContent.length / 3);
873
+ const maxTokens = workerTokenBudget(inputTokens, 0.25, 1024, 8192);
732
874
  const responseText = await input.llm.prompt(
733
875
  RECURSIVE_SYSTEM,
734
876
  userContent,
735
- { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
877
+ { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID, maxTokens },
736
878
  );
737
879
  if (!responseText) return null;
738
880
 
@@ -767,6 +909,7 @@ export async function metaDistill(input: {
767
909
  observations: result.observations,
768
910
  sourceIDs: allSourceIDs,
769
911
  generation: maxGen + 1,
912
+ callType: input.callType,
770
913
  });
771
914
  // Archive the gen-0 distillations that were merged into gen-1+.
772
915
  // They remain searchable via BM25 recall but are excluded from the
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Vendored bge-small registration for the standalone Lore binary.
3
+ *
4
+ * The Bun-compiled `lore` binary uses `bun build --compile` to bundle
5
+ * `fastembed` + `onnxruntime-node` + `@anush008/tokenizers-<platform>`
6
+ * directly into the executable — including the platform-specific
7
+ * `.node` addons which Bun embeds and dlopens from `$bunfs` at runtime.
8
+ *
9
+ * Two pieces don't fit into Bun's automatic bundling and need our help:
10
+ *
11
+ * 1. **Side-load shared libraries**. `onnxruntime_binding.node` does a
12
+ * runtime `dlopen("libonnxruntime.so.1")` (or the .dylib / .dll
13
+ * equivalent) for the actual ONNX Runtime computation library. Bun
14
+ * doesn't follow this kind of dependency. The binary's wrapper
15
+ * pre-loads these libs via `bun:ffi` *before* fastembed evaluates,
16
+ * so when the addon's dlopen fires it finds the cached handle.
17
+ *
18
+ * 2. **Model weights + tokenizer**. fastembed downloads from the HF
19
+ * Hub on first use; we want zero network on first run. The wrapper
20
+ * embeds the bge-small INT8 files as Bun assets, writes them to a
21
+ * real disk dir on first run, and sets `globalThis.__LORE_VENDOR_MODEL__`
22
+ * to that path. This module exposes that registration to the
23
+ * LocalProvider so it can hand the path to fastembed's CUSTOM-mode
24
+ * init (`modelAbsoluteDirPath` + `modelName`).
25
+ *
26
+ * In npm-mode usage from `@loreai/opencode` / `@loreai/pi` the global
27
+ * is unset and `vendorModelInfo()` returns `null`, so the LocalProvider
28
+ * falls through to fastembed's default Qdrant repo + cache.
29
+ */
30
+
31
+ // ---------------------------------------------------------------------------
32
+ // Vendor registration (set by the binary wrapper, read here)
33
+ // ---------------------------------------------------------------------------
34
+
35
+ /** What the binary wrapper writes to globalThis after extracting model files. */
36
+ export interface VendorRegistration {
37
+ /** Absolute path to the dir containing the bge-small files
38
+ * (config.json, tokenizer.json, model_quantized.onnx, …). Pass to
39
+ * fastembed as `modelAbsoluteDirPath` in CUSTOM init. */
40
+ modelAbsoluteDirPath: string;
41
+ /** Filename of the ONNX weights inside that dir. Pass to fastembed
42
+ * as `modelName` in CUSTOM init. */
43
+ modelName: string;
44
+ /** Target identifier the binary was built for, e.g. "linux-x64".
45
+ * Diagnostic only — the runtime doesn't branch on it. */
46
+ target: string;
47
+ /** Lore CLI version that produced the binary. Diagnostic only. */
48
+ version: string;
49
+ }
50
+
51
+ const REGISTRATION_KEY = "__LORE_VENDOR_MODEL__";
52
+
53
+ /** Read the vendor registration written by the binary wrapper, if any. */
54
+ function getRegistration(): VendorRegistration | null {
55
+ const g = globalThis as unknown as Record<string, VendorRegistration | undefined>;
56
+ return g[REGISTRATION_KEY] ?? null;
57
+ }
58
+
59
+ /** Test-only: programmatically set/clear the registration to exercise
60
+ * both binary-mode and npm-mode code paths without spinning up a real
61
+ * compiled binary. */
62
+ export function _setVendorRegistration(reg: VendorRegistration | null): void {
63
+ const g = globalThis as unknown as Record<string, VendorRegistration | undefined>;
64
+ if (reg) g[REGISTRATION_KEY] = reg;
65
+ else delete g[REGISTRATION_KEY];
66
+ }
67
+
68
+ // ---------------------------------------------------------------------------
69
+ // Public entry
70
+ // ---------------------------------------------------------------------------
71
+
72
+ /** Subset of the registration fastembed needs. Stripped of the
73
+ * diagnostic fields so the LocalProvider has exactly what it should
74
+ * hand to `FlagEmbedding.init`. */
75
+ export interface VendorModelInfo {
76
+ modelAbsoluteDirPath: string;
77
+ modelName: string;
78
+ }
79
+
80
+ /**
81
+ * Resolve the bundled-model arguments for fastembed CUSTOM init. Returns
82
+ * `null` when no vendor is registered (npm-mode), so the caller can fall
83
+ * through to fastembed's default cacheDir/HF Hub flow.
84
+ */
85
+ export function vendorModelInfo(): VendorModelInfo | null {
86
+ const reg = getRegistration();
87
+ if (!reg) return null;
88
+ return {
89
+ modelAbsoluteDirPath: reg.modelAbsoluteDirPath,
90
+ modelName: reg.modelName,
91
+ };
92
+ }
93
+
94
+ /** True iff this process is running inside a vendored Lore binary. */
95
+ export function isVendoredBinary(): boolean {
96
+ return getRegistration() !== null;
97
+ }
98
+
99
+ /** The full registration, for diagnostics (`lore --print-vendor-info`). */
100
+ export function vendorRegistration(): VendorRegistration | null {
101
+ return getRegistration();
102
+ }
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Shared message types for the embedding worker thread.
3
+ *
4
+ * The embedding worker (`embedding-worker.ts`) runs fastembed/ONNX inference
5
+ * in a separate `node:worker_threads` Worker so the main thread's event loop
6
+ * stays free during inference. This file defines the message protocol between
7
+ * the main thread (`LocalProvider` in `embedding.ts`) and the worker.
8
+ *
9
+ * Imported by both sides — keep this file free of runtime dependencies.
10
+ */
11
+
12
+ // ---------------------------------------------------------------------------
13
+ // Main thread → Worker
14
+ // ---------------------------------------------------------------------------
15
+
16
+ /** Request an embedding batch. */
17
+ export interface EmbedRequest {
18
+ type: "embed";
19
+ /** Monotonic request ID for correlating responses. */
20
+ id: number;
21
+ /** Texts to embed. */
22
+ texts: string[];
23
+ /** "document" for storage, "query" for search. */
24
+ inputType: "document" | "query";
25
+ /** "high" = recall queries (jump the queue), "normal" = backfill. */
26
+ priority: "high" | "normal";
27
+ }
28
+
29
+ /** Ask the worker to exit cleanly. */
30
+ export interface ShutdownRequest {
31
+ type: "shutdown";
32
+ }
33
+
34
+ export type WorkerInbound = EmbedRequest | ShutdownRequest;
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Worker → Main thread
38
+ // ---------------------------------------------------------------------------
39
+
40
+ /** Embedding result — vectors are Float32Array[], sent via structured clone. */
41
+ export interface EmbedResult {
42
+ type: "result";
43
+ /** Matches the request ID. */
44
+ id: number;
45
+ /** One Float32Array per input text. Sent via structured clone
46
+ * (Bun preserves Float32Array identity across threads). */
47
+ vectors: Float32Array[];
48
+ }
49
+
50
+ /** A single embed request failed (ONNX error, etc.). */
51
+ export interface EmbedError {
52
+ type: "error";
53
+ /** Matches the request ID. */
54
+ id: number;
55
+ /** Human-readable error message. */
56
+ error: string;
57
+ }
58
+
59
+ /** Model initialization failed inside the worker. All pending and future
60
+ * requests should be rejected — the worker is unusable. */
61
+ export interface InitError {
62
+ type: "init-error";
63
+ /** Human-readable error message. */
64
+ error: string;
65
+ }
66
+
67
+ export type WorkerOutbound = EmbedResult | EmbedError | InitError;
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // workerData contract
71
+ // ---------------------------------------------------------------------------
72
+
73
+ /** Passed to the worker via `workerData` at construction time. */
74
+ export interface WorkerInitData {
75
+ /** fastembed model name, e.g. "BGESmallENV15". */
76
+ modelName: string;
77
+ /** Vendored model info for binary mode, or null for npm mode.
78
+ * Mirrors the `globalThis.__LORE_VENDOR_MODEL__` registration which
79
+ * only exists on the main thread — passed explicitly so the worker
80
+ * can hand it to `FlagEmbedding.init()`. */
81
+ vendorModel: { modelAbsoluteDirPath: string; modelName: string } | null;
82
+ }