@loreai/core 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/bun/agents-file.d.ts +13 -1
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +20 -1
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/data.d.ts +174 -0
- package/dist/bun/data.d.ts.map +1 -0
- package/dist/bun/db.d.ts +65 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +49 -6
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding-vendor.d.ts +66 -0
- package/dist/bun/embedding-vendor.d.ts.map +1 -0
- package/dist/bun/embedding-worker-types.d.ts +66 -0
- package/dist/bun/embedding-worker-types.d.ts.map +1 -0
- package/dist/bun/embedding-worker.d.ts +16 -0
- package/dist/bun/embedding-worker.d.ts.map +1 -0
- package/dist/bun/embedding-worker.js +100 -0
- package/dist/bun/embedding-worker.js.map +7 -0
- package/dist/bun/embedding.d.ts +91 -8
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/git.d.ts +47 -0
- package/dist/bun/git.d.ts.map +1 -0
- package/dist/bun/gradient.d.ts +19 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +9 -6
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +13205 -11259
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/lat-reader.d.ts +1 -1
- package/dist/bun/lat-reader.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/markdown.d.ts +11 -0
- package/dist/bun/markdown.d.ts.map +1 -1
- package/dist/bun/prompt.d.ts +1 -1
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/recall.d.ts +53 -0
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +29 -0
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +15 -0
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +15 -80
- package/dist/bun/worker-model.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts +13 -1
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +20 -1
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/data.d.ts +174 -0
- package/dist/node/data.d.ts.map +1 -0
- package/dist/node/db.d.ts +65 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +49 -6
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding-vendor.d.ts +66 -0
- package/dist/node/embedding-vendor.d.ts.map +1 -0
- package/dist/node/embedding-worker-types.d.ts +66 -0
- package/dist/node/embedding-worker-types.d.ts.map +1 -0
- package/dist/node/embedding-worker.d.ts +16 -0
- package/dist/node/embedding-worker.d.ts.map +1 -0
- package/dist/node/embedding-worker.js +100 -0
- package/dist/node/embedding-worker.js.map +7 -0
- package/dist/node/embedding.d.ts +91 -8
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/git.d.ts +47 -0
- package/dist/node/git.d.ts.map +1 -0
- package/dist/node/gradient.d.ts +19 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +9 -6
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +13205 -11259
- package/dist/node/index.js.map +4 -4
- package/dist/node/lat-reader.d.ts +1 -1
- package/dist/node/lat-reader.d.ts.map +1 -1
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/markdown.d.ts +11 -0
- package/dist/node/markdown.d.ts.map +1 -1
- package/dist/node/prompt.d.ts +1 -1
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/recall.d.ts +53 -0
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +29 -0
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +15 -0
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +15 -80
- package/dist/node/worker-model.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts +13 -1
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +20 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/data.d.ts +174 -0
- package/dist/types/data.d.ts.map +1 -0
- package/dist/types/db.d.ts +65 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +49 -6
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding-vendor.d.ts +66 -0
- package/dist/types/embedding-vendor.d.ts.map +1 -0
- package/dist/types/embedding-worker-types.d.ts +66 -0
- package/dist/types/embedding-worker-types.d.ts.map +1 -0
- package/dist/types/embedding-worker.d.ts +16 -0
- package/dist/types/embedding-worker.d.ts.map +1 -0
- package/dist/types/embedding.d.ts +91 -8
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/git.d.ts +47 -0
- package/dist/types/git.d.ts.map +1 -0
- package/dist/types/gradient.d.ts +19 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +9 -6
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/lat-reader.d.ts +1 -1
- package/dist/types/lat-reader.d.ts.map +1 -1
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/markdown.d.ts +11 -0
- package/dist/types/markdown.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +1 -1
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/recall.d.ts +53 -0
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +29 -0
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +15 -0
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +15 -80
- package/dist/types/worker-model.d.ts.map +1 -1
- package/package.json +5 -2
- package/src/agents-file.ts +87 -4
- package/src/config.ts +68 -5
- package/src/curator.ts +2 -2
- package/src/data.ts +768 -0
- package/src/db.ts +386 -7
- package/src/distillation.ts +178 -35
- package/src/embedding-vendor.ts +102 -0
- package/src/embedding-worker-types.ts +82 -0
- package/src/embedding-worker.ts +185 -0
- package/src/embedding.ts +607 -61
- package/src/git.ts +144 -0
- package/src/gradient.ts +174 -17
- package/src/index.ts +20 -0
- package/src/lat-reader.ts +5 -11
- package/src/ltm.ts +17 -44
- package/src/markdown.ts +15 -0
- package/src/prompt.ts +1 -2
- package/src/recall.ts +401 -70
- package/src/search.ts +71 -1
- package/src/temporal.ts +42 -35
- package/src/types.ts +15 -0
- package/src/worker-model.ts +17 -363
package/src/distillation.ts
CHANGED
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
RECURSIVE_SYSTEM,
|
|
13
13
|
recursiveUser,
|
|
14
14
|
} from "./prompt";
|
|
15
|
-
import {
|
|
15
|
+
import { toolStripAnnotation } from "./gradient";
|
|
16
16
|
import { workerSessionIDs } from "./worker";
|
|
17
17
|
import type { LLMClient } from "./types";
|
|
18
18
|
|
|
@@ -40,29 +40,91 @@ export function compressionRatio(
|
|
|
40
40
|
return distilledTokens / Math.sqrt(sourceTokens);
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
+
/**
 * Upper bound on how many tokens a distillation may contain for a given
 * source size.
 *
 * Distillation output carries metadata (timestamps, importance markers,
 * cross-references), so very small inputs cannot realistically shrink.
 * The limit is therefore tiered:
 *
 * - fewer than 100 source tokens: up to 5× the source (e.g. 8 → 40)
 * - fewer than 500 source tokens: up to 2× the source
 * - otherwise: no expansion allowed — output must not exceed the source
 *
 * @param sourceTokens Token count of the source being distilled.
 * @returns Maximum allowed distilled tokens for that source token count.
 */
export function maxAllowedExpansion(sourceTokens: number): number {
  let factor = 1; // large inputs: must compress
  if (sourceTokens < 100) {
    factor = 5; // tiny inputs: generous headroom
  } else if (sourceTokens < 500) {
    factor = 2; // small inputs: 2x headroom
  }
  return sourceTokens * factor;
}
|
|
58
|
+
|
|
43
59
|
/**
 * Segment detection: partition pending messages into chunks small enough
 * to distill.
 *
 * If the summed `tokens` of all messages is within `maxTokens`, the input
 * is returned unchanged as a single segment. Otherwise the work is handed
 * to `splitSegments`, which prefers to cut at the largest inter-message
 * time gap (when it is ≥ 3× the median gap) so natural conversation
 * boundaries are respected, and falls back to token-boundary splitting
 * when timestamps are uniform.
 *
 * Trailing segments whose token sum is below {@link MIN_SEGMENT_TOKENS}
 * are merged into the previous segment so that no distillation input is
 * too small to carry useful context.
 *
 * Exported for testing; `run()` is the production caller.
 *
 * @param messages Messages to group into segments.
 * @param maxTokens Token ceiling for a single segment.
 * @returns One or more message groups.
 */
export function detectSegments(
  messages: TemporalMessage[],
  maxTokens: number,
): TemporalMessage[][] {
  let total = 0;
  for (const message of messages) {
    total += message.tokens;
  }
  // Fast path: everything already fits in one segment.
  return total <= maxTokens ? [messages] : splitSegments(messages, maxTokens);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Derive the `max_tokens` budget for a worker LLM call.
 *
 * The budget is `inputTokens × ratio` rounded up, limited from above by
 * `cap` and then raised to at least `floor`. Because the floor is applied
 * last, it wins if `floor` is larger than `cap`.
 *
 * @param inputTokens Estimated source token count
 * @param ratio       Compression ratio (0.0–1.0) — output ≈ ratio × input
 * @param floor       Minimum output tokens
 * @param cap         Maximum output tokens
 * @returns The output token budget for the call.
 */
export function workerTokenBudget(
  inputTokens: number,
  ratio: number,
  floor: number,
  cap: number,
): number {
  const proportional = Math.ceil(inputTokens * ratio);
  const capped = Math.min(proportional, cap);
  return Math.max(floor, capped);
}
|
|
98
|
+
|
|
99
|
+
/**
 * Compute the max_tokens budget for gen-0 distillation of raw messages.
 *
 * Uses a √N-based formula (8 × √N) instead of a linear ratio so that the
 * budget grows sub-linearly with input size. This naturally constrains the
 * LLM to produce output at ~R ≈ 2–4 (the square-root boundary) and avoids
 * expansion on small segments where a linear 0.25 ratio + 1024 floor gave
 * the model far too much room.
 *
 * The multiplier (8) gives ~4× headroom above the R=2.0 target, accounting
 * for the detailed observation format (emoji markers, timestamps, entity
 * tags, exact numbers) required by the distillation prompt.
 *
 * Non-positive or NaN inputs yield the floor: `Math.sqrt` returns NaN for
 * them, and NaN would otherwise propagate through `Math.min`/`Math.max`
 * and escape the documented range.
 *
 * @param sourceTokens Estimated source token count from raw messages
 * @returns Token budget clamped to [256, 4096]
 */
export function distillTokenBudget(sourceTokens: number): number {
  const MULTIPLIER = 8;
  const FLOOR = 256;
  const CAP = 4096;
  // `!(x > 0)` is true for 0, negatives and NaN alike.
  if (!(sourceTokens > 0)) return FLOOR;
  const scaled = Math.ceil(MULTIPLIER * Math.sqrt(sourceTokens));
  return Math.max(FLOOR, Math.min(scaled, CAP));
}
|
|
63
121
|
|
|
64
|
-
/**
|
|
65
|
-
|
|
122
|
+
/**
|
|
123
|
+
* Minimum segment token count — trailing segments smaller than this get
|
|
124
|
+
* merged into the previous segment during splitting to avoid producing
|
|
125
|
+
* segments too small to compress meaningfully.
|
|
126
|
+
*/
|
|
127
|
+
const MIN_SEGMENT_TOKENS = 64;
|
|
66
128
|
|
|
67
129
|
/**
|
|
68
130
|
* Multiplier for the median gap threshold: a time gap must be at least
|
|
@@ -70,26 +132,35 @@ const MIN_SEGMENT = 3;
|
|
|
70
132
|
*/
|
|
71
133
|
const GAP_THRESHOLD_MULTIPLIER = 3;
|
|
72
134
|
|
|
135
|
+
/**
 * Total the `tokens` field over the half-open index range `[start, end)`
 * of `messages`. Returns 0 when the range is empty.
 */
function sliceTokens(messages: TemporalMessage[], start: number, end: number): number {
  let total = 0;
  let cursor = start;
  while (cursor < end) {
    total += messages[cursor].tokens;
    cursor += 1;
  }
  return total;
}
|
|
141
|
+
|
|
73
142
|
function splitSegments(
|
|
74
143
|
messages: TemporalMessage[],
|
|
75
|
-
|
|
144
|
+
maxTokens: number,
|
|
76
145
|
): TemporalMessage[][] {
|
|
77
|
-
|
|
146
|
+
const totalTokens = messages.reduce((s, m) => s + m.tokens, 0);
|
|
147
|
+
if (totalTokens <= maxTokens) return [messages];
|
|
78
148
|
|
|
79
149
|
// Find the split point: prefer the largest time gap if it's significant
|
|
80
|
-
const splitIdx = findSplitIndex(messages,
|
|
150
|
+
const splitIdx = findSplitIndex(messages, maxTokens);
|
|
81
151
|
|
|
82
152
|
const left = messages.slice(0, splitIdx);
|
|
83
153
|
const right = messages.slice(splitIdx);
|
|
84
154
|
|
|
85
155
|
// Recurse on both halves
|
|
86
|
-
const result = splitSegments(left,
|
|
156
|
+
const result = splitSegments(left, maxTokens);
|
|
87
157
|
|
|
88
|
-
|
|
158
|
+
const rightTokens = right.reduce((s, m) => s + m.tokens, 0);
|
|
159
|
+
if (rightTokens < MIN_SEGMENT_TOKENS) {
|
|
89
160
|
// Merge tiny trailing segment into the last segment
|
|
90
161
|
result[result.length - 1].push(...right);
|
|
91
162
|
} else {
|
|
92
|
-
result.push(...splitSegments(right,
|
|
163
|
+
result.push(...splitSegments(right, maxTokens));
|
|
93
164
|
}
|
|
94
165
|
|
|
95
166
|
return result;
|
|
@@ -99,12 +170,13 @@ function splitSegments(
|
|
|
99
170
|
* Choose where to split an oversized message array.
|
|
100
171
|
*
|
|
101
172
|
* If there's a time gap ≥ 3× the median gap AND it falls within a range
|
|
102
|
-
* that would produce segments of at least
|
|
103
|
-
* Otherwise fall back to the
|
|
173
|
+
* that would produce segments of at least MIN_SEGMENT_TOKENS on each side,
|
|
174
|
+
* use it. Otherwise fall back to the token-boundary split point (the index
|
|
175
|
+
* where cumulative tokens first exceed `maxTokens`).
|
|
104
176
|
*/
|
|
105
177
|
function findSplitIndex(
|
|
106
178
|
messages: TemporalMessage[],
|
|
107
|
-
|
|
179
|
+
maxTokens: number,
|
|
108
180
|
): number {
|
|
109
181
|
// Compute consecutive time gaps
|
|
110
182
|
const gaps: Array<{ index: number; gap: number }> = [];
|
|
@@ -115,19 +187,35 @@ function findSplitIndex(
|
|
|
115
187
|
});
|
|
116
188
|
}
|
|
117
189
|
|
|
118
|
-
|
|
190
|
+
// Compute the token-boundary fallback: first index where cumulative tokens exceed maxTokens
|
|
191
|
+
let cumulative = 0;
|
|
192
|
+
let tokenBoundary = messages.length; // fallback if all messages fit (shouldn't happen)
|
|
193
|
+
for (let i = 0; i < messages.length; i++) {
|
|
194
|
+
cumulative += messages[i].tokens;
|
|
195
|
+
if (cumulative > maxTokens) {
|
|
196
|
+
// Split so left half has indices [0, i), right half starts at i.
|
|
197
|
+
// Ensure at least 1 message on each side.
|
|
198
|
+
tokenBoundary = Math.max(1, i);
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
if (gaps.length === 0) return tokenBoundary;
|
|
119
204
|
|
|
120
205
|
// Find median gap
|
|
121
206
|
const sortedGaps = gaps.map((g) => g.gap).sort((a, b) => a - b);
|
|
122
207
|
const medianGap = sortedGaps[Math.floor(sortedGaps.length / 2)];
|
|
123
208
|
|
|
124
|
-
// Find the largest gap that would produce viable segments
|
|
209
|
+
// Find the largest gap that would produce viable segments
|
|
210
|
+
// (≥ MIN_SEGMENT_TOKENS on each side)
|
|
125
211
|
let bestGap = { index: -1, gap: 0 };
|
|
126
212
|
for (const g of gaps) {
|
|
213
|
+
const leftTokens = sliceTokens(messages, 0, g.index);
|
|
214
|
+
const rightTokens = sliceTokens(messages, g.index, messages.length);
|
|
127
215
|
if (
|
|
128
216
|
g.gap > bestGap.gap &&
|
|
129
|
-
|
|
130
|
-
|
|
217
|
+
leftTokens >= MIN_SEGMENT_TOKENS &&
|
|
218
|
+
rightTokens >= MIN_SEGMENT_TOKENS
|
|
131
219
|
) {
|
|
132
220
|
bestGap = g;
|
|
133
221
|
}
|
|
@@ -138,8 +226,8 @@ function findSplitIndex(
|
|
|
138
226
|
return bestGap.index;
|
|
139
227
|
}
|
|
140
228
|
|
|
141
|
-
// Fall back to
|
|
142
|
-
return
|
|
229
|
+
// Fall back to token-boundary splitting
|
|
230
|
+
return tokenBoundary;
|
|
143
231
|
}
|
|
144
232
|
|
|
145
233
|
function formatTime(ms: number): string {
|
|
@@ -387,6 +475,7 @@ function storeDistillation(input: {
|
|
|
387
475
|
generation: number;
|
|
388
476
|
rCompression?: number;
|
|
389
477
|
cNorm?: number;
|
|
478
|
+
callType?: "batch" | "direct";
|
|
390
479
|
}): string {
|
|
391
480
|
const pid = ensureProject(input.projectPath);
|
|
392
481
|
const id = crypto.randomUUID();
|
|
@@ -394,8 +483,8 @@ function storeDistillation(input: {
|
|
|
394
483
|
const tokens = Math.ceil(input.observations.length / 3);
|
|
395
484
|
db()
|
|
396
485
|
.query(
|
|
397
|
-
`INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
|
|
398
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
486
|
+
`INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm, call_type)
|
|
487
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
399
488
|
)
|
|
400
489
|
.run(
|
|
401
490
|
id,
|
|
@@ -410,13 +499,14 @@ function storeDistillation(input: {
|
|
|
410
499
|
Date.now(),
|
|
411
500
|
input.rCompression ?? null,
|
|
412
501
|
input.cNorm ?? null,
|
|
502
|
+
input.callType ?? null,
|
|
413
503
|
);
|
|
414
504
|
return id;
|
|
415
505
|
}
|
|
416
506
|
|
|
417
507
|
// Count non-archived gen-0 distillations — these are the ones awaiting
|
|
418
508
|
// meta-distillation. Archived gen-0 entries have already been consolidated.
|
|
419
|
-
function gen0Count(projectPath: string, sessionID: string): number {
|
|
509
|
+
export function gen0Count(projectPath: string, sessionID: string): number {
|
|
420
510
|
const pid = ensureProject(projectPath);
|
|
421
511
|
return (
|
|
422
512
|
db()
|
|
@@ -539,6 +629,9 @@ export async function run(input: {
|
|
|
539
629
|
* where the caller is blocking on the result. Background/idle distillation
|
|
540
630
|
* should leave this false to benefit from batch API 50% cost savings. */
|
|
541
631
|
urgent?: boolean;
|
|
632
|
+
/** Whether the LLM call will use batch or direct pricing. Recorded on the
|
|
633
|
+
* distillation row for accurate historical cost estimates. */
|
|
634
|
+
callType?: "batch" | "direct";
|
|
542
635
|
}): Promise<{ rounds: number; distilled: number }> {
|
|
543
636
|
// Reset orphaned messages (marked distilled by a deleted/migrated distillation)
|
|
544
637
|
const orphans = resetOrphans(input.projectPath, input.sessionID);
|
|
@@ -564,8 +657,22 @@ export async function run(input: {
|
|
|
564
657
|
break;
|
|
565
658
|
|
|
566
659
|
if (pending.length > 0) {
|
|
567
|
-
const segments = detectSegments(pending, cfg.distillation.
|
|
660
|
+
const segments = detectSegments(pending, cfg.distillation.maxSegmentTokens);
|
|
568
661
|
for (const segment of segments) {
|
|
662
|
+
const segTokens = segment.reduce((s, m) => s + m.tokens, 0);
|
|
663
|
+
if (segTokens < cfg.distillation.minSegmentTokens) {
|
|
664
|
+
if (input.force) {
|
|
665
|
+
// Absorb: mark distilled without LLM call to avoid blocking
|
|
666
|
+
// the caller on useless work. Messages remain searchable via
|
|
667
|
+
// BM25/vector recall on the temporal table.
|
|
668
|
+
temporal.markDistilled(segment.map((m) => m.id));
|
|
669
|
+
log.info(
|
|
670
|
+
`absorb tiny segment: ${segment.length} msgs, ${segTokens} tokens (below min ${cfg.distillation.minSegmentTokens})`,
|
|
671
|
+
);
|
|
672
|
+
}
|
|
673
|
+
// else: leave undistilled to accumulate with future messages
|
|
674
|
+
continue;
|
|
675
|
+
}
|
|
569
676
|
const result = await distillSegment({
|
|
570
677
|
llm: input.llm,
|
|
571
678
|
projectPath: input.projectPath,
|
|
@@ -573,6 +680,7 @@ export async function run(input: {
|
|
|
573
680
|
messages: segment,
|
|
574
681
|
model: input.model,
|
|
575
682
|
urgent: input.urgent,
|
|
683
|
+
callType: input.callType,
|
|
576
684
|
});
|
|
577
685
|
if (result) {
|
|
578
686
|
distilled += segment.length;
|
|
@@ -595,12 +703,15 @@ export async function run(input: {
|
|
|
595
703
|
sessionID: input.sessionID,
|
|
596
704
|
model: input.model,
|
|
597
705
|
urgent: input.urgent,
|
|
706
|
+
callType: input.callType,
|
|
598
707
|
});
|
|
599
708
|
rounds++;
|
|
600
709
|
}
|
|
601
710
|
|
|
602
|
-
//
|
|
603
|
-
|
|
711
|
+
// Continue looping only when explicitly forced (urgent/overflow recovery).
|
|
712
|
+
// Previously re-polled needsUrgentDistillation() here, but that consumed
|
|
713
|
+
// the per-session flag and raced with the caller that already checked it.
|
|
714
|
+
if (!input.force) break;
|
|
604
715
|
}
|
|
605
716
|
|
|
606
717
|
return { rounds, distilled };
|
|
@@ -613,6 +724,7 @@ async function distillSegment(input: {
|
|
|
613
724
|
messages: TemporalMessage[];
|
|
614
725
|
model?: { providerID: string; modelID: string };
|
|
615
726
|
urgent?: boolean;
|
|
727
|
+
callType?: "batch" | "direct";
|
|
616
728
|
}): Promise<DistillationResult | null> {
|
|
617
729
|
const prior = latestObservations(input.projectPath, input.sessionID);
|
|
618
730
|
const text = messagesToText(input.messages);
|
|
@@ -632,10 +744,12 @@ async function distillSegment(input: {
|
|
|
632
744
|
});
|
|
633
745
|
|
|
634
746
|
const model = input.model ?? config().model;
|
|
747
|
+
const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
|
|
748
|
+
const maxTokens = distillTokenBudget(sourceTokens);
|
|
635
749
|
const responseText = await input.llm.prompt(
|
|
636
750
|
DISTILLATION_SYSTEM,
|
|
637
751
|
userContent,
|
|
638
|
-
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
|
|
752
|
+
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID, maxTokens },
|
|
639
753
|
);
|
|
640
754
|
if (!responseText) return null;
|
|
641
755
|
|
|
@@ -644,10 +758,24 @@ async function distillSegment(input: {
|
|
|
644
758
|
|
|
645
759
|
// Compute context health metrics before storing.
|
|
646
760
|
const distilledTokens = Math.ceil(result.observations.length / 3);
|
|
647
|
-
const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
|
|
648
761
|
const rComp = compressionRatio(distilledTokens, sourceTokens);
|
|
649
762
|
const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
|
|
650
763
|
|
|
764
|
+
// Expansion guard: discard distillation output that exceeds the allowed
|
|
765
|
+
// expansion limit. Tiny segments (< 100 tokens) get generous headroom
|
|
766
|
+
// because distillation necessarily adds metadata; large segments must
|
|
767
|
+
// actually compress. Still marks source messages as distilled to prevent
|
|
768
|
+
// infinite retry loops — they remain searchable via BM25/vector recall.
|
|
769
|
+
const expansionLimit = maxAllowedExpansion(sourceTokens);
|
|
770
|
+
if (distilledTokens > expansionLimit) {
|
|
771
|
+
temporal.markDistilled(input.messages.map((m) => m.id));
|
|
772
|
+
log.warn(
|
|
773
|
+
`distill expansion discarded: ${input.messages.length} msgs, ` +
|
|
774
|
+
`${sourceTokens}→${distilledTokens} tokens (exceeds ${expansionLimit} limit)`,
|
|
775
|
+
);
|
|
776
|
+
return null;
|
|
777
|
+
}
|
|
778
|
+
|
|
651
779
|
const distillId = storeDistillation({
|
|
652
780
|
projectPath: input.projectPath,
|
|
653
781
|
sessionID: input.sessionID,
|
|
@@ -656,6 +784,7 @@ async function distillSegment(input: {
|
|
|
656
784
|
generation: 0,
|
|
657
785
|
rCompression: rComp,
|
|
658
786
|
cNorm,
|
|
787
|
+
callType: input.callType,
|
|
659
788
|
});
|
|
660
789
|
temporal.markDistilled(input.messages.map((m) => m.id));
|
|
661
790
|
|
|
@@ -665,6 +794,16 @@ async function distillSegment(input: {
|
|
|
665
794
|
`R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`,
|
|
666
795
|
);
|
|
667
796
|
|
|
797
|
+
// Soft quality warning: R < 1.0 means the distillation is below the √N
|
|
798
|
+
// boundary, suggesting potentially lossy compression. Stored for
|
|
799
|
+
// monitoring — not a hard gate.
|
|
800
|
+
if (rComp < 1.0) {
|
|
801
|
+
log.warn(
|
|
802
|
+
`distill quality low: R=${rComp.toFixed(2)} (<1.0) on ${input.messages.length} msgs, ` +
|
|
803
|
+
`${sourceTokens}→${distilledTokens} tokens — may have lost detail`,
|
|
804
|
+
);
|
|
805
|
+
}
|
|
806
|
+
|
|
668
807
|
// Fire-and-forget: embed the distillation for vector search
|
|
669
808
|
if (embedding.isAvailable()) {
|
|
670
809
|
embedding.embedDistillation(distillId, result.observations);
|
|
@@ -705,6 +844,7 @@ export async function metaDistill(input: {
|
|
|
705
844
|
sessionID: string;
|
|
706
845
|
model?: { providerID: string; modelID: string };
|
|
707
846
|
urgent?: boolean;
|
|
847
|
+
callType?: "batch" | "direct";
|
|
708
848
|
}): Promise<DistillationResult | null> {
|
|
709
849
|
const existing = loadGen0(input.projectPath, input.sessionID);
|
|
710
850
|
|
|
@@ -729,10 +869,12 @@ export async function metaDistill(input: {
|
|
|
729
869
|
const userContent = recursiveUser(existing, priorMeta?.observations);
|
|
730
870
|
|
|
731
871
|
const model = input.model ?? config().model;
|
|
872
|
+
const inputTokens = Math.ceil(userContent.length / 3);
|
|
873
|
+
const maxTokens = workerTokenBudget(inputTokens, 0.25, 1024, 8192);
|
|
732
874
|
const responseText = await input.llm.prompt(
|
|
733
875
|
RECURSIVE_SYSTEM,
|
|
734
876
|
userContent,
|
|
735
|
-
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
|
|
877
|
+
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID, maxTokens },
|
|
736
878
|
);
|
|
737
879
|
if (!responseText) return null;
|
|
738
880
|
|
|
@@ -767,6 +909,7 @@ export async function metaDistill(input: {
|
|
|
767
909
|
observations: result.observations,
|
|
768
910
|
sourceIDs: allSourceIDs,
|
|
769
911
|
generation: maxGen + 1,
|
|
912
|
+
callType: input.callType,
|
|
770
913
|
});
|
|
771
914
|
// Archive the gen-0 distillations that were merged into gen-1+.
|
|
772
915
|
// They remain searchable via BM25 recall but are excluded from the
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vendored bge-small registration for the standalone Lore binary.
|
|
3
|
+
*
|
|
4
|
+
* The Bun-compiled `lore` binary uses `bun build --compile` to bundle
|
|
5
|
+
* `fastembed` + `onnxruntime-node` + `@anush008/tokenizers-<platform>`
|
|
6
|
+
* directly into the executable — including the platform-specific
|
|
7
|
+
* `.node` addons which Bun embeds and dlopens from `$bunfs` at runtime.
|
|
8
|
+
*
|
|
9
|
+
* Two pieces don't fit into Bun's automatic bundling and need our help:
|
|
10
|
+
*
|
|
11
|
+
* 1. **Side-load shared libraries**. `onnxruntime_binding.node` does a
|
|
12
|
+
* runtime `dlopen("libonnxruntime.so.1")` (or the .dylib / .dll
|
|
13
|
+
* equivalent) for the actual ONNX Runtime computation library. Bun
|
|
14
|
+
* doesn't follow this kind of dependency. The binary's wrapper
|
|
15
|
+
* pre-loads these libs via `bun:ffi` *before* fastembed evaluates,
|
|
16
|
+
* so when the addon's dlopen fires it finds the cached handle.
|
|
17
|
+
*
|
|
18
|
+
* 2. **Model weights + tokenizer**. fastembed downloads from the HF
|
|
19
|
+
* Hub on first use; we want zero network on first run. The wrapper
|
|
20
|
+
* embeds the bge-small INT8 files as Bun assets, writes them to a
|
|
21
|
+
* real disk dir on first run, and sets `globalThis.__LORE_VENDOR_MODEL__`
|
|
22
|
+
* to that path. This module exposes that registration to the
|
|
23
|
+
* LocalProvider so it can hand the path to fastembed's CUSTOM-mode
|
|
24
|
+
* init (`modelAbsoluteDirPath` + `modelName`).
|
|
25
|
+
*
|
|
26
|
+
* In npm-mode usage from `@loreai/opencode` / `@loreai/pi` the global
|
|
27
|
+
* is unset and `vendorModelInfo()` returns `null`, so the LocalProvider
|
|
28
|
+
* falls through to fastembed's default Qdrant repo + cache.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Vendor registration (set by the binary wrapper, read here)
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
/**
 * Record the binary wrapper stores on `globalThis` once the bundled model
 * files have been extracted.
 */
export interface VendorRegistration {
  /**
   * Absolute path of the directory holding the bge-small files
   * (config.json, tokenizer.json, model_quantized.onnx, …). Given to
   * fastembed as `modelAbsoluteDirPath` for CUSTOM init.
   */
  modelAbsoluteDirPath: string;
  /**
   * File name of the ONNX weights within that directory. Given to
   * fastembed as `modelName` for CUSTOM init.
   */
  modelName: string;
  /**
   * Build target of the binary, e.g. "linux-x64". Diagnostic only — the
   * runtime doesn't branch on it.
   */
  target: string;
  /** Version of the Lore CLI that produced the binary. Diagnostic only. */
  version: string;
}
|
|
50
|
+
|
|
51
|
+
/** Property name on `globalThis` under which the vendor registration lives. */
const REGISTRATION_KEY = "__LORE_VENDOR_MODEL__";

/**
 * Look up the vendor registration on `globalThis`.
 *
 * @returns The registered value, or `null` when the property is missing
 *   (or holds `null`/`undefined`).
 */
function getRegistration(): VendorRegistration | null {
  const globals = globalThis as unknown as Record<string, VendorRegistration | undefined>;
  const registered = globals[REGISTRATION_KEY];
  return registered ?? null;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Test-only hook: install or remove the vendor registration on
 * `globalThis`, so binary-mode and npm-mode code paths can both be
 * exercised without building a real compiled binary.
 *
 * @param reg Registration to install; a falsy value (e.g. `null`) removes
 *   the property instead.
 */
export function _setVendorRegistration(reg: VendorRegistration | null): void {
  const globals = globalThis as unknown as Record<string, VendorRegistration | undefined>;
  if (!reg) {
    delete globals[REGISTRATION_KEY];
    return;
  }
  globals[REGISTRATION_KEY] = reg;
}
|
|
67
|
+
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Public entry
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
/**
 * The two registration fields fastembed needs. The diagnostic fields are
 * left out so the LocalProvider receives exactly what it should pass to
 * `FlagEmbedding.init`.
 */
export interface VendorModelInfo {
  modelAbsoluteDirPath: string;
  modelName: string;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Produce the bundled-model arguments for fastembed CUSTOM init.
 *
 * @returns The `modelAbsoluteDirPath` / `modelName` pair copied from the
 *   current registration, or `null` when nothing is registered (npm-mode)
 *   so the caller can fall through to fastembed's default cacheDir/HF Hub
 *   flow.
 */
export function vendorModelInfo(): VendorModelInfo | null {
  const registration = getRegistration();
  if (!registration) return null;
  // Copy only the fields fastembed consumes; diagnostics stay behind.
  const { modelAbsoluteDirPath, modelName } = registration;
  return { modelAbsoluteDirPath, modelName };
}
|
|
93
|
+
|
|
94
|
+
/**
 * Report whether a vendor registration is present, i.e. whether this
 * process is running inside a vendored Lore binary.
 */
export function isVendoredBinary(): boolean {
  const registration = getRegistration();
  return registration !== null;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Expose the complete registration, diagnostic fields included, for
 * diagnostics output (`lore --print-vendor-info`).
 *
 * @returns The current registration, or `null` when none is set.
 */
export function vendorRegistration(): VendorRegistration | null {
  const registration = getRegistration();
  return registration;
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared message types for the embedding worker thread.
|
|
3
|
+
*
|
|
4
|
+
* The embedding worker (`embedding-worker.ts`) runs fastembed/ONNX inference
|
|
5
|
+
* in a separate `node:worker_threads` Worker so the main thread's event loop
|
|
6
|
+
* stays free during inference. This file defines the message protocol between
|
|
7
|
+
* the main thread (`LocalProvider` in `embedding.ts`) and the worker.
|
|
8
|
+
*
|
|
9
|
+
* Imported by both sides — keep this file free of runtime dependencies.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Main thread → Worker
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
/** Main thread → worker: embed a batch of texts. */
export interface EmbedRequest {
  type: "embed";
  /** Monotonic request ID, echoed back so responses can be correlated. */
  id: number;
  /** The texts to embed. */
  texts: string[];
  /** "document" when embedding for storage, "query" when embedding for search. */
  inputType: "document" | "query";
  /** "high" for recall queries (jump the queue), "normal" for backfill. */
  priority: "high" | "normal";
}
|
|
28
|
+
|
|
29
|
+
/** Main thread → worker: request a clean exit. */
export interface ShutdownRequest {
  type: "shutdown";
}
|
|
33
|
+
|
|
34
|
+
/** Every message the main thread may send to the worker, discriminated by `type`. */
export type WorkerInbound =
  | EmbedRequest
  | ShutdownRequest;
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Worker → Main thread
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
/** Worker → main thread: successful embedding of one request. */
export interface EmbedResult {
  type: "result";
  /** ID of the request this answers. */
  id: number;
  /**
   * One Float32Array per input text. Delivered by structured clone; the
   * authors note that Bun keeps the elements as Float32Array across
   * threads.
   */
  vectors: Float32Array[];
}
|
|
49
|
+
|
|
50
|
+
/** Worker → main thread: one embed request failed (ONNX error, etc.). */
export interface EmbedError {
  type: "error";
  /** ID of the request that failed. */
  id: number;
  /** Human-readable description of the failure. */
  error: string;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Worker → main thread: model initialization failed inside the worker.
 * The worker is unusable, so all pending and future requests should be
 * rejected.
 */
export interface InitError {
  type: "init-error";
  /** Human-readable description of the failure. */
  error: string;
}
|
|
66
|
+
|
|
67
|
+
/** Every message the worker may send to the main thread, discriminated by `type`. */
export type WorkerOutbound =
  | EmbedResult
  | EmbedError
  | InitError;
|
|
68
|
+
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// workerData contract
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
/** Payload handed to the worker through `workerData` when it is constructed. */
export interface WorkerInitData {
  /** fastembed model name, e.g. "BGESmallENV15". */
  modelName: string;
  /**
   * Vendored model info in binary mode, `null` in npm mode. It mirrors the
   * `globalThis.__LORE_VENDOR_MODEL__` registration, which only exists on
   * the main thread, and is passed explicitly so the worker can forward it
   * to `FlagEmbedding.init()`.
   */
  vendorModel: { modelAbsoluteDirPath: string; modelName: string } | null;
}
|