omegon 0.6.9 → 0.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -10,163 +10,116 @@
|
|
|
10
10
|
* supersede — "This new fact replaces that old one" (by ID + new content)
|
|
11
11
|
* archive — "This fact appears stale/wrong" (by ID)
|
|
12
12
|
* connect — "These two facts are related" (global extraction only)
|
|
13
|
+
*
|
|
14
|
+
* All LLM calls use direct HTTP (llm-direct.ts) — zero subprocess overhead.
|
|
13
15
|
*/
|
|
14
16
|
|
|
15
|
-
import { spawn, type ChildProcess } from "node:child_process";
|
|
16
17
|
import type { MemoryConfig } from "./types.ts";
|
|
17
18
|
import type { Fact, Edge } from "./factstore.ts";
|
|
18
|
-
import {
|
|
19
|
+
import { chatDirect, cleanModelOutput, isCloudModel, getBudgetCloudModel } from "./llm-direct.ts";
|
|
19
20
|
|
|
20
21
|
// ---------------------------------------------------------------------------
|
|
21
|
-
//
|
|
22
|
+
// Cancellation support
|
|
22
23
|
// ---------------------------------------------------------------------------
|
|
23
24
|
|
|
24
|
-
/**
|
|
25
|
-
let
|
|
26
|
-
|
|
27
|
-
/** Track all spawned processes for cleanup on module unload */
|
|
28
|
-
const allProcs = new Set<ChildProcess>();
|
|
29
|
-
|
|
30
|
-
/** Track the active direct-HTTP extraction AbortController for cancellation */
|
|
31
|
-
let activeDirectAbort: AbortController | null = null;
|
|
32
|
-
|
|
33
|
-
function killProc(proc: ChildProcess): void {
|
|
34
|
-
try {
|
|
35
|
-
if (proc.pid) process.kill(-proc.pid, "SIGTERM");
|
|
36
|
-
} catch {
|
|
37
|
-
try { proc.kill("SIGTERM"); } catch { /* already dead */ }
|
|
38
|
-
}
|
|
39
|
-
}
|
|
25
|
+
/** Active AbortController for the current extraction — killable externally */
|
|
26
|
+
let activeAbort: AbortController | null = null;
|
|
40
27
|
|
|
41
28
|
/**
|
|
42
|
-
* Kill the active extraction
|
|
43
|
-
* Returns true if something was
|
|
29
|
+
* Kill the active extraction (abort in-flight HTTP request).
|
|
30
|
+
* Returns true if something was aborted.
|
|
44
31
|
*/
|
|
45
32
|
export function killActiveExtraction(): boolean {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
killed = true;
|
|
51
|
-
}
|
|
52
|
-
if (activeDirectAbort) {
|
|
53
|
-
activeDirectAbort.abort();
|
|
54
|
-
activeDirectAbort = null;
|
|
55
|
-
killed = true;
|
|
33
|
+
if (activeAbort) {
|
|
34
|
+
activeAbort.abort();
|
|
35
|
+
activeAbort = null;
|
|
36
|
+
return true;
|
|
56
37
|
}
|
|
57
|
-
return
|
|
38
|
+
return false;
|
|
58
39
|
}
|
|
59
40
|
|
|
60
41
|
/**
|
|
61
|
-
* Kill
|
|
62
|
-
*
|
|
42
|
+
* Kill all active operations. Alias for killActiveExtraction since we no
|
|
43
|
+
* longer spawn subprocesses — kept for API compatibility with index.ts.
|
|
63
44
|
*/
|
|
64
45
|
export function killAllSubprocesses(): void {
|
|
65
|
-
|
|
66
|
-
killProc(proc);
|
|
67
|
-
}
|
|
68
|
-
allProcs.clear();
|
|
69
|
-
activeProc = null;
|
|
70
|
-
if (activeDirectAbort) {
|
|
71
|
-
activeDirectAbort.abort();
|
|
72
|
-
activeDirectAbort = null;
|
|
73
|
-
}
|
|
46
|
+
killActiveExtraction();
|
|
74
47
|
}
|
|
75
48
|
|
|
76
49
|
/** Check if an extraction is currently in progress */
|
|
77
50
|
export function isExtractionRunning(): boolean {
|
|
78
|
-
return
|
|
51
|
+
return activeAbort !== null;
|
|
79
52
|
}
|
|
80
53
|
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// Shared LLM call with abort tracking
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
|
|
81
58
|
/**
|
|
82
|
-
*
|
|
83
|
-
*
|
|
59
|
+
* Run a tracked LLM call — sets activeAbort for external cancellation.
|
|
60
|
+
* Only one tracked call at a time (new call aborts previous).
|
|
84
61
|
*/
|
|
85
|
-
function
|
|
86
|
-
cwd: string;
|
|
62
|
+
async function trackedChat(opts: {
|
|
87
63
|
model: string;
|
|
88
64
|
systemPrompt: string;
|
|
89
65
|
userMessage: string;
|
|
90
66
|
timeout: number;
|
|
67
|
+
maxTokens?: number;
|
|
91
68
|
label: string;
|
|
92
69
|
}): Promise<string> {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
}
|
|
70
|
+
// Cancel any previous tracked call
|
|
71
|
+
if (activeAbort) activeAbort.abort();
|
|
72
|
+
const controller = new AbortController();
|
|
73
|
+
activeAbort = controller;
|
|
98
74
|
|
|
99
|
-
|
|
100
|
-
const
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
];
|
|
108
|
-
|
|
109
|
-
const proc = spawn(omegon.command, args, {
|
|
110
|
-
cwd: opts.cwd,
|
|
111
|
-
stdio: ["ignore", "pipe", "pipe"],
|
|
112
|
-
// Detach into new session so child has no controlling terminal.
|
|
113
|
-
// Prevents child pi from opening /dev/tty and setting kitty keyboard
|
|
114
|
-
// protocol, which corrupts parent terminal state if child is killed.
|
|
115
|
-
detached: true,
|
|
116
|
-
env: { ...process.env, TERM: "dumb" },
|
|
75
|
+
try {
|
|
76
|
+
const result = await chatDirect({
|
|
77
|
+
model: opts.model,
|
|
78
|
+
systemPrompt: opts.systemPrompt,
|
|
79
|
+
userMessage: opts.userMessage,
|
|
80
|
+
maxTokens: opts.maxTokens ?? 2048,
|
|
81
|
+
timeout: opts.timeout,
|
|
82
|
+
signal: controller.signal,
|
|
117
83
|
});
|
|
118
|
-
|
|
119
|
-
|
|
84
|
+
return result.content;
|
|
85
|
+
} finally {
|
|
86
|
+
if (activeAbort === controller) activeAbort = null;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
120
89
|
|
|
121
|
-
|
|
122
|
-
|
|
90
|
+
/**
|
|
91
|
+
* Run an untracked LLM call — does NOT set activeAbort.
|
|
92
|
+
* Used for secondary calls (pruning, episodes) that shouldn't cancel extraction.
|
|
93
|
+
*/
|
|
94
|
+
async function untrackedChat(opts: {
|
|
95
|
+
model: string;
|
|
96
|
+
systemPrompt: string;
|
|
97
|
+
userMessage: string;
|
|
98
|
+
timeout: number;
|
|
99
|
+
maxTokens?: number;
|
|
100
|
+
}): Promise<string> {
|
|
101
|
+
const result = await chatDirect({
|
|
102
|
+
model: opts.model,
|
|
103
|
+
systemPrompt: opts.systemPrompt,
|
|
104
|
+
userMessage: opts.userMessage,
|
|
105
|
+
maxTokens: opts.maxTokens ?? 2048,
|
|
106
|
+
timeout: opts.timeout,
|
|
107
|
+
});
|
|
108
|
+
return result.content;
|
|
109
|
+
}
|
|
123
110
|
|
|
124
|
-
|
|
125
|
-
|
|
111
|
+
// ---------------------------------------------------------------------------
|
|
112
|
+
// Cloud fallback model — cheapest available for budget tasks
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
126
114
|
|
|
127
|
-
|
|
128
|
-
const killThisProc = (signal: NodeJS.Signals) => {
|
|
129
|
-
try {
|
|
130
|
-
if (proc.pid) process.kill(-proc.pid, signal);
|
|
131
|
-
} catch {
|
|
132
|
-
try { proc.kill(signal); } catch { /* already dead */ }
|
|
133
|
-
}
|
|
134
|
-
};
|
|
135
|
-
const timeoutHandle = setTimeout(() => {
|
|
136
|
-
killThisProc("SIGTERM");
|
|
137
|
-
escalationTimer = setTimeout(() => {
|
|
138
|
-
if (!proc.killed) killThisProc("SIGKILL");
|
|
139
|
-
}, 5000);
|
|
140
|
-
reject(new Error(`${opts.label} timed out`));
|
|
141
|
-
}, opts.timeout);
|
|
142
|
-
|
|
143
|
-
proc.on("close", (code) => {
|
|
144
|
-
clearTimeout(timeoutHandle);
|
|
145
|
-
if (escalationTimer) clearTimeout(escalationTimer);
|
|
146
|
-
activeProc = null;
|
|
147
|
-
allProcs.delete(proc);
|
|
148
|
-
|
|
149
|
-
const output = stdout.trim();
|
|
150
|
-
if (code === 0 && output) {
|
|
151
|
-
// Strip code fences if the model wraps output
|
|
152
|
-
const cleaned = output
|
|
153
|
-
.replace(/^```(?:jsonl?|json)?\n?/, "")
|
|
154
|
-
.replace(/\n?```\s*$/, "");
|
|
155
|
-
resolve(cleaned);
|
|
156
|
-
} else if (code === 0 && !output) {
|
|
157
|
-
resolve("");
|
|
158
|
-
} else {
|
|
159
|
-
reject(new Error(`${opts.label} failed (exit ${code}): ${stderr.slice(0, 500)}`));
|
|
160
|
-
}
|
|
161
|
-
});
|
|
115
|
+
const CLOUD_FALLBACK_MODEL = "claude-haiku-4-5";
|
|
162
116
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
});
|
|
117
|
+
function resolveModel(configModel: string): string {
|
|
118
|
+
// If the configured model is a cloud model with a key, use it directly
|
|
119
|
+
if (isCloudModel(configModel)) return configModel;
|
|
120
|
+
// If it's a local model, try it (chatDirect handles Ollama)
|
|
121
|
+
// If Ollama is down, chatDirect will throw and caller handles fallback
|
|
122
|
+
return configModel;
|
|
170
123
|
}
|
|
171
124
|
|
|
172
125
|
// ---------------------------------------------------------------------------
|
|
@@ -246,115 +199,13 @@ export function formatFactsForExtraction(facts: Fact[]): string {
|
|
|
246
199
|
return lines.join("\n");
|
|
247
200
|
}
|
|
248
201
|
|
|
249
|
-
// ---------------------------------------------------------------------------
|
|
250
|
-
// Direct Ollama extraction (no pi subprocess overhead)
|
|
251
|
-
// ---------------------------------------------------------------------------
|
|
252
|
-
|
|
253
|
-
/**
|
|
254
|
-
* Known cloud model prefixes. If a model starts with any of these, it's cloud.
|
|
255
|
-
* Everything else is assumed local (Ollama).
|
|
256
|
-
*
|
|
257
|
-
* This is an allowlist approach — new cloud providers must be added here.
|
|
258
|
-
* The alternative (detecting local by "name:tag" pattern) is too fragile
|
|
259
|
-
* since Ollama accepts bare names without tags.
|
|
260
|
-
*/
|
|
261
|
-
const CLOUD_MODEL_PREFIXES = [
|
|
262
|
-
"claude-", // Anthropic
|
|
263
|
-
"gpt-", // OpenAI
|
|
264
|
-
"o1-", "o3-", "o4-", // OpenAI reasoning
|
|
265
|
-
"gemini-", // Google
|
|
266
|
-
"mistral-", // Mistral cloud (not devstral which is local)
|
|
267
|
-
"command-", // Cohere
|
|
268
|
-
];
|
|
269
|
-
|
|
270
|
-
/**
|
|
271
|
-
* Check if extraction model is a local Ollama model.
|
|
272
|
-
* Uses an explicit cloud-prefix allowlist. Models with a "/" are assumed
|
|
273
|
-
* to be provider-qualified cloud models (e.g., "openai/gpt-4").
|
|
274
|
-
*/
|
|
275
|
-
function isLocalModel(model: string): boolean {
|
|
276
|
-
if (model.includes("/")) return false;
|
|
277
|
-
for (const prefix of CLOUD_MODEL_PREFIXES) {
|
|
278
|
-
if (model.startsWith(prefix)) return false;
|
|
279
|
-
}
|
|
280
|
-
return true;
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
/** Fallback cloud model when local extraction fails and Ollama is unreachable. */
|
|
284
|
-
const CLOUD_FALLBACK_MODEL = "claude-sonnet-4-6";
|
|
285
|
-
|
|
286
|
-
/**
|
|
287
|
-
* Run extraction directly via Ollama HTTP API.
|
|
288
|
-
* ~10x faster than spawning a pi subprocess — no process startup overhead.
|
|
289
|
-
* Returns null if Ollama is unreachable (caller should fall back to subprocess).
|
|
290
|
-
*/
|
|
291
|
-
async function runExtractionDirect(
|
|
292
|
-
systemPrompt: string,
|
|
293
|
-
userMessage: string,
|
|
294
|
-
config: MemoryConfig,
|
|
295
|
-
opts?: { ollamaUrl?: string },
|
|
296
|
-
): Promise<string | null> {
|
|
297
|
-
const baseUrl = opts?.ollamaUrl ?? process.env.LOCAL_INFERENCE_URL ?? "http://localhost:11434";
|
|
298
|
-
const timeout = config.extractionTimeout;
|
|
299
|
-
|
|
300
|
-
// Create an AbortController that can be killed externally via killActiveExtraction().
|
|
301
|
-
// Combines our controller with a timeout signal so either trigger aborts the fetch.
|
|
302
|
-
const controller = new AbortController();
|
|
303
|
-
activeDirectAbort = controller;
|
|
304
|
-
|
|
305
|
-
try {
|
|
306
|
-
const resp = await fetch(`${baseUrl}/api/chat`, {
|
|
307
|
-
method: "POST",
|
|
308
|
-
headers: { "Content-Type": "application/json" },
|
|
309
|
-
body: JSON.stringify({
|
|
310
|
-
model: config.extractionModel,
|
|
311
|
-
stream: false,
|
|
312
|
-
options: {
|
|
313
|
-
temperature: 0.2,
|
|
314
|
-
num_predict: 2048,
|
|
315
|
-
num_ctx: 32768,
|
|
316
|
-
},
|
|
317
|
-
messages: [
|
|
318
|
-
{ role: "system", content: systemPrompt },
|
|
319
|
-
{ role: "user", content: userMessage },
|
|
320
|
-
],
|
|
321
|
-
}),
|
|
322
|
-
signal: typeof AbortSignal.any === "function"
|
|
323
|
-
? AbortSignal.any([controller.signal, AbortSignal.timeout(timeout)])
|
|
324
|
-
: controller.signal, // Node <20.3: external abort works, timeout relies on Ollama's own
|
|
325
|
-
});
|
|
326
|
-
|
|
327
|
-
if (!resp.ok) return null;
|
|
328
|
-
|
|
329
|
-
const data = await resp.json() as { message?: { content?: string } };
|
|
330
|
-
const raw = data.message?.content?.trim();
|
|
331
|
-
if (!raw) return null;
|
|
332
|
-
|
|
333
|
-
// Strip code fences and <think> blocks from reasoning models
|
|
334
|
-
return raw
|
|
335
|
-
.replace(/^```(?:jsonl?|json)?\n?/, "")
|
|
336
|
-
.replace(/\n?```\s*$/, "")
|
|
337
|
-
.replace(/<think>[\s\S]*?<\/think>\s*/g, "")
|
|
338
|
-
.trim();
|
|
339
|
-
} catch {
|
|
340
|
-
return null;
|
|
341
|
-
} finally {
|
|
342
|
-
if (activeDirectAbort === controller) {
|
|
343
|
-
activeDirectAbort = null;
|
|
344
|
-
}
|
|
345
|
-
}
|
|
346
|
-
}
|
|
347
|
-
|
|
348
202
|
/**
|
|
349
203
|
* Run project extraction (Phase 1).
|
|
350
204
|
* Returns raw JSONL output from the extraction agent.
|
|
351
|
-
*
|
|
352
|
-
* When extractionModel is a local model, talks directly to Ollama HTTP API
|
|
353
|
-
* (no subprocess overhead). Falls back to pi subprocess for cloud models
|
|
354
|
-
* or if Ollama is unreachable.
|
|
205
|
+
* Uses direct HTTP — no subprocess spawning.
|
|
355
206
|
*/
|
|
356
207
|
export async function runExtractionV2(
|
|
357
|
-
|
|
208
|
+
_cwd: string,
|
|
358
209
|
currentFacts: Fact[],
|
|
359
210
|
recentConversation: string,
|
|
360
211
|
config: MemoryConfig,
|
|
@@ -370,21 +221,32 @@ export async function runExtractionV2(
|
|
|
370
221
|
"\n\nOutput JSONL actions based on what you observe.",
|
|
371
222
|
].join("");
|
|
372
223
|
|
|
373
|
-
|
|
374
|
-
if (isLocalModel(config.extractionModel)) {
|
|
375
|
-
const result = await runExtractionDirect(prompt, userMessage, config);
|
|
376
|
-
if (result !== null) return result;
|
|
377
|
-
// Ollama unreachable — fall through to subprocess with cloud fallback
|
|
378
|
-
}
|
|
224
|
+
const model = resolveModel(config.extractionModel);
|
|
379
225
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
226
|
+
try {
|
|
227
|
+
return await trackedChat({
|
|
228
|
+
model,
|
|
229
|
+
systemPrompt: prompt,
|
|
230
|
+
userMessage,
|
|
231
|
+
timeout: config.extractionTimeout,
|
|
232
|
+
label: "Project extraction",
|
|
233
|
+
});
|
|
234
|
+
} catch (err) {
|
|
235
|
+
// If configured model failed (e.g., Ollama down), try cloud fallback
|
|
236
|
+
if (!isCloudModel(model)) {
|
|
237
|
+
const fallback = getBudgetCloudModel();
|
|
238
|
+
if (fallback) {
|
|
239
|
+
return await trackedChat({
|
|
240
|
+
model: fallback,
|
|
241
|
+
systemPrompt: prompt,
|
|
242
|
+
userMessage,
|
|
243
|
+
timeout: config.extractionTimeout,
|
|
244
|
+
label: "Project extraction (cloud fallback)",
|
|
245
|
+
});
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
throw err;
|
|
249
|
+
}
|
|
388
250
|
}
|
|
389
251
|
|
|
390
252
|
// ---------------------------------------------------------------------------
|
|
@@ -486,10 +348,10 @@ export function formatGlobalExtractionInput(
|
|
|
486
348
|
/**
|
|
487
349
|
* Run global extraction (Phase 2).
|
|
488
350
|
* Only called when Phase 1 produced new facts.
|
|
489
|
-
* Uses direct
|
|
351
|
+
* Uses direct HTTP — no subprocess spawning.
|
|
490
352
|
*/
|
|
491
353
|
export async function runGlobalExtraction(
|
|
492
|
-
|
|
354
|
+
_cwd: string,
|
|
493
355
|
newProjectFacts: Fact[],
|
|
494
356
|
globalFacts: Fact[],
|
|
495
357
|
globalEdges: Edge[],
|
|
@@ -502,22 +364,31 @@ export async function runGlobalExtraction(
|
|
|
502
364
|
"\n\nOutput JSONL actions: promote generalizable facts and identify connections between GLOBAL facts.",
|
|
503
365
|
].join("");
|
|
504
366
|
|
|
505
|
-
const
|
|
367
|
+
const model = resolveModel(config.extractionModel);
|
|
506
368
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
369
|
+
try {
|
|
370
|
+
return await trackedChat({
|
|
371
|
+
model,
|
|
372
|
+
systemPrompt: buildGlobalExtractionPrompt(),
|
|
373
|
+
userMessage,
|
|
374
|
+
timeout: config.extractionTimeout,
|
|
375
|
+
label: "Global extraction",
|
|
376
|
+
});
|
|
377
|
+
} catch (err) {
|
|
378
|
+
if (!isCloudModel(model)) {
|
|
379
|
+
const fallback = getBudgetCloudModel();
|
|
380
|
+
if (fallback) {
|
|
381
|
+
return await trackedChat({
|
|
382
|
+
model: fallback,
|
|
383
|
+
systemPrompt: buildGlobalExtractionPrompt(),
|
|
384
|
+
userMessage,
|
|
385
|
+
timeout: config.extractionTimeout,
|
|
386
|
+
label: "Global extraction (cloud fallback)",
|
|
387
|
+
});
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
throw err;
|
|
511
391
|
}
|
|
512
|
-
|
|
513
|
-
return spawnExtraction({
|
|
514
|
-
cwd,
|
|
515
|
-
model: isLocalModel(config.extractionModel) ? CLOUD_FALLBACK_MODEL : config.extractionModel,
|
|
516
|
-
systemPrompt,
|
|
517
|
-
userMessage,
|
|
518
|
-
timeout: config.extractionTimeout,
|
|
519
|
-
label: "Global extraction",
|
|
520
|
-
});
|
|
521
392
|
}
|
|
522
393
|
|
|
523
394
|
// ---------------------------------------------------------------------------
|
|
@@ -558,96 +429,60 @@ export interface SessionTelemetry {
|
|
|
558
429
|
}
|
|
559
430
|
|
|
560
431
|
/**
|
|
561
|
-
* Generate a session episode via direct
|
|
562
|
-
*
|
|
563
|
-
* Falls back to subprocess-based generation if Ollama is unreachable.
|
|
432
|
+
* Generate a session episode via direct LLM call.
|
|
433
|
+
* Uses chatDirect — no subprocess. Tries configured model, falls back to budget cloud.
|
|
564
434
|
*/
|
|
565
435
|
export async function generateEpisodeDirect(
|
|
566
436
|
recentConversation: string,
|
|
567
437
|
config: MemoryConfig,
|
|
568
|
-
opts?: { ollamaUrl?: string; model?: string },
|
|
569
438
|
): Promise<EpisodeOutput | null> {
|
|
570
|
-
const
|
|
571
|
-
const model = opts?.model ?? process.env.LOCAL_EPISODE_MODEL ?? "qwen3:30b";
|
|
439
|
+
const userMessage = `Session conversation:\n\n${recentConversation}\n\nOutput the episode JSON.`;
|
|
572
440
|
const timeout = Math.min(config.shutdownExtractionTimeout, 10_000);
|
|
573
441
|
|
|
442
|
+
// Try configured extraction model first
|
|
574
443
|
try {
|
|
575
|
-
const
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
options: { temperature: 0.3, num_predict: 512 },
|
|
582
|
-
messages: [
|
|
583
|
-
{ role: "system", content: EPISODE_PROMPT },
|
|
584
|
-
{ role: "user", content: `Session conversation:\n\n${recentConversation}\n\nOutput the episode JSON.` },
|
|
585
|
-
],
|
|
586
|
-
}),
|
|
587
|
-
signal: AbortSignal.timeout(timeout),
|
|
444
|
+
const raw = await untrackedChat({
|
|
445
|
+
model: resolveModel(config.extractionModel),
|
|
446
|
+
systemPrompt: EPISODE_PROMPT,
|
|
447
|
+
userMessage,
|
|
448
|
+
timeout,
|
|
449
|
+
maxTokens: 512,
|
|
588
450
|
});
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
const data = await resp.json() as { message?: { content?: string } };
|
|
593
|
-
const raw = data.message?.content?.trim();
|
|
594
|
-
if (!raw) return null;
|
|
595
|
-
|
|
596
|
-
const cleaned = raw
|
|
597
|
-
.replace(/^```(?:json)?\n?/, "")
|
|
598
|
-
.replace(/\n?```\s*$/, "")
|
|
599
|
-
// Strip <think>...</think> blocks from reasoning models
|
|
600
|
-
.replace(/<think>[\s\S]*?<\/think>\s*/g, "")
|
|
601
|
-
.trim();
|
|
602
|
-
const parsed = JSON.parse(cleaned);
|
|
603
|
-
|
|
604
|
-
if (parsed.title && parsed.narrative) {
|
|
605
|
-
return { title: parsed.title, narrative: parsed.narrative };
|
|
451
|
+
if (raw) {
|
|
452
|
+
const parsed = JSON.parse(raw);
|
|
453
|
+
if (parsed.title && parsed.narrative) return parsed as EpisodeOutput;
|
|
606
454
|
}
|
|
607
|
-
return null;
|
|
608
455
|
} catch {
|
|
609
|
-
|
|
456
|
+
// Fall through
|
|
610
457
|
}
|
|
611
|
-
}
|
|
612
458
|
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
});
|
|
631
|
-
|
|
632
|
-
if (!raw.trim()) return null;
|
|
633
|
-
|
|
634
|
-
// Strip any markdown code fences
|
|
635
|
-
const cleaned = raw.replace(/^```(?:json)?\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
636
|
-
const parsed = JSON.parse(cleaned);
|
|
637
|
-
|
|
638
|
-
if (parsed.title && parsed.narrative) {
|
|
639
|
-
return { title: parsed.title, narrative: parsed.narrative };
|
|
459
|
+
// Try budget cloud model
|
|
460
|
+
const budgetModel = getBudgetCloudModel();
|
|
461
|
+
if (budgetModel && budgetModel !== config.extractionModel) {
|
|
462
|
+
try {
|
|
463
|
+
const raw = await untrackedChat({
|
|
464
|
+
model: budgetModel,
|
|
465
|
+
systemPrompt: EPISODE_PROMPT,
|
|
466
|
+
userMessage,
|
|
467
|
+
timeout,
|
|
468
|
+
maxTokens: 512,
|
|
469
|
+
});
|
|
470
|
+
if (raw) {
|
|
471
|
+
const parsed = JSON.parse(raw);
|
|
472
|
+
if (parsed.title && parsed.narrative) return parsed as EpisodeOutput;
|
|
473
|
+
}
|
|
474
|
+
} catch {
|
|
475
|
+
// Fall through
|
|
640
476
|
}
|
|
641
|
-
return null;
|
|
642
|
-
} catch {
|
|
643
|
-
return null;
|
|
644
477
|
}
|
|
478
|
+
|
|
479
|
+
return null;
|
|
645
480
|
}
|
|
646
481
|
|
|
647
482
|
/**
|
|
648
483
|
* Build a minimum viable episode from raw session telemetry.
|
|
649
484
|
* Zero I/O — assembled deterministically from already-collected data.
|
|
650
|
-
* This is the guaranteed floor: always emitted when every model
|
|
485
|
+
* This is the guaranteed floor: always emitted when every model fails.
|
|
651
486
|
*/
|
|
652
487
|
export function buildTemplateEpisode(telemetry: SessionTelemetry): EpisodeOutput {
|
|
653
488
|
const allModified = [...new Set([...telemetry.filesWritten, ...telemetry.filesEdited])];
|
|
@@ -684,102 +519,20 @@ export function buildTemplateEpisode(telemetry: SessionTelemetry): EpisodeOutput
|
|
|
684
519
|
}
|
|
685
520
|
|
|
686
521
|
/**
|
|
687
|
-
* Generate a session episode with
|
|
688
|
-
* 1.
|
|
689
|
-
* 2.
|
|
690
|
-
* 3. Ollama (direct HTTP — only if user has LOCAL_EPISODE_MODEL configured)
|
|
691
|
-
* 4. Template episode (deterministic, zero I/O) — always succeeds
|
|
692
|
-
*
|
|
693
|
-
* Cloud is first because: (1) it's always available if pi is configured at all,
|
|
694
|
-
* (2) retribution-tier cost is negligible (~$0.0001/call), (3) model quality
|
|
695
|
-
* is substantially better than typical local models for narrative generation.
|
|
696
|
-
* Ollama is tried last as an optional local preference, not a dependency.
|
|
522
|
+
* Generate a session episode with fallback chain:
|
|
523
|
+
* 1. Direct LLM call (configured model → budget cloud)
|
|
524
|
+
* 2. Template episode (deterministic, zero I/O) — always succeeds
|
|
697
525
|
*
|
|
698
|
-
*
|
|
699
|
-
* chain fits within config.shutdownExtractionTimeout.
|
|
526
|
+
* No subprocess spawning. Total time bounded by config timeouts.
|
|
700
527
|
*/
|
|
701
528
|
export async function generateEpisodeWithFallback(
|
|
702
529
|
recentConversation: string,
|
|
703
530
|
telemetry: SessionTelemetry,
|
|
704
531
|
config: MemoryConfig,
|
|
705
|
-
|
|
532
|
+
_cwd: string,
|
|
706
533
|
): Promise<EpisodeOutput> {
|
|
707
|
-
const
|
|
708
|
-
|
|
709
|
-
Math.floor(config.shutdownExtractionTimeout / 3),
|
|
710
|
-
);
|
|
711
|
-
|
|
712
|
-
if (config.episodeFallbackChain) {
|
|
713
|
-
// Step 1: Cloud primary (episodeModel — codex-spark by default)
|
|
714
|
-
// Always available if the user has a provider configured.
|
|
715
|
-
try {
|
|
716
|
-
const raw = await spawnExtraction({
|
|
717
|
-
cwd,
|
|
718
|
-
model: config.episodeModel,
|
|
719
|
-
systemPrompt: EPISODE_PROMPT,
|
|
720
|
-
userMessage: `Session conversation:\n\n${recentConversation}\n\nOutput the episode JSON.`,
|
|
721
|
-
timeout: stepTimeout,
|
|
722
|
-
label: "Episode generation (primary)",
|
|
723
|
-
});
|
|
724
|
-
if (raw.trim()) {
|
|
725
|
-
const cleaned = raw.replace(/^```(?:json)?\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
726
|
-
const parsed = JSON.parse(cleaned);
|
|
727
|
-
if (parsed.title && parsed.narrative) return parsed as EpisodeOutput;
|
|
728
|
-
}
|
|
729
|
-
} catch {
|
|
730
|
-
// Fall through
|
|
731
|
-
}
|
|
732
|
-
|
|
733
|
-
// Step 2: Cloud retribution tier (haiku — fast, cheap, independent model)
|
|
734
|
-
try {
|
|
735
|
-
const raw = await spawnExtraction({
|
|
736
|
-
cwd,
|
|
737
|
-
model: "claude-haiku-4-5",
|
|
738
|
-
systemPrompt: EPISODE_PROMPT,
|
|
739
|
-
userMessage: `Session conversation:\n\n${recentConversation}\n\nOutput the episode JSON.`,
|
|
740
|
-
timeout: stepTimeout,
|
|
741
|
-
label: "Episode generation (retribution fallback)",
|
|
742
|
-
});
|
|
743
|
-
if (raw.trim()) {
|
|
744
|
-
const cleaned = raw.replace(/^```(?:json)?\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
745
|
-
const parsed = JSON.parse(cleaned);
|
|
746
|
-
if (parsed.title && parsed.narrative) return parsed as EpisodeOutput;
|
|
747
|
-
}
|
|
748
|
-
} catch {
|
|
749
|
-
// Fall through
|
|
750
|
-
}
|
|
751
|
-
|
|
752
|
-
// Step 3: Ollama (optional — only meaningful if user has a local model running)
|
|
753
|
-
if (process.env.LOCAL_EPISODE_MODEL || process.env.LOCAL_INFERENCE_URL) {
|
|
754
|
-
try {
|
|
755
|
-
const result = await generateEpisodeDirect(recentConversation, config);
|
|
756
|
-
if (result) return result;
|
|
757
|
-
} catch {
|
|
758
|
-
// Fall through to template
|
|
759
|
-
}
|
|
760
|
-
}
|
|
761
|
-
} else {
|
|
762
|
-
// Chain disabled — try cloud primary only, no Ollama
|
|
763
|
-
try {
|
|
764
|
-
const raw = await spawnExtraction({
|
|
765
|
-
cwd,
|
|
766
|
-
model: config.episodeModel,
|
|
767
|
-
systemPrompt: EPISODE_PROMPT,
|
|
768
|
-
userMessage: `Session conversation:\n\n${recentConversation}\n\nOutput the episode JSON.`,
|
|
769
|
-
timeout: stepTimeout,
|
|
770
|
-
label: "Episode generation",
|
|
771
|
-
});
|
|
772
|
-
if (raw.trim()) {
|
|
773
|
-
const cleaned = raw.replace(/^```(?:json)?\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
774
|
-
const parsed = JSON.parse(cleaned);
|
|
775
|
-
if (parsed.title && parsed.narrative) return parsed as EpisodeOutput;
|
|
776
|
-
}
|
|
777
|
-
} catch {
|
|
778
|
-
// Fall through
|
|
779
|
-
}
|
|
780
|
-
}
|
|
781
|
-
|
|
782
|
-
// Step 4: Template episode — guaranteed floor, zero I/O
|
|
534
|
+
const result = await generateEpisodeDirect(recentConversation, config);
|
|
535
|
+
if (result) return result;
|
|
783
536
|
return buildTemplateEpisode(telemetry);
|
|
784
537
|
}
|
|
785
538
|
|
|
@@ -803,6 +556,7 @@ Rules:
|
|
|
803
556
|
/**
|
|
804
557
|
* Run a targeted LLM archival pass over a single section when it exceeds the ceiling.
|
|
805
558
|
* Returns the list of fact IDs recommended for archival.
|
|
559
|
+
* Uses direct HTTP — no subprocess spawning.
|
|
806
560
|
*/
|
|
807
561
|
export async function runSectionPruningPass(
|
|
808
562
|
section: string,
|
|
@@ -827,38 +581,56 @@ export async function runSectionPruningPass(
|
|
|
827
581
|
`Return a JSON array of fact IDs to archive. Archive at least ${excessCount} to bring the section under ${targetCount + 1}.`,
|
|
828
582
|
].join("\n");
|
|
829
583
|
|
|
830
|
-
|
|
831
|
-
if (isLocalModel(config.extractionModel)) {
|
|
832
|
-
try {
|
|
833
|
-
const raw = await runExtractionDirect(SECTION_PRUNING_PROMPT, userMessage, config);
|
|
834
|
-
if (raw) {
|
|
835
|
-
const cleaned = raw.replace(/^```(?:json)?\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
836
|
-
const parsed = JSON.parse(cleaned);
|
|
837
|
-
if (Array.isArray(parsed)) return parsed.filter((id: unknown) => typeof id === "string");
|
|
838
|
-
}
|
|
839
|
-
} catch {
|
|
840
|
-
// Fall through to cloud
|
|
841
|
-
}
|
|
842
|
-
}
|
|
584
|
+
const model = resolveModel(config.extractionModel);
|
|
843
585
|
|
|
844
|
-
// Cloud fallback: use episodeModel (cloud tier, always available)
|
|
845
586
|
try {
|
|
846
|
-
const raw = await
|
|
847
|
-
|
|
848
|
-
model: config.episodeModel,
|
|
587
|
+
const raw = await untrackedChat({
|
|
588
|
+
model,
|
|
849
589
|
systemPrompt: SECTION_PRUNING_PROMPT,
|
|
850
590
|
userMessage,
|
|
851
591
|
timeout: 30_000,
|
|
852
|
-
|
|
592
|
+
maxTokens: 1024,
|
|
853
593
|
});
|
|
854
|
-
if (raw
|
|
855
|
-
const
|
|
856
|
-
const parsed = JSON.parse(cleaned);
|
|
594
|
+
if (raw) {
|
|
595
|
+
const parsed = JSON.parse(raw);
|
|
857
596
|
if (Array.isArray(parsed)) return parsed.filter((id: unknown) => typeof id === "string");
|
|
858
597
|
}
|
|
859
598
|
} catch {
|
|
860
|
-
//
|
|
599
|
+
// Try budget cloud fallback
|
|
600
|
+
const fallback = getBudgetCloudModel();
|
|
601
|
+
if (fallback && fallback !== model) {
|
|
602
|
+
try {
|
|
603
|
+
const raw = await untrackedChat({
|
|
604
|
+
model: fallback,
|
|
605
|
+
systemPrompt: SECTION_PRUNING_PROMPT,
|
|
606
|
+
userMessage,
|
|
607
|
+
timeout: 30_000,
|
|
608
|
+
maxTokens: 1024,
|
|
609
|
+
});
|
|
610
|
+
if (raw) {
|
|
611
|
+
const parsed = JSON.parse(raw);
|
|
612
|
+
if (Array.isArray(parsed)) return parsed.filter((id: unknown) => typeof id === "string");
|
|
613
|
+
}
|
|
614
|
+
} catch {
|
|
615
|
+
// Best effort
|
|
616
|
+
}
|
|
617
|
+
}
|
|
861
618
|
}
|
|
862
619
|
|
|
863
620
|
return [];
|
|
864
621
|
}
|
|
622
|
+
|
|
623
|
+
// ---------------------------------------------------------------------------
|
|
624
|
+
// Legacy API shim — generateEpisode (subprocess-based) now delegates to direct
|
|
625
|
+
// ---------------------------------------------------------------------------
|
|
626
|
+
|
|
627
|
+
/**
|
|
628
|
+
* @deprecated Use generateEpisodeDirect instead. Kept for API compatibility.
|
|
629
|
+
*/
|
|
630
|
+
export async function generateEpisode(
|
|
631
|
+
_cwd: string,
|
|
632
|
+
recentConversation: string,
|
|
633
|
+
config: MemoryConfig,
|
|
634
|
+
): Promise<EpisodeOutput | null> {
|
|
635
|
+
return generateEpisodeDirect(recentConversation, config);
|
|
636
|
+
}
|