@aexol/spectral 0.3.7 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,813 @@
1
+ import { agentLoop } from "@mariozechner/pi-agent-core";
2
+ import { Type } from "@mariozechner/pi-ai";
3
+ import { debugLog, isDebugLogEnabled } from "./debug-log.js";
4
+ import { hashId } from "./ids.js";
5
+ import { AGENT_LOOP_MAX_TOKENS, boundedMaxTokens } from "./model-budget.js";
6
+ import { observationsToPromptLines } from "./observer.js";
7
+ import { buildPrunerPassGuidance, buildReflectorPassGuidance, CONTEXT_USAGE_INSTRUCTIONS, PRUNER_SYSTEM, REFLECTOR_SYSTEM } from "./prompts.js";
8
+ import { truncateRecordContent } from "./serialize.js";
9
+ import { estimateStringTokens } from "./tokens.js";
10
+ import { reflectionContent, reflectionToPromptLine } from "./types.js";
11
/** Upper bound on the number of reflector passes attempted by one runReflector call. */
export const REFLECTOR_MAX_PASSES = 2;
/** Upper bound on the number of pruner passes attempted by one runPruner call. */
export const PRUNER_MAX_PASSES = 2;
/** Fraction of the caller's token budget that runPruner uses as its pruning target. */
const PRUNER_TARGET_RATIO = 0.8;
14
/**
 * Estimates how many tokens an observation pool occupies once rendered for a prompt.
 *
 * @param observations Observations to measure.
 * @returns The token estimate for the newline-joined prompt lines of the pool.
 */
export function observationPoolTokens(observations) {
    const promptLines = observationsToPromptLines(observations);
    const promptText = promptLines.join("\n");
    return estimateStringTokens(promptText);
}
17
/**
 * Renders reflections as newline-separated prompt lines.
 *
 * @param items Reflections to render.
 * @returns The joined prompt lines, or the placeholder "(none yet)" when the list is empty.
 */
function joinReflectionsOrEmpty(items) {
    if (!items.length) {
        return "(none yet)";
    }
    return items.map(reflectionToPromptLine).join("\n");
}
20
/**
 * Renders observations as newline-separated prompt lines.
 *
 * @param items Observations to render.
 * @returns The joined prompt lines, or the placeholder "(none yet)" when the list is empty.
 */
function joinObservationsOrEmpty(items) {
    if (!items.length) {
        return "(none yet)";
    }
    return observationsToPromptLines(items).join("\n");
}
23
/**
 * Produces a log-safe description of a message's content without exposing the content itself.
 *
 * @param content Message content; either an array of blocks or any other value.
 * @returns For non-arrays, the `typeof` of the value. For arrays, one entry per block:
 *          the block's `type` when it is a string, the `typeof` of that `type` when it is
 *          not, or the `typeof` of the block when it has no `type` property.
 */
function summarizeContentTypes(content) {
    if (!Array.isArray(content)) {
        return typeof content;
    }
    const describeBlock = (block) => {
        const hasTypeField = Boolean(block) && typeof block === "object" && "type" in block;
        if (!hasTypeField) {
            return typeof block;
        }
        const { type } = block;
        return typeof type === "string" ? type : typeof type;
    };
    return content.map((block) => describeBlock(block));
}
34
/**
 * Builds a compact, log-friendly summary of an agent message.
 *
 * @param message Any value; expected to be a message-like object.
 * @returns `{ type }` (the `typeof` of the input) when the input is not a non-null object.
 *          Otherwise an object with `role` ("unknown" unless it is a string), any of the
 *          optional string fields that are present and are strings, `isError` when it is a
 *          boolean, and `contentTypes` when the message has a `content` property.
 */
function summarizeAgentMessage(message) {
    if (!message || typeof message !== "object") {
        return { type: typeof message };
    }
    const record = message;
    const summary = {
        role: typeof record.role === "string" ? record.role : "unknown",
    };
    // Copied in this fixed order so the summary's key order stays stable in logs.
    const optionalStringFields = ["api", "provider", "model", "stopReason", "errorMessage", "toolName"];
    for (const field of optionalStringFields) {
        if (field in record && typeof record[field] === "string") {
            summary[field] = record[field];
        }
    }
    if ("isError" in record && typeof record.isError === "boolean") {
        summary.isError = record.isError;
    }
    if ("content" in record) {
        // Only the block types are recorded, never the content itself.
        summary.contentTypes = summarizeContentTypes(record.content);
    }
    return summary;
}
59
/**
 * Summarizes the last assistant message in a message list.
 *
 * @param messages Candidate message list; non-arrays are tolerated.
 * @returns The summary of the last entry whose `role` is "assistant", or `undefined`
 *          when the input is not an array or contains no such entry.
 */
function finalAssistantSummary(messages) {
    if (!Array.isArray(messages)) {
        return undefined;
    }
    // Walk backwards so the most recent assistant message wins.
    let index = messages.length;
    while (index-- > 0) {
        const candidate = messages[index];
        if (candidate && typeof candidate === "object" && candidate.role === "assistant") {
            return summarizeAgentMessage(candidate);
        }
    }
    return undefined;
}
70
/**
 * Summarizes every tool-result message of a turn for logging.
 *
 * @param toolResults Tool-result messages; non-arrays are tolerated.
 * @returns One message summary per entry, or an empty array when the input is not an array.
 */
function summarizeToolResults(toolResults) {
    return Array.isArray(toolResults) ? toolResults.map(summarizeAgentMessage) : [];
}
75
/**
 * Lists the own enumerable keys of an object so logs can show its shape without its values.
 *
 * @param value Any value.
 * @returns The sorted key list for non-null objects (arrays yield their index keys),
 *          otherwise `undefined`.
 */
function summarizeObjectKeys(value) {
    if (!value || typeof value !== "object") {
        return undefined;
    }
    const keys = Object.keys(value);
    keys.sort();
    return keys;
}
78
/**
 * Writes one debug-log entry for an agent-loop event, recording only summaries
 * (roles, types, counts, key names) rather than message or tool payloads.
 *
 * @param scope Log prefix; this file passes "reflector" or "pruner".
 * @param pass  Pass number attached to every entry.
 * @param event Agent-loop event; its `type` selects which fields are summarized.
 *              Event types not listed below produce no log entry.
 */
function logAgentLoopEvent(scope, pass, event) {
    let details;
    switch (event.type) {
        case "agent_start":
        case "turn_start":
            details = { pass };
            break;
        case "message_start":
        case "message_end":
            details = { pass, message: summarizeAgentMessage(event.message) };
            break;
        case "message_update":
            details = {
                pass,
                message: summarizeAgentMessage(event.message),
                assistantEventType: event.assistantMessageEvent.type,
            };
            break;
        case "turn_end":
            details = {
                pass,
                message: summarizeAgentMessage(event.message),
                toolResultCount: event.toolResults.length,
                toolResults: summarizeToolResults(event.toolResults),
            };
            break;
        case "agent_end":
            details = {
                pass,
                messageCount: event.messages.length,
                finalAssistant: finalAssistantSummary(event.messages),
            };
            break;
        case "tool_execution_start":
            details = {
                pass,
                toolCallId: event.toolCallId,
                toolName: event.toolName,
                argsKeys: summarizeObjectKeys(event.args),
            };
            break;
        case "tool_execution_update":
            details = {
                pass,
                toolCallId: event.toolCallId,
                toolName: event.toolName,
                argsKeys: summarizeObjectKeys(event.args),
                partialResultKeys: summarizeObjectKeys(event.partialResult),
            };
            break;
        case "tool_execution_end":
            details = {
                pass,
                toolCallId: event.toolCallId,
                toolName: event.toolName,
                isError: event.isError,
                resultKeys: summarizeObjectKeys(event.result),
            };
            break;
        default:
            // Unrecognized event types are intentionally not logged.
            return;
    }
    debugLog(`${scope}.agent_loop.${event.type}`, details);
}
138
/**
 * Tags each observation by how many provenance-carrying reflections cite it.
 *
 * A reflection counts at most once per observation, even if it lists the same id
 * several times. Legacy reflections (plain strings or records with `legacy === true`)
 * never count. Entries that are not objects, or that carry no `supportingObservationIds`
 * array, are treated as citing nothing instead of raising a TypeError.
 *
 * @param reflections Reflections in any of the shapes this module handles.
 * @param observations Active observations; only their `id` is read.
 * @returns A Map from observation id to "uncited" (0 citing reflections),
 *          "cited" (1-3) or "reinforced" (4 or more), in observation order.
 */
export function deriveObservationCoverageTags(reflections, observations) {
    // Number of distinct citing reflections at which an observation counts as reinforced.
    const REINFORCED_MIN_CITATIONS = 4;
    const counts = new Map();
    for (const observation of observations) {
        counts.set(observation.id, 0);
    }
    for (const reflection of reflections) {
        if (!reflection || typeof reflection !== "object" || reflection.legacy === true) {
            continue;
        }
        const citedIds = reflection.supportingObservationIds;
        if (!Array.isArray(citedIds)) {
            continue;
        }
        // The Set collapses repeated ids so one reflection adds at most one citation each.
        for (const id of new Set(citedIds)) {
            if (counts.has(id)) {
                counts.set(id, counts.get(id) + 1);
            }
        }
    }
    const tags = new Map();
    for (const observation of observations) {
        const count = counts.get(observation.id) ?? 0;
        let tag = "cited";
        if (count === 0) {
            tag = "uncited";
        }
        else if (count >= REINFORCED_MIN_CITATIONS) {
            tag = "reinforced";
        }
        tags.set(observation.id, tag);
    }
    return tags;
}
157
/**
 * Counts how many observations fall under each coverage tag.
 *
 * @param reflections Reflections used to derive coverage.
 * @param observations Observations to tally.
 * @returns `{ uncited, cited, reinforced }` totals across the given observations.
 */
export function coverageTagCounts(reflections, observations) {
    const tagById = deriveObservationCoverageTags(reflections, observations);
    const totals = { uncited: 0, cited: 0, reinforced: 0 };
    for (const { id } of observations) {
        const tag = tagById.get(id) ?? "uncited";
        totals[tag]++;
    }
    return totals;
}
165
/**
 * Renders the observation pool for the pruner prompt, one line per observation,
 * annotated with its coverage tag.
 *
 * @param observations Observations to render (`id`, `timestamp`, `relevance`, `content` are read).
 * @param coverageTags Map from observation id to coverage tag; missing ids render as "uncited".
 * @returns Newline-joined lines, or "(none yet)" when there are no observations.
 */
export function renderObservationsForPrunerPrompt(observations, coverageTags) {
    if (observations.length === 0) {
        return "(none yet)";
    }
    const formatLine = (observation) => {
        const coverage = coverageTags.get(observation.id) ?? "uncited";
        const { id, timestamp, relevance, content } = observation;
        return `[${id}] ${timestamp} [${relevance}] [coverage: ${coverage}] ${content}`;
    };
    const lines = observations.map((observation) => formatLine(observation));
    return lines.join("\n");
}
175
/**
 * Converts plain-string reflections into legacy records and de-duplicates by content.
 *
 * - String reflections have whitespace runs collapsed to single spaces and become
 *   `{ id, content, supportingObservationIds: [], legacy: true }` records.
 * - Record reflections are kept as-is.
 * - When two entries share the same (trimmed, truncated) content, the first one wins,
 *   except that a later non-legacy record replaces an earlier legacy record in place.
 * - Entries whose content is empty after trimming are passed through untouched and are
 *   not considered for de-duplication.
 *
 * @param reflections Reflections in string or record form.
 * @returns A new array; the input array is not modified.
 */
export function migrateLegacyReflections(reflections) {
    const result = [];
    const indexByContent = new Map();
    for (const reflection of reflections) {
        const isPlainString = typeof reflection === "string";
        let text = reflectionContent(reflection).trim();
        if (isPlainString) {
            text = text.replace(/\s+/g, " ");
        }
        if (!text) {
            result.push(reflection);
            continue;
        }
        const content = truncateRecordContent(text);
        const priorIndex = indexByContent.get(content);
        if (priorIndex === undefined) {
            // First time this content is seen: keep records, wrap strings as legacy records.
            const entry = isPlainString
                ? { id: hashId(content), content, supportingObservationIds: [], legacy: true }
                : reflection;
            result.push(entry);
            indexByContent.set(content, result.length - 1);
            continue;
        }
        // Duplicate content: only upgrade a legacy record to a record with provenance.
        const prior = result[priorIndex];
        if (typeof prior !== "string" && prior.legacy === true && !isPlainString && reflection.legacy !== true) {
            result[priorIndex] = reflection;
        }
    }
    return result;
}
209
// Schema for the text of a single reflection proposal.
const ReflectionProposalContentSchema = Type.String({
    minLength: 1,
    description: "Single-line plain prose reflection. No markdown, no tags, no timestamp, no bullets.",
});
// Schema for one supporting observation id (12 lowercase hex characters).
const SupportingObservationIdSchema = Type.String({
    pattern: "^[a-f0-9]{12}$",
    description: "Exact observation id from the current-observations list whose durable meaning is captured by this reflection.",
});
// Schema for one proposal: its text plus at least one supporting observation id.
const ReflectionProposalSchema = Type.Object({
    content: ReflectionProposalContentSchema,
    supportingObservationIds: Type.Array(SupportingObservationIdSchema, {
        minItems: 1,
        description: "Current observation ids whose durable meaning is captured by this reflection and can be treated as covered active-memory detail. " +
            "Do not include observations whose unique exact detail or current task state is not captured. Use only ids shown in the current observations list; never invent ids.",
    }),
});
// Parameter schema of the record_reflections tool: a non-empty batch of proposals.
const RecordReflectionsSchema = Type.Object({
    reflections: Type.Array(ReflectionProposalSchema, {
        minItems: 1,
        description: "Batch of new reflection proposals with their supporting observation ids.",
    }),
});
228
/**
 * Validates, de-duplicates and orders a list of supporting observation ids.
 *
 * @param supportingObservationIds Ids proposed as support; may be missing or empty.
 * @param allowedObservationIds Ids that may be cited; their order defines the output order.
 * @returns The unique ids sorted by first position in `allowedObservationIds`, or
 *          `undefined` when the list is missing, empty, or contains any id that is not allowed.
 */
export function normalizeSupportingObservationIds(supportingObservationIds, allowedObservationIds) {
    if (!supportingObservationIds || supportingObservationIds.length === 0) {
        return undefined;
    }
    // Rank of each allowed id = index of its first occurrence.
    const rankById = new Map();
    for (const [position, id] of allowedObservationIds.entries()) {
        if (!rankById.has(id)) {
            rankById.set(id, position);
        }
    }
    const uniqueIds = new Set();
    for (const id of supportingObservationIds) {
        // A single unknown id invalidates the whole list.
        if (!rankById.has(id)) {
            return undefined;
        }
        uniqueIds.add(id);
    }
    if (uniqueIds.size === 0) {
        return undefined;
    }
    const rankOf = (id) => rankById.get(id) ?? 0;
    return [...uniqueIds].sort((left, right) => rankOf(left) - rankOf(right));
}
246
/**
 * Creates the zeroed statistics record for one reflector pass.
 *
 * @param pass Pass number the record belongs to.
 * @returns A fresh stats object with all counters at 0 and `failed` set to false.
 */
function emptyReflectorPassStats(pass) {
    const zeroedCounters = {
        toolCalls: 0,
        accepted: 0,
        added: 0,
        merged: 0,
        promoted: 0,
        duplicates: 0,
        unsupported: 0,
    };
    return { pass, ...zeroedCounters, failed: false };
}
259
/**
 * Folds the outcome of one record_reflections tool call into a pass's statistics.
 * Mutates `target` in place: bumps `toolCalls` by one and adds each outcome counter.
 *
 * @param target Pass statistics to update.
 * @param result Outcome counters of a single applyReflectionProposals call.
 */
function addReflectionProposalStats(target, result) {
    target.toolCalls++;
    const outcomeCounters = ["accepted", "added", "merged", "promoted", "duplicates", "unsupported"];
    for (const counter of outcomeCounters) {
        target[counter] += result[counter];
    }
}
268
/**
 * Sums per-pass reflector statistics into run-level totals.
 *
 * @param passes Per-pass statistics, in execution order.
 * @returns Totals for every counter plus the original `passes` array. `failedPass` is
 *          set to the pass number of the first failed pass and is absent when none failed.
 */
function aggregateReflectorStats(passes) {
    const totals = {
        passes,
        toolCalls: 0,
        accepted: 0,
        added: 0,
        merged: 0,
        promoted: 0,
        duplicates: 0,
        unsupported: 0,
    };
    const counters = ["toolCalls", "accepted", "added", "merged", "promoted", "duplicates", "unsupported"];
    for (const passStats of passes) {
        for (const counter of counters) {
            totals[counter] += passStats[counter];
        }
        // Only the first failure is recorded.
        if (passStats.failed && totals.failedPass === undefined) {
            totals.failedPass = passStats.pass;
        }
    }
    return totals;
}
292
/**
 * Builds the per-pass settings handed to a reflector pass.
 *
 * @param pass 1-based pass number.
 * @returns The pass number, the pass limit, and the minimum number of supporting
 *          observation ids a proposal needs: 2 on the first pass, 1 on later passes.
 */
function reflectorPassContext(pass) {
    const isFirstPass = pass === 1;
    return {
        pass,
        maxPasses: REFLECTOR_MAX_PASSES,
        minSupportingObservationIds: isFirstPass ? 2 : 1,
    };
}
299
/**
 * Returns the key used to match a stored reflection against proposal content:
 * the reflection's content with surrounding whitespace removed.
 *
 * @param reflection Reflection in string or record form.
 * @returns The trimmed content.
 */
function reflectionContentKey(reflection) {
    const content = reflectionContent(reflection);
    return content.trim();
}
302
/**
 * Normalizes the text of a reflection proposal.
 *
 * @param content Raw proposal text.
 * @returns The trimmed, truncated text, or `undefined` when the result is empty or
 *          still contains a line break (proposals must be single-line).
 */
function normalizeReflectionProposalContent(content) {
    const candidate = truncateRecordContent(content.trim());
    if (candidate && !/[\r\n]/.test(candidate)) {
        return candidate;
    }
    return undefined;
}
308
/**
 * Merges newly proposed supporting ids into a reflection's existing support.
 *
 * Existing ids that are no longer in the allowed (current) list are kept first, in their
 * original order. Existing ids that are still allowed are combined with the incoming ids
 * and normalized (validated, de-duplicated, ordered by the allowed list).
 *
 * @param existing Ids already recorded on the reflection.
 * @param incoming Newly proposed ids.
 * @param allowedObservationIds Ids of the current observations.
 * @returns The merged id list, or `undefined` when normalization of the current ids fails.
 */
function mergeSupportingObservationIds(existing, incoming, allowedObservationIds) {
    const allowedSet = new Set(allowedObservationIds);
    const historical = [];
    const stillCurrent = [];
    for (const id of existing) {
        if (allowedSet.has(id)) {
            stillCurrent.push(id);
        }
        else {
            historical.push(id);
        }
    }
    const current = normalizeSupportingObservationIds([...stillCurrent, ...incoming], allowedObservationIds);
    return current ? [...historical, ...current] : undefined;
}
317
/**
 * Renders the current reflections for the reflector prompt.
 *
 * @param reflections Reflections to render.
 * @returns Newline-joined prompt lines, or "(none yet)" when there are no reflections.
 */
export function renderReflectionsForReflectorPrompt(reflections) {
    const rendered = joinReflectionsOrEmpty(reflections);
    return rendered;
}
320
/**
 * Applies a batch of reflection proposals to a reflection list without mutating the input.
 *
 * Each proposal ends in exactly one outcome:
 * - unsupported: empty or multi-line content, invalid or too few supporting ids, or a failed merge;
 * - added: new content, appended as a new record;
 * - promoted: content matches a plain-string or legacy record, which is replaced by a
 *   record with provenance (and no `legacy` flag);
 * - merged: content matches a non-legacy record and the merged support list has a different length;
 * - duplicates: content matches a non-legacy record and the support list length is unchanged.
 * `accepted` counts added + promoted + merged.
 *
 * @param reflections Current reflections (strings or records).
 * @param proposals Proposals with `content` and `supportingObservationIds`.
 * @param allowedObservationIds Ids of the current observations.
 * @param passContext Pass settings; `minSupportingObservationIds` is read.
 * @returns `{ reflections, accepted, added, merged, promoted, duplicates, unsupported }`.
 */
export function applyReflectionProposals(reflections, proposals, allowedObservationIds, passContext) {
    const next = Array.from(reflections);
    const tally = { accepted: 0, added: 0, merged: 0, promoted: 0, duplicates: 0, unsupported: 0 };
    for (const proposal of proposals) {
        const content = normalizeReflectionProposalContent(proposal.content);
        if (!content) {
            tally.unsupported++;
            continue;
        }
        const support = normalizeSupportingObservationIds(proposal.supportingObservationIds, allowedObservationIds);
        if (!support || support.length < passContext.minSupportingObservationIds) {
            tally.unsupported++;
            continue;
        }
        const matchIndex = next.findIndex((candidate) => reflectionContentKey(candidate) === content);
        if (matchIndex < 0) {
            // Brand-new content.
            next.push({ id: hashId(content), content, supportingObservationIds: support });
            tally.accepted++;
            tally.added++;
            continue;
        }
        const match = next[matchIndex];
        if (typeof match === "string") {
            // A plain-string reflection gains an id and provenance.
            next[matchIndex] = { id: hashId(content), content, supportingObservationIds: support };
            tally.accepted++;
            tally.promoted++;
            continue;
        }
        const mergedSupport = mergeSupportingObservationIds(match.supportingObservationIds, support, allowedObservationIds);
        if (!mergedSupport) {
            tally.unsupported++;
            continue;
        }
        if (match.legacy === true) {
            // Rebuilt from scratch so the legacy flag is dropped.
            next[matchIndex] = {
                id: match.id,
                content: match.content,
                supportingObservationIds: mergedSupport,
            };
            tally.accepted++;
            tally.promoted++;
            continue;
        }
        if (mergedSupport.length === match.supportingObservationIds.length) {
            // Same amount of support as before: nothing to record.
            tally.duplicates++;
            continue;
        }
        next[matchIndex] = { ...match, supportingObservationIds: mergedSupport };
        tally.accepted++;
        tally.merged++;
    }
    return { reflections: next, ...tally };
}
391
/**
 * Runs a single reflector pass: drives an agent loop that may call the
 * `record_reflections` tool any number of times, applying each batch of proposals to
 * the working reflection list.
 *
 * The loop is asked to stop after a turn when `args.maxTurns` (if positive) is reached
 * or when two consecutive tool calls accepted nothing. If the loop throws, the pass is
 * marked failed, but reflections accepted before the failure are still returned.
 *
 * @param args Run options; reads `model`, `apiKey`, `headers`, `maxTurns`, `signal`,
 *             optional `agentLoop` (replaces the imported loop) and optional `onEvent`.
 * @param reflections Reflections at the start of the pass.
 * @param observations Current observations; their ids are the only ids proposals may cite.
 * @param passContext Pass settings produced by reflectorPassContext.
 * @returns `{ reflections, stats }` for this pass.
 */
async function runReflectorPass(args, reflections, observations, passContext) {
    const { pass } = passContext;
    const allowedObservationIds = observations.map((observation) => observation.id);
    const stats = emptyReflectorPassStats(pass);
    let currentReflections = reflections;
    let consecutiveEmptyCalls = 0;
    debugLog("reflector.pass.start", {
        pass,
        maxPasses: passContext.maxPasses,
        minSupportingObservationIds: passContext.minSupportingObservationIds,
        reflectionCount: reflections.length,
        observationCount: observations.length,
        observationIds: isDebugLogEnabled() ? allowedObservationIds : undefined,
    });
    const pluralSuffix = (count) => (count === 1 ? "" : "s");
    // Builds the tool-result text reported back to the model after each call.
    const describeOutcome = (outcome) => {
        const sentences = [`Accepted ${outcome.accepted} reflection proposal${pluralSuffix(outcome.accepted)}.`];
        if (outcome.added) {
            sentences.push(`${outcome.added} new.`);
        }
        if (outcome.merged) {
            sentences.push(`${outcome.merged} merged into existing reflections.`);
        }
        if (outcome.promoted) {
            sentences.push(`${outcome.promoted} promoted from legacy/no-provenance memory.`);
        }
        if (outcome.duplicates) {
            sentences.push(`${outcome.duplicates} duplicate/no-op proposal${pluralSuffix(outcome.duplicates)} skipped.`);
        }
        if (outcome.unsupported) {
            sentences.push(`${outcome.unsupported} unsupported proposal${pluralSuffix(outcome.unsupported)} rejected for invalid supporting observation ids or this pass's minimum support requirement.`);
        }
        sentences.push("Call record_reflections again if more should be crystallized for this pass; otherwise stop and emit a short plain-text confirmation.");
        return sentences.join(" ");
    };
    const recordTool = {
        name: "record_reflections",
        label: "Record reflections",
        description: "Record a batch of reflections crystallized from the observation pool, with supporting ids for observations whose durable meaning is captured. " +
            "May be called multiple times. Stop calling when nothing more is stable enough to crystallize or strengthen for this pass, " +
            "then emit a short plain-text confirmation.",
        parameters: RecordReflectionsSchema,
        execute: async (_id, params) => {
            const outcome = applyReflectionProposals(currentReflections, params.reflections, allowedObservationIds, passContext);
            currentReflections = outcome.reflections;
            addReflectionProposalStats(stats, outcome);
            // Track fruitless calls so the loop can be cut short.
            consecutiveEmptyCalls = outcome.accepted === 0 ? consecutiveEmptyCalls + 1 : 0;
            const text = describeOutcome(outcome);
            debugLog("reflector.tool_call", {
                pass,
                accepted: outcome.accepted,
                added: outcome.added,
                merged: outcome.merged,
                promoted: outcome.promoted,
                duplicates: outcome.duplicates,
                unsupported: outcome.unsupported,
                currentReflectionCount: currentReflections.length,
                proposals: params.reflections.map(({ content, supportingObservationIds }) => ({
                    content,
                    supportingObservationIds,
                })),
            });
            return {
                content: [{ type: "text", text }],
                details: outcome,
            };
        },
    };
    const passGuidance = buildReflectorPassGuidance(pass, passContext.maxPasses);
    // The prompt shows the reflections as they were at the start of the pass.
    const userText = `CURRENT REFLECTIONS:
${renderReflectionsForReflectorPrompt(reflections)}

CURRENT OBSERVATIONS:
${joinObservationsOrEmpty(observations)}

REFLECTOR PASS GUIDANCE:
${passGuidance}

Crystallize long-lived reflections from the full observation pool for this pass. Call record_reflections with batches of reflection proposals, each with supporting observation ids whose durable meaning is captured by that reflection. You may call the tool multiple times as you reason through the pool. To strengthen or promote an existing reflection, repeat the exact existing reflection content with additional valid supporting observation ids. Do not lightly reword existing reflections. Do not attach observations whose unique exact detail or current task state is not captured with equivalent fidelity. When done, stop calling the tool and emit a short plain-text confirmation.`;
    const prompts = [
        {
            role: "user",
            content: [{ type: "text", text: userText }],
            timestamp: Date.now(),
        },
    ];
    const context = {
        systemPrompt: REFLECTOR_SYSTEM,
        messages: [],
        tools: [recordTool],
    };
    const reasoning = args.model.reasoning;
    // A missing, zero or negative maxTurns means "no turn limit".
    const effectiveMaxTurns = args.maxTurns && args.maxTurns > 0 ? args.maxTurns : undefined;
    let turnCount = 0;
    const config = {
        model: args.model,
        apiKey: args.apiKey,
        headers: args.headers,
        maxTokens: boundedMaxTokens(args.model, AGENT_LOOP_MAX_TOKENS),
        convertToLlm: (msgs) => msgs,
        toolExecution: "sequential",
        ...(reasoning ? { reasoning: "high" } : {}),
        shouldStopAfterTurn: () => {
            turnCount++;
            const turnLimitReached = effectiveMaxTurns !== undefined && turnCount >= effectiveMaxTurns;
            return turnLimitReached || consecutiveEmptyCalls >= 2;
        },
    };
    let firstEventSeen = false;
    try {
        debugLog("reflector.agent_loop.before_call", { pass });
        const loop = args.agentLoop ?? agentLoop;
        const stream = loop(prompts, context, config, args.signal);
        debugLog("reflector.agent_loop.stream_created", { pass });
        for await (const event of stream) {
            if (!firstEventSeen) {
                firstEventSeen = true;
                debugLog("reflector.agent_loop.first_event", { pass, type: event.type });
            }
            logAgentLoopEvent("reflector", pass, event);
            args.onEvent?.(event);
        }
        await stream.result();
        debugLog("reflector.pass.result", { pass, stats, reflectionCount: currentReflections.length });
    }
    catch (error) {
        // Best effort: keep whatever was accepted before the failure and flag the pass.
        stats.failed = true;
        debugLog("reflector.agent_loop.error", {
            pass,
            firstEventSeen,
            errorMessage: error instanceof Error ? error.message : String(error),
        });
    }
    return { reflections: currentReflections, stats };
}
526
/**
 * Runs the reflector: up to REFLECTOR_MAX_PASSES passes over the same observation pool,
 * each starting from the reflections produced by the previous pass. Stops early after a
 * failed pass.
 *
 * @param args Run options forwarded to every pass.
 * @param reflections Reflections at the start of the run.
 * @param observations Current observations.
 * @param onPassStart Optional callback invoked as `(pass, maxPasses)` before each pass.
 * @returns `{ reflections, stats }` where `stats` aggregates all executed passes.
 */
export async function runReflector(args, reflections, observations, onPassStart) {
    debugLog("reflector.start", {
        reflectionCount: reflections.length,
        observationCount: observations.length,
        // Full observation details are only materialized when debug logging is on.
        observations: isDebugLogEnabled()
            ? observations.map(({ id, timestamp, relevance, content, sourceEntryIds }) => ({
                id,
                timestamp,
                relevance,
                content,
                sourceEntryIds,
            }))
            : undefined,
    });
    const passes = [];
    let currentReflections = reflections;
    let pass = 1;
    while (pass <= REFLECTOR_MAX_PASSES) {
        onPassStart?.(pass, REFLECTOR_MAX_PASSES);
        const passResult = await runReflectorPass(args, currentReflections, observations, reflectorPassContext(pass));
        currentReflections = passResult.reflections;
        passes.push(passResult.stats);
        if (passResult.stats.failed) {
            break;
        }
        pass++;
    }
    const result = { reflections: currentReflections, stats: aggregateReflectorStats(passes) };
    debugLog("reflector.result", {
        stats: result.stats,
        reflectionCount: result.reflections.length,
        reflections: isDebugLogEnabled()
            ? result.reflections.map((reflection) => {
                if (typeof reflection === "string") {
                    return { legacyString: true, content: reflection };
                }
                return reflection;
            })
            : undefined,
    });
    return result;
}
560
// Schema for one observation id to drop (12 lowercase hex characters).
const DroppableObservationIdSchema = Type.String({
    pattern: "^[a-f0-9]{12}$",
    description: "12-character hex observation id from the current-observations list.",
});
// Parameter schema of the drop_observations tool: a non-empty id list plus an optional reason.
const DropObservationsSchema = Type.Object({
    ids: Type.Array(DroppableObservationIdSchema, {
        minItems: 1,
        description: "Ids of observations to remove from the kept set.",
    }),
    reason: Type.Optional(Type.String({ description: "Optional short note explaining why these observations were dropped." })),
});
570
/**
 * Runs a single pruner pass: drives an agent loop that may call the `drop_observations`
 * tool any number of times to remove observations from the kept set.
 *
 * The loop is asked to stop after a turn when `args.maxTurns` (if positive) is reached
 * or when two consecutive tool calls dropped nothing. If the loop throws, the pass falls
 * back to keeping every observation, discarding any drops made before the failure.
 *
 * @param args Run options; reads `model`, `apiKey`, `headers`, `maxTurns`, `signal`,
 *             optional `agentLoop` (replaces the imported loop) and optional `onEvent`.
 * @param reflections Current reflections, shown to the model for context.
 * @param observations Observation pool for this pass.
 * @param passContext Pass settings; reads `pass`, `maxPasses`, `poolTokens`,
 *                    `targetTokens`, `deltaTokens` and `coverageTags`.
 * @returns `{ kept, droppedIds, fellBack }`.
 */
async function runPrunerPass(args, reflections, observations, passContext) {
    const { pass } = passContext;
    const knownIds = new Set(observations.map((observation) => observation.id));
    const droppedSoFar = new Set();
    let consecutiveEmptyCalls = 0;
    debugLog("pruner.pass.start", {
        pass,
        maxPasses: passContext.maxPasses,
        poolTokens: passContext.poolTokens,
        targetTokens: passContext.targetTokens,
        deltaTokens: passContext.deltaTokens,
        observationCount: observations.length,
        reflectionCount: reflections.length,
        observationIds: isDebugLogEnabled() ? observations.map((observation) => observation.id) : undefined,
    });
    const dropTool = {
        name: "drop_observations",
        label: "Drop observations",
        description: "Remove one or more observations from the kept set by id. May be called multiple times. " +
            "Stop calling when no further drops are warranted, then emit a short plain-text confirmation.",
        parameters: DropObservationsSchema,
        execute: async (_id, params) => {
            // Sort every requested id into exactly one bucket.
            const valid = [];
            const unknown = [];
            const already = [];
            for (const id of params.ids) {
                if (!knownIds.has(id)) {
                    unknown.push(id);
                }
                else if (droppedSoFar.has(id)) {
                    already.push(id);
                }
                else {
                    droppedSoFar.add(id);
                    valid.push(id);
                }
            }
            // Track fruitless calls so the loop can be cut short.
            consecutiveEmptyCalls = valid.length === 0 ? consecutiveEmptyCalls + 1 : 0;
            const remaining = knownIds.size - droppedSoFar.size;
            const sentences = [`Dropped ${valid.length} observation${valid.length === 1 ? "" : "s"}.`];
            if (unknown.length) {
                sentences.push(`Unknown ids ignored: ${unknown.join(", ")}.`);
            }
            if (already.length) {
                sentences.push(`Already dropped: ${already.join(", ")}.`);
            }
            sentences.push(`Remaining kept: ${remaining} of ${knownIds.size}.`);
            sentences.push("Call drop_observations again if more should be removed; otherwise stop and emit a short plain-text confirmation.");
            debugLog("pruner.tool_call", {
                pass,
                requestedIds: params.ids,
                dropped: valid,
                unknown,
                already,
                remaining,
                reason: params.reason,
            });
            return {
                content: [{ type: "text", text: sentences.join(" ") }],
                details: { dropped: valid, unknown, already, remaining },
            };
        },
    };
    const stillOverBudget = passContext.deltaTokens > 0;
    const pressureLine = stillOverBudget
        ? `Pool ~${passContext.poolTokens.toLocaleString()} tokens, target ~${passContext.targetTokens.toLocaleString()} tokens, still need to cut at least ~${passContext.deltaTokens.toLocaleString()} tokens.`
        : `Pool ~${passContext.poolTokens.toLocaleString()} tokens, target ~${passContext.targetTokens.toLocaleString()} tokens (already under budget) — drop only clear redundancies.`;
    const passGuidance = buildPrunerPassGuidance(pass, passContext.maxPasses);
    const userText = `CURRENT REFLECTIONS:
${joinReflectionsOrEmpty(reflections)}

CURRENT OBSERVATIONS:
${renderObservationsForPrunerPrompt(observations, passContext.coverageTags)}

${pressureLine}

${passGuidance}

Decide which observations to remove from the kept set. Call drop_observations with the ids you want to drop. You may call the tool multiple times as you reason through the pool. When satisfied, stop calling the tool and emit a short plain-text confirmation to end the run.`;
    const prompts = [
        {
            role: "user",
            content: [{ type: "text", text: userText }],
            timestamp: Date.now(),
        },
    ];
    const context = {
        systemPrompt: PRUNER_SYSTEM,
        messages: [],
        tools: [dropTool],
    };
    const reasoning = args.model.reasoning;
    // A missing, zero or negative maxTurns means "no turn limit".
    const effectiveMaxTurns = args.maxTurns && args.maxTurns > 0 ? args.maxTurns : undefined;
    let turnCount = 0;
    const config = {
        model: args.model,
        apiKey: args.apiKey,
        headers: args.headers,
        maxTokens: boundedMaxTokens(args.model, AGENT_LOOP_MAX_TOKENS),
        convertToLlm: (msgs) => msgs,
        toolExecution: "sequential",
        ...(reasoning ? { reasoning: "high" } : {}),
        shouldStopAfterTurn: () => {
            turnCount++;
            const turnLimitReached = effectiveMaxTurns !== undefined && turnCount >= effectiveMaxTurns;
            return turnLimitReached || consecutiveEmptyCalls >= 2;
        },
    };
    // Progress markers, logged to show how far the loop got before finishing or failing.
    let agentLoopCalled = false;
    let streamCreated = false;
    let firstEventSeen = false;
    try {
        debugLog("pruner.agent_loop.before_call", { pass });
        const loop = args.agentLoop ?? agentLoop;
        agentLoopCalled = true;
        const stream = loop(prompts, context, config, args.signal);
        streamCreated = true;
        debugLog("pruner.agent_loop.stream_created", { pass });
        for await (const event of stream) {
            if (!firstEventSeen) {
                firstEventSeen = true;
                debugLog("pruner.agent_loop.first_event", { pass, type: event.type });
            }
            logAgentLoopEvent("pruner", pass, event);
            args.onEvent?.(event);
        }
        await stream.result();
    }
    catch (error) {
        debugLog("pruner.agent_loop.error", {
            pass,
            agentLoopCalled,
            streamCreated,
            firstEventSeen,
            errorMessage: error instanceof Error ? error.message : String(error),
        });
        // Fall back to the untouched pool; partial drops are deliberately not applied.
        return { kept: observations, droppedIds: [], fellBack: true };
    }
    const kept = observations.filter((observation) => !droppedSoFar.has(observation.id));
    const droppedIds = [...droppedSoFar];
    debugLog("pruner.pass.result", {
        pass,
        droppedIds,
        dropped: droppedIds.length,
        remaining: kept.length,
        agentLoopCalled,
        streamCreated,
        firstEventSeen,
    });
    return { kept, droppedIds, fellBack: false };
}
726
/**
 * Runs the pruner: up to PRUNER_MAX_PASSES passes that shrink the observation pool
 * toward a target of `floor(budgetTokens * PRUNER_TARGET_RATIO)` tokens (at least 1).
 *
 * `stopReason` in the result is one of:
 * - "empty": there were no observations;
 * - "under_target": the pool fits the target (checked before each pass and at the end);
 * - "fell_back": a pass failed and its pool was kept unchanged;
 * - "zero_drops": a pass completed without dropping anything;
 * - "max_passes": all passes ran and the pool is still above the target.
 *
 * @param args Run options forwarded to every pass.
 * @param reflections Current reflections.
 * @param observations Observation pool to prune.
 * @param budgetTokens Token budget from which the target is derived.
 * @param onPassStart Optional callback invoked as `(pass, maxPasses)` before each executed pass.
 * @returns `{ observations, droppedIds, fellBack, passes, stopReason }`.
 */
export async function runPruner(args, reflections, observations, budgetTokens, onPassStart) {
    debugLog("pruner.start", {
        reflectionCount: reflections.length,
        observationCount: observations.length,
        budgetTokens,
    });
    // Logs the final outcome and hands the same object back to the caller.
    const finish = (result) => {
        debugLog("pruner.result", {
            stopReason: result.stopReason,
            fellBack: result.fellBack,
            droppedIds: result.droppedIds,
            dropped: result.droppedIds.length,
            passes: result.passes,
            finalObservationCount: result.observations.length,
        });
        return result;
    };
    if (observations.length === 0) {
        return finish({ observations: [], droppedIds: [], fellBack: false, passes: [], stopReason: "empty" });
    }
    const target = Math.max(1, Math.floor(budgetTokens * PRUNER_TARGET_RATIO));
    // Coverage is derived once from the full pool and reused for every pass.
    const coverageTags = deriveObservationCoverageTags(reflections, observations);
    const allDropped = [];
    const passes = [];
    let pool = observations;
    let fellBack = false;
    let stopReason;
    for (let pass = 1; pass <= PRUNER_MAX_PASSES; pass++) {
        const poolTokens = observationPoolTokens(pool);
        if (poolTokens <= target) {
            stopReason = "under_target";
            debugLog("pruner.under_target", { pass, poolTokens, targetTokens: target, observationCount: pool.length });
            break;
        }
        onPassStart?.(pass, PRUNER_MAX_PASSES);
        const deltaTokens = poolTokens - target;
        const passResult = await runPrunerPass(args, reflections, pool, {
            poolTokens,
            targetTokens: target,
            deltaTokens,
            pass,
            maxPasses: PRUNER_MAX_PASSES,
            coverageTags,
        });
        passes.push({
            pass,
            poolTokens,
            targetTokens: target,
            deltaTokens,
            dropped: passResult.droppedIds.length,
            remaining: passResult.kept.length,
            fellBack: passResult.fellBack,
        });
        if (passResult.fellBack) {
            fellBack = true;
            stopReason = "fell_back";
            break;
        }
        if (passResult.droppedIds.length === 0) {
            stopReason = "zero_drops";
            break;
        }
        pool = passResult.kept;
        allDropped.push(...passResult.droppedIds);
    }
    // The loop ran out of passes without an explicit reason: classify by the final pool size.
    stopReason ??= observationPoolTokens(pool) <= target ? "under_target" : "max_passes";
    return finish({ observations: pool, droppedIds: allDropped, fellBack, passes, stopReason });
}
801
/**
 * Renders reflections and observations into a single summary text.
 *
 * @param reflections Reflections to include.
 * @param observations Observations to include.
 * @returns An empty string when both lists are empty. Otherwise the usage instructions
 *          followed by a "## Reflections" section and/or a "## Observations" section
 *          (each only when non-empty), separated by blank lines.
 */
export function renderSummary(reflections, observations) {
    if (reflections.length === 0 && observations.length === 0) {
        return "";
    }
    const sections = [CONTEXT_USAGE_INSTRUCTIONS];
    if (reflections.length > 0) {
        const reflectionLines = reflections.map(reflectionToPromptLine).join("\n");
        sections.push(`## Reflections\n${reflectionLines}`);
    }
    if (observations.length > 0) {
        const observationLines = observationsToPromptLines(observations).join("\n");
        sections.push(`## Observations\n${observationLines}`);
    }
    return sections.join("\n\n");
}