@redstone-md/mapr 0.0.4-alpha → 0.0.6-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,26 +1,28 @@
1
1
  import { z } from "zod";
2
- import { Buffer } from "buffer";
3
2
 
3
+ import type { AgentMemo, ArtifactSummary, BundleAnalysis, ChunkAnalysis } from "./analysis-schema";
4
4
  import {
5
5
  agentMemoSchema,
6
- artifactSummarySchema,
7
6
  buildAnalysisSnapshot,
8
7
  chunkAnalysisSchema,
9
8
  finalAnalysisSchema,
10
- type AgentMemo,
11
- type ArtifactSummary,
12
- type BundleAnalysis,
13
- type ChunkAnalysis,
14
9
  PartialAnalysisError,
15
10
  } from "./analysis-schema";
11
+ import { createFallbackAgentMemo, createFallbackChunkAnalysis } from "./analysis-fallback";
12
+ import {
13
+ chunkTextByBytes,
14
+ createPromptEnvelope,
15
+ deriveChunkSizeBytes,
16
+ formatAgentTelemetrySuffix,
17
+ normalizeAiError,
18
+ } from "./analysis-helpers";
16
19
  import { generateObjectFromStream, type StreamedObjectTelemetry } from "./ai-json";
17
20
  import { artifactTypeSchema } from "./artifacts";
18
21
  import type { FormattedArtifact } from "./formatter";
19
22
  import { LocalArtifactRag } from "./local-rag";
23
+ import { mapWithConcurrency } from "./promise-pool";
20
24
  import { AiProviderClient, type AiProviderConfig } from "./provider";
21
- import { SWARM_AGENT_ORDER, getGlobalMissionPrompt, getSwarmAgentPrompt, type SwarmAgentName } from "./swarm-prompts";
22
-
23
- export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
25
+ import { getGlobalMissionPrompt, getSwarmAgentPrompt, SWARM_AGENT_ORDER, type SwarmAgentName } from "./swarm-prompts";
24
26
 
25
27
  const analyzeInputSchema = z.object({
26
28
  pageUrl: z.string().url(),
@@ -37,6 +39,7 @@ const analyzeInputSchema = z.object({
37
39
  }),
38
40
  ),
39
41
  });
42
+
40
43
  export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
41
44
  export type AnalysisProgressState = "started" | "streaming" | "completed";
42
45
 
@@ -59,120 +62,37 @@ interface AnalyzerOptions {
59
62
  providerConfig: AiProviderConfig;
60
63
  chunkSizeBytes?: number;
61
64
  localRag?: boolean;
65
+ analysisConcurrency?: number;
62
66
  onProgress?: (event: AnalysisProgressEvent) => void;
63
67
  }
64
68
 
65
- function createPromptEnvelope(input: {
69
+ interface ChunkTaskInput {
66
70
  pageUrl: string;
67
71
  artifact: FormattedArtifact;
68
72
  chunk: string;
69
73
  chunkIndex: number;
70
74
  totalChunks: number;
71
- memory?: unknown;
72
- retrievedContext?: string[];
73
- }): string {
74
- return [
75
- `Target page: ${input.pageUrl}`,
76
- `Artifact URL: ${input.artifact.url}`,
77
- `Artifact type: ${input.artifact.type}`,
78
- `Discovered from: ${input.artifact.discoveredFrom}`,
79
- `Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
80
- input.artifact.formattingNote ? `Formatting note: ${input.artifact.formattingNote}` : "Formatting note: none",
81
- input.memory ? `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}` : "Swarm memory: none yet",
82
- input.retrievedContext && input.retrievedContext.length > 0
83
- ? `Local RAG evidence:\n${input.retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`).join("\n\n")}`
84
- : "Local RAG evidence: none",
85
- "Artifact content:",
86
- "```text",
87
- input.chunk,
88
- "```",
89
- ].join("\n\n");
90
- }
91
-
92
- function findSplitBoundary(source: string, start: number, end: number): number {
93
- const minimumPreferredIndex = start + Math.max(1, Math.floor((end - start) * 0.6));
94
- const preferredDelimiters = new Set(["\n", ";", "}", " ", ","]);
95
-
96
- for (let cursor = end - 1; cursor >= minimumPreferredIndex; cursor -= 1) {
97
- const character = source[cursor];
98
- if (character && preferredDelimiters.has(character)) {
99
- return cursor + 1;
100
- }
101
- }
102
-
103
- return end;
104
- }
105
-
106
- export function deriveChunkSizeBytes(modelContextSize: number): number {
107
- const validatedContextSize = z.number().int().positive().parse(modelContextSize);
108
- const derived = Math.floor(validatedContextSize * 0.9);
109
- return Math.max(DEFAULT_CHUNK_SIZE_BYTES, derived);
110
- }
111
-
112
- export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
113
- const validatedSource = z.string().parse(source);
114
- const validatedMaxBytes = z.number().int().positive().parse(maxBytes);
115
-
116
- if (validatedSource.length === 0) {
117
- return [];
118
- }
119
-
120
- const chunks: string[] = [];
121
- let start = 0;
122
-
123
- while (start < validatedSource.length) {
124
- let end = Math.min(validatedSource.length, start + validatedMaxBytes);
125
-
126
- while (end > start && Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
127
- end -= 1;
128
- }
129
-
130
- if (end <= start) {
131
- end = start + 1;
132
- }
133
-
134
- const splitAt = end === validatedSource.length ? end : findSplitBoundary(validatedSource, start, end);
135
- chunks.push(validatedSource.slice(start, splitAt));
136
- start = splitAt;
137
- }
138
-
139
- return chunks;
140
- }
141
-
142
- function normalizeAiError(error: unknown): Error {
143
- if (!(error instanceof Error)) {
144
- return new Error("AI analysis failed with an unknown error.");
145
- }
146
-
147
- const message = error.message.toLowerCase();
148
- if (message.includes("rate limit")) {
149
- return new Error("Provider rate limit hit during analysis. Please retry in a moment.");
150
- }
151
-
152
- if (message.includes("api key")) {
153
- return new Error("The configured API key was rejected by the provider.");
154
- }
155
-
156
- return error;
75
+ artifactIndex: number;
76
+ artifactCount: number;
77
+ localRag: LocalArtifactRag | null;
157
78
  }
158
79
 
159
- function formatAgentTelemetrySuffix(telemetry: StreamedObjectTelemetry): string {
160
- const tokenCount = telemetry.outputTokens ?? telemetry.estimatedOutputTokens;
161
- const tokenLabel = telemetry.outputTokens !== undefined ? `${tokenCount} tok` : `~${tokenCount} tok`;
162
- const tpsLabel = telemetry.tokensPerSecond !== undefined ? ` ${telemetry.tokensPerSecond} tps` : "";
163
- return ` [${tokenLabel}${tpsLabel}]`;
164
- }
80
+ export { chunkTextByBytes, deriveChunkSizeBytes } from "./analysis-helpers";
165
81
 
166
82
  export class AiBundleAnalyzer {
167
83
  private readonly providerClient: AiProviderClient;
168
84
  private readonly chunkSizeBytes: number;
169
85
  private readonly localRagEnabled: boolean;
86
+ private readonly analysisConcurrency: number;
170
87
  private readonly onProgress: ((event: AnalysisProgressEvent) => void) | undefined;
88
+ private readonly providerOptions: Record<string, unknown>;
171
89
 
172
90
  public constructor(options: AnalyzerOptions) {
173
91
  this.providerClient = new AiProviderClient(options.providerConfig);
174
92
  this.chunkSizeBytes = options.chunkSizeBytes ?? deriveChunkSizeBytes(options.providerConfig.modelContextSize);
175
93
  this.localRagEnabled = options.localRag ?? false;
94
+ this.analysisConcurrency = Math.max(1, Math.floor(options.analysisConcurrency ?? 1));
95
+ this.providerOptions = this.providerClient.getProviderOptions();
176
96
  this.onProgress = options.onProgress;
177
97
  }
178
98
 
@@ -196,268 +116,214 @@ export class AiBundleAnalyzer {
196
116
 
197
117
  const chunkAnalyses: ChunkAnalysis[] = [];
198
118
  const artifactSummaries: ArtifactSummary[] = [];
119
+ const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
199
120
 
200
- try {
201
- const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
202
-
203
- for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
204
- const artifact = validatedInput.artifacts[artifactIndex]!;
205
- const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
206
- const perArtifactChunkAnalyses: ChunkAnalysis[] = [];
207
-
208
- this.emitProgress({
209
- stage: "artifact",
210
- state: "started",
211
- message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
212
- artifactIndex: artifactIndex + 1,
213
- artifactCount: validatedInput.artifacts.length,
214
- artifactUrl: artifact.url,
215
- });
216
-
217
- for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
218
- this.emitProgress({
219
- stage: "chunk",
220
- state: "started",
221
- message: `Starting chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
222
- artifactIndex: artifactIndex + 1,
223
- artifactCount: validatedInput.artifacts.length,
224
- artifactUrl: artifact.url,
225
- chunkIndex: chunkIndex + 1,
226
- chunkCount: chunks.length,
227
- });
121
+ for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
122
+ const artifact = validatedInput.artifacts[artifactIndex]!;
123
+ const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
228
124
 
229
- const analysis = await this.analyzeChunkWithSwarm({
125
+ this.emitProgress({
126
+ stage: "artifact",
127
+ state: "started",
128
+ message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
129
+ artifactIndex: artifactIndex + 1,
130
+ artifactCount: validatedInput.artifacts.length,
131
+ artifactUrl: artifact.url,
132
+ });
133
+
134
+ const perArtifactChunkAnalyses = await mapWithConcurrency(
135
+ chunks,
136
+ this.analysisConcurrency,
137
+ async (chunk, chunkIndex): Promise<ChunkAnalysis> => {
138
+ const chunkInput: ChunkTaskInput = {
230
139
  pageUrl: validatedInput.pageUrl,
231
140
  artifact,
232
- chunk: chunks[chunkIndex] ?? "",
141
+ chunk,
233
142
  chunkIndex,
234
143
  totalChunks: chunks.length,
235
144
  artifactIndex: artifactIndex + 1,
236
145
  artifactCount: validatedInput.artifacts.length,
237
146
  localRag,
238
- });
239
-
240
- chunkAnalyses.push(analysis);
241
- perArtifactChunkAnalyses.push(analysis);
147
+ };
242
148
 
243
- this.emitProgress({
244
- stage: "chunk",
245
- state: "completed",
246
- message: `Completed chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
247
- artifactIndex: artifactIndex + 1,
248
- artifactCount: validatedInput.artifacts.length,
249
- artifactUrl: artifact.url,
250
- chunkIndex: chunkIndex + 1,
251
- chunkCount: chunks.length,
252
- });
253
- }
254
-
255
- artifactSummaries.push({
256
- url: artifact.url,
257
- type: artifact.type,
258
- chunkCount: chunks.length,
259
- summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
260
- });
261
-
262
- this.emitProgress({
263
- stage: "artifact",
264
- state: "completed",
265
- message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
266
- artifactIndex: artifactIndex + 1,
267
- artifactCount: validatedInput.artifacts.length,
268
- artifactUrl: artifact.url,
269
- });
270
- }
271
-
272
- return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
273
- } catch (error) {
274
- const normalizedError = normalizeAiError(error);
275
- const partialAnalysis = buildAnalysisSnapshot({
276
- overview:
277
- chunkAnalyses.length > 0 || artifactSummaries.length > 0
278
- ? `Partial analysis only. Processing stopped because: ${normalizedError.message}`
279
- : `Analysis aborted before any chunk completed. Cause: ${normalizedError.message}`,
280
- artifactSummaries,
281
- chunkAnalyses,
149
+ this.emitChunkEvent("started", chunkInput);
150
+ const analysis = await this.analyzeChunkWithSwarm(chunkInput);
151
+ this.emitChunkEvent("completed", chunkInput);
152
+ return analysis;
153
+ },
154
+ );
155
+
156
+ chunkAnalyses.push(...perArtifactChunkAnalyses);
157
+ artifactSummaries.push({
158
+ url: artifact.url,
159
+ type: artifact.type,
160
+ chunkCount: chunks.length,
161
+ summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
282
162
  });
283
163
 
284
- throw new PartialAnalysisError(normalizedError.message, partialAnalysis);
164
+ this.emitProgress({
165
+ stage: "artifact",
166
+ state: "completed",
167
+ message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
168
+ artifactIndex: artifactIndex + 1,
169
+ artifactCount: validatedInput.artifacts.length,
170
+ artifactUrl: artifact.url,
171
+ });
285
172
  }
173
+
174
+ return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
175
+ }
176
+
177
+ private emitChunkEvent(state: Extract<AnalysisProgressState, "started" | "completed">, input: ChunkTaskInput): void {
178
+ this.emitProgress({
179
+ stage: "chunk",
180
+ state,
181
+ message: `${state === "started" ? "Starting" : "Completed"} chunk ${input.chunkIndex + 1}/${input.totalChunks} for ${input.artifact.url}`,
182
+ artifactIndex: input.artifactIndex,
183
+ artifactCount: input.artifactCount,
184
+ artifactUrl: input.artifact.url,
185
+ chunkIndex: input.chunkIndex + 1,
186
+ chunkCount: input.totalChunks,
187
+ });
286
188
  }
287
189
 
288
- private async analyzeChunkWithSwarm(input: {
289
- pageUrl: string;
290
- artifact: FormattedArtifact;
291
- chunk: string;
292
- chunkIndex: number;
293
- totalChunks: number;
294
- artifactIndex: number;
295
- artifactCount: number;
296
- localRag: LocalArtifactRag | null;
297
- }): Promise<ChunkAnalysis> {
190
+ private async analyzeChunkWithSwarm(input: ChunkTaskInput): Promise<ChunkAnalysis> {
298
191
  const memory: Partial<Record<SwarmAgentName, AgentMemo | ChunkAnalysis>> = {};
299
192
 
300
193
  for (const agent of SWARM_AGENT_ORDER) {
301
- this.emitProgress({
302
- stage: "agent",
303
- state: "started",
304
- message: `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`,
305
- artifactIndex: input.artifactIndex,
306
- artifactCount: input.artifactCount,
307
- artifactUrl: input.artifact.url,
308
- chunkIndex: input.chunkIndex + 1,
309
- chunkCount: input.totalChunks,
310
- agent,
311
- });
194
+ this.emitAgentEvent("started", agent, input, `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`);
195
+
196
+ try {
197
+ if (agent === "synthesizer") {
198
+ const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
199
+ memory[agent] = synthesized.object;
200
+ this.emitAgentCompletion(agent, input, synthesized.telemetry);
201
+ continue;
202
+ }
312
203
 
313
- if (agent === "synthesizer") {
314
- const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
315
- memory[agent] = synthesized.object;
316
- this.emitProgress({
317
- stage: "agent",
318
- state: "completed",
319
- message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(synthesized.telemetry)}`,
320
- artifactIndex: input.artifactIndex,
321
- artifactCount: input.artifactCount,
322
- artifactUrl: input.artifact.url,
323
- chunkIndex: input.chunkIndex + 1,
324
- chunkCount: input.totalChunks,
325
- agent,
326
- estimatedOutputTokens: synthesized.telemetry.estimatedOutputTokens,
327
- ...(synthesized.telemetry.outputTokens !== undefined ? { outputTokens: synthesized.telemetry.outputTokens } : {}),
328
- ...(synthesized.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: synthesized.telemetry.tokensPerSecond } : {}),
329
- });
330
- } else {
331
204
  const memo = await this.runMemoAgent(agent, input, memory, this.getRetrievedContext(agent, input, memory));
332
205
  memory[agent] = memo.object;
333
- this.emitProgress({
334
- stage: "agent",
335
- state: "completed",
336
- message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(memo.telemetry)}`,
337
- artifactIndex: input.artifactIndex,
338
- artifactCount: input.artifactCount,
339
- artifactUrl: input.artifact.url,
340
- chunkIndex: input.chunkIndex + 1,
341
- chunkCount: input.totalChunks,
206
+ this.emitAgentCompletion(agent, input, memo.telemetry);
207
+ } catch (error) {
208
+ const normalizedError = normalizeAiError(error);
209
+ memory[agent] =
210
+ agent === "synthesizer"
211
+ ? createFallbackChunkAnalysis({ artifactUrl: input.artifact.url, memory, error: normalizedError })
212
+ : createFallbackAgentMemo(agent, normalizedError);
213
+
214
+ this.emitAgentEvent(
215
+ "completed",
342
216
  agent,
343
- estimatedOutputTokens: memo.telemetry.estimatedOutputTokens,
344
- ...(memo.telemetry.outputTokens !== undefined ? { outputTokens: memo.telemetry.outputTokens } : {}),
345
- ...(memo.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: memo.telemetry.tokensPerSecond } : {}),
346
- });
217
+ input,
218
+ `${agent} agent fell back ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${normalizedError.message}`,
219
+ );
347
220
  }
348
221
  }
349
222
 
350
223
  return chunkAnalysisSchema.parse(memory.synthesizer);
351
224
  }
352
225
 
226
+ private emitAgentCompletion(agent: SwarmAgentName, input: ChunkTaskInput, telemetry: StreamedObjectTelemetry): void {
227
+ this.emitAgentEvent(
228
+ "completed",
229
+ agent,
230
+ input,
231
+ `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
232
+ telemetry,
233
+ );
234
+ }
235
+
236
+ private emitAgentEvent(
237
+ state: AnalysisProgressState,
238
+ agent: SwarmAgentName,
239
+ input: ChunkTaskInput,
240
+ message: string,
241
+ telemetry?: StreamedObjectTelemetry,
242
+ ): void {
243
+ this.emitProgress({
244
+ stage: "agent",
245
+ state,
246
+ message,
247
+ artifactIndex: input.artifactIndex,
248
+ artifactCount: input.artifactCount,
249
+ artifactUrl: input.artifact.url,
250
+ chunkIndex: input.chunkIndex + 1,
251
+ chunkCount: input.totalChunks,
252
+ agent,
253
+ ...(telemetry !== undefined ? { estimatedOutputTokens: telemetry.estimatedOutputTokens } : {}),
254
+ ...(telemetry?.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
255
+ ...(telemetry?.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
256
+ });
257
+ }
258
+
353
259
  private async runMemoAgent(
354
260
  agent: Exclude<SwarmAgentName, "synthesizer">,
355
- input: {
356
- pageUrl: string;
357
- artifact: FormattedArtifact;
358
- chunk: string;
359
- chunkIndex: number;
360
- totalChunks: number;
361
- artifactIndex: number;
362
- artifactCount: number;
363
- },
261
+ input: ChunkTaskInput,
364
262
  memory: Partial<Record<SwarmAgentName, unknown>>,
365
263
  retrievedContext: string[],
366
264
  ): Promise<{ object: AgentMemo; telemetry: StreamedObjectTelemetry }> {
367
265
  return generateObjectFromStream({
368
266
  model: this.providerClient.getModel(),
369
267
  system: getSwarmAgentPrompt(agent),
370
- prompt: createPromptEnvelope({
371
- pageUrl: input.pageUrl,
372
- artifact: input.artifact,
373
- chunk: input.chunk,
374
- chunkIndex: input.chunkIndex,
375
- totalChunks: input.totalChunks,
376
- memory,
377
- retrievedContext,
378
- }),
268
+ prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
379
269
  schema: agentMemoSchema,
380
270
  contract: [
381
271
  "JSON contract:",
382
272
  '{"role":"string","summary":"string","observations":["string"],"evidence":["string"],"nextQuestions":["string"]}',
383
273
  ].join("\n"),
274
+ attempts: 4,
384
275
  maxRetries: 2,
385
- providerOptions: {
386
- openai: {
387
- store: false,
388
- },
389
- },
390
- onProgress: (telemetry) => {
391
- this.emitProgress({
392
- stage: "agent",
393
- state: "streaming",
394
- message: `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
395
- artifactIndex: input.artifactIndex,
396
- artifactCount: input.artifactCount,
397
- artifactUrl: input.artifact.url,
398
- chunkIndex: input.chunkIndex + 1,
399
- chunkCount: input.totalChunks,
276
+ providerOptions: this.providerOptions,
277
+ onRetry: (attempt, error) =>
278
+ this.emitAgentEvent(
279
+ "streaming",
400
280
  agent,
401
- estimatedOutputTokens: telemetry.estimatedOutputTokens,
402
- ...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
403
- ...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
404
- });
405
- },
281
+ input,
282
+ `${agent} agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
283
+ ),
284
+ onProgress: (telemetry) =>
285
+ this.emitAgentEvent(
286
+ "streaming",
287
+ agent,
288
+ input,
289
+ `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
290
+ telemetry,
291
+ ),
406
292
  });
407
293
  }
408
294
 
409
295
  private async runSynthesisAgent(
410
- input: {
411
- pageUrl: string;
412
- artifact: FormattedArtifact;
413
- chunk: string;
414
- chunkIndex: number;
415
- totalChunks: number;
416
- artifactIndex: number;
417
- artifactCount: number;
418
- },
296
+ input: ChunkTaskInput,
419
297
  memory: Partial<Record<SwarmAgentName, unknown>>,
420
298
  retrievedContext: string[],
421
299
  ): Promise<{ object: ChunkAnalysis; telemetry: StreamedObjectTelemetry }> {
422
300
  return generateObjectFromStream({
423
301
  model: this.providerClient.getModel(),
424
302
  system: getSwarmAgentPrompt("synthesizer"),
425
- prompt: createPromptEnvelope({
426
- pageUrl: input.pageUrl,
427
- artifact: input.artifact,
428
- chunk: input.chunk,
429
- chunkIndex: input.chunkIndex,
430
- totalChunks: input.totalChunks,
431
- memory,
432
- retrievedContext,
433
- }),
303
+ prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
434
304
  schema: chunkAnalysisSchema,
435
305
  contract: [
436
306
  "JSON contract:",
437
307
  '{"entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"summary":"string","notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
438
308
  ].join("\n"),
309
+ attempts: 4,
439
310
  maxRetries: 2,
440
- providerOptions: {
441
- openai: {
442
- store: false,
443
- },
444
- },
445
- onProgress: (telemetry) => {
446
- this.emitProgress({
447
- stage: "agent",
448
- state: "streaming",
449
- message: `synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
450
- artifactIndex: input.artifactIndex,
451
- artifactCount: input.artifactCount,
452
- artifactUrl: input.artifact.url,
453
- chunkIndex: input.chunkIndex + 1,
454
- chunkCount: input.totalChunks,
455
- agent: "synthesizer",
456
- estimatedOutputTokens: telemetry.estimatedOutputTokens,
457
- ...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
458
- ...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
459
- });
460
- },
311
+ providerOptions: this.providerOptions,
312
+ onRetry: (attempt, error) =>
313
+ this.emitAgentEvent(
314
+ "streaming",
315
+ "synthesizer",
316
+ input,
317
+ `synthesizer agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
318
+ ),
319
+ onProgress: (telemetry) =>
320
+ this.emitAgentEvent(
321
+ "streaming",
322
+ "synthesizer",
323
+ input,
324
+ `synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
325
+ telemetry,
326
+ ),
461
327
  });
462
328
  }
463
329
 
@@ -474,27 +340,15 @@ export class AiBundleAnalyzer {
474
340
  "You are the lead synthesis agent for the final report.",
475
341
  "Merge artifact summaries and chunk analyses into a coherent site-level reverse-engineering map with the strongest evidence available.",
476
342
  ].join(" "),
477
- prompt: [
478
- `Target page: ${pageUrl}`,
479
- "Artifact summaries:",
480
- JSON.stringify(artifactSummaries, null, 2),
481
- "Chunk analyses:",
482
- JSON.stringify(chunkAnalyses, null, 2),
483
- ].join("\n\n"),
484
- schema: finalAnalysisSchema.omit({
485
- artifactSummaries: true,
486
- analyzedChunkCount: true,
487
- }),
343
+ prompt: [`Target page: ${pageUrl}`, "Artifact summaries:", JSON.stringify(artifactSummaries, null, 2), "Chunk analyses:", JSON.stringify(chunkAnalyses, null, 2)].join("\n\n"),
344
+ schema: finalAnalysisSchema.omit({ artifactSummaries: true, analyzedChunkCount: true }),
488
345
  contract: [
489
346
  "JSON contract:",
490
347
  '{"overview":"string","entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
491
348
  ].join("\n"),
349
+ attempts: 4,
492
350
  maxRetries: 2,
493
- providerOptions: {
494
- openai: {
495
- store: false,
496
- },
497
- },
351
+ providerOptions: this.providerOptions,
498
352
  });
499
353
 
500
354
  return finalAnalysisSchema.parse({
@@ -517,11 +371,7 @@ export class AiBundleAnalyzer {
517
371
 
518
372
  private getRetrievedContext(
519
373
  agent: SwarmAgentName,
520
- input: {
521
- artifact: FormattedArtifact;
522
- chunk: string;
523
- localRag: LocalArtifactRag | null;
524
- },
374
+ input: Pick<ChunkTaskInput, "artifact" | "chunk" | "localRag">,
525
375
  memory: Partial<Record<SwarmAgentName, unknown>>,
526
376
  ): string[] {
527
377
  if (!input.localRag) {
@@ -536,10 +386,7 @@ export class AiBundleAnalyzer {
536
386
  synthesizer: "entry points call graph restored names investigation tips risks runtime relationships architecture summary",
537
387
  };
538
388
 
539
- const memoryText = Object.values(memory)
540
- .map((entry) => JSON.stringify(entry))
541
- .join(" ");
542
-
389
+ const memoryText = Object.values(memory).map((entry) => JSON.stringify(entry)).join(" ");
543
390
  return input.localRag.query({
544
391
  artifactUrl: input.artifact.url,
545
392
  query: `${agentKeywords[agent]} ${input.chunk} ${memoryText}`.slice(0, 6000),