@redstone-md/mapr 0.0.4-alpha → 0.0.5-alpha

This diff shows the differences between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env bun
2
2
 
3
- import { cancel, confirm, intro, isCancel, log, outro, spinner, text } from "@clack/prompts";
3
+ import { cancel, confirm, intro, isCancel, log, outro, select, spinner, text } from "@clack/prompts";
4
4
  import pc from "picocolors";
5
5
  import packageJson from "./package.json";
6
6
 
@@ -14,6 +14,9 @@ import { ReportWriter } from "./lib/reporter";
14
14
  import { BundleScraper } from "./lib/scraper";
15
15
  import { SWARM_AGENT_ORDER } from "./lib/swarm-prompts";
16
16
 
17
+ process.env.AI_SDK_LOG_WARNINGS = "false";
18
+ (globalThis as typeof globalThis & { AI_SDK_LOG_WARNINGS?: boolean }).AI_SDK_LOG_WARNINGS = false;
19
+
17
20
  function exitIfCancelled<T>(value: T): T {
18
21
  if (isCancel(value)) {
19
22
  cancel("Operation cancelled.");
@@ -62,6 +65,30 @@ async function resolveTargetUrl(headless: boolean, prefilledUrl?: string): Promi
62
65
  );
63
66
  }
64
67
 
68
+ async function resolveAnalysisConcurrency(headless: boolean, prefilledValue: number | undefined, totalChunks: number): Promise<number> {
69
+ if (prefilledValue !== undefined) {
70
+ return prefilledValue;
71
+ }
72
+
73
+ if (headless || totalChunks <= 1) {
74
+ return 1;
75
+ }
76
+
77
+ return Number(
78
+ exitIfCancelled(
79
+ await select({
80
+ message: "Analysis concurrency",
81
+ initialValue: 2,
82
+ options: [
83
+ { value: 1, label: "1 lane", hint: "Most stable" },
84
+ { value: 2, label: "2 lanes", hint: "Recommended" },
85
+ { value: 4, label: "4 lanes", hint: "Aggressive" },
86
+ ],
87
+ }),
88
+ ),
89
+ );
90
+ }
91
+
65
92
  async function run(): Promise<void> {
66
93
  const args = parseCliArgs(process.argv.slice(2));
67
94
 
@@ -147,15 +174,17 @@ async function run(): Promise<void> {
147
174
  sum + chunkTextByBytes(artifact.formattedContent || artifact.content, deriveChunkSizeBytes(config.modelContextSize)).length,
148
175
  0,
149
176
  );
177
+ const analysisConcurrency = await resolveAnalysisConcurrency(headless, args.analysisConcurrency, totalChunks);
150
178
  const totalAgentTasks = Math.max(1, totalChunks * SWARM_AGENT_ORDER.length);
151
179
  let completedAgentTasks = 0;
152
180
 
153
181
  const analysisStep = spinner({ indicator: "timer" });
154
- analysisStep.start(formatAnalysisProgress(0, totalAgentTasks, "Starting swarm analysis"));
182
+ analysisStep.start(formatAnalysisProgress(0, totalAgentTasks, `Starting swarm analysis (${analysisConcurrency} lane${analysisConcurrency === 1 ? "" : "s"})`));
155
183
 
156
184
  const analyzer = new AiBundleAnalyzer({
157
185
  providerConfig: config,
158
186
  localRag: args.localRag,
187
+ analysisConcurrency,
159
188
  onProgress(event) {
160
189
  if (event.stage === "agent" && event.state === "completed") {
161
190
  completedAgentTasks += 1;
@@ -224,6 +253,7 @@ async function run(): Promise<void> {
224
253
  `${pc.bold("Target:")} ${scrapeResult.pageUrl}`,
225
254
  `${pc.bold("Provider:")} ${config.providerName} (${config.model})`,
226
255
  `${pc.bold("Context size:")} ${config.modelContextSize.toLocaleString()} tokens`,
256
+ `${pc.bold("Concurrency:")} ${analysisConcurrency}`,
227
257
  `${pc.bold("Local RAG:")} ${args.localRag ? "enabled" : "disabled"}`,
228
258
  `${pc.bold("Pages:")} ${scrapeResult.htmlPages.length}`,
229
259
  `${pc.bold("Artifacts:")} ${formattedArtifacts.length}`,
@@ -1,26 +1,28 @@
1
1
  import { z } from "zod";
2
- import { Buffer } from "buffer";
3
2
 
3
+ import type { AgentMemo, ArtifactSummary, BundleAnalysis, ChunkAnalysis } from "./analysis-schema";
4
4
  import {
5
5
  agentMemoSchema,
6
- artifactSummarySchema,
7
6
  buildAnalysisSnapshot,
8
7
  chunkAnalysisSchema,
9
8
  finalAnalysisSchema,
10
- type AgentMemo,
11
- type ArtifactSummary,
12
- type BundleAnalysis,
13
- type ChunkAnalysis,
14
9
  PartialAnalysisError,
15
10
  } from "./analysis-schema";
11
+ import { createFallbackAgentMemo, createFallbackChunkAnalysis } from "./analysis-fallback";
12
+ import {
13
+ chunkTextByBytes,
14
+ createPromptEnvelope,
15
+ deriveChunkSizeBytes,
16
+ formatAgentTelemetrySuffix,
17
+ normalizeAiError,
18
+ } from "./analysis-helpers";
16
19
  import { generateObjectFromStream, type StreamedObjectTelemetry } from "./ai-json";
17
20
  import { artifactTypeSchema } from "./artifacts";
18
21
  import type { FormattedArtifact } from "./formatter";
19
22
  import { LocalArtifactRag } from "./local-rag";
23
+ import { mapWithConcurrency } from "./promise-pool";
20
24
  import { AiProviderClient, type AiProviderConfig } from "./provider";
21
- import { SWARM_AGENT_ORDER, getGlobalMissionPrompt, getSwarmAgentPrompt, type SwarmAgentName } from "./swarm-prompts";
22
-
23
- export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
25
+ import { getGlobalMissionPrompt, getSwarmAgentPrompt, SWARM_AGENT_ORDER, type SwarmAgentName } from "./swarm-prompts";
24
26
 
25
27
  const analyzeInputSchema = z.object({
26
28
  pageUrl: z.string().url(),
@@ -37,6 +39,7 @@ const analyzeInputSchema = z.object({
37
39
  }),
38
40
  ),
39
41
  });
42
+
40
43
  export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
41
44
  export type AnalysisProgressState = "started" | "streaming" | "completed";
42
45
 
@@ -59,120 +62,35 @@ interface AnalyzerOptions {
59
62
  providerConfig: AiProviderConfig;
60
63
  chunkSizeBytes?: number;
61
64
  localRag?: boolean;
65
+ analysisConcurrency?: number;
62
66
  onProgress?: (event: AnalysisProgressEvent) => void;
63
67
  }
64
68
 
65
- function createPromptEnvelope(input: {
69
+ interface ChunkTaskInput {
66
70
  pageUrl: string;
67
71
  artifact: FormattedArtifact;
68
72
  chunk: string;
69
73
  chunkIndex: number;
70
74
  totalChunks: number;
71
- memory?: unknown;
72
- retrievedContext?: string[];
73
- }): string {
74
- return [
75
- `Target page: ${input.pageUrl}`,
76
- `Artifact URL: ${input.artifact.url}`,
77
- `Artifact type: ${input.artifact.type}`,
78
- `Discovered from: ${input.artifact.discoveredFrom}`,
79
- `Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
80
- input.artifact.formattingNote ? `Formatting note: ${input.artifact.formattingNote}` : "Formatting note: none",
81
- input.memory ? `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}` : "Swarm memory: none yet",
82
- input.retrievedContext && input.retrievedContext.length > 0
83
- ? `Local RAG evidence:\n${input.retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`).join("\n\n")}`
84
- : "Local RAG evidence: none",
85
- "Artifact content:",
86
- "```text",
87
- input.chunk,
88
- "```",
89
- ].join("\n\n");
90
- }
91
-
92
- function findSplitBoundary(source: string, start: number, end: number): number {
93
- const minimumPreferredIndex = start + Math.max(1, Math.floor((end - start) * 0.6));
94
- const preferredDelimiters = new Set(["\n", ";", "}", " ", ","]);
95
-
96
- for (let cursor = end - 1; cursor >= minimumPreferredIndex; cursor -= 1) {
97
- const character = source[cursor];
98
- if (character && preferredDelimiters.has(character)) {
99
- return cursor + 1;
100
- }
101
- }
102
-
103
- return end;
104
- }
105
-
106
- export function deriveChunkSizeBytes(modelContextSize: number): number {
107
- const validatedContextSize = z.number().int().positive().parse(modelContextSize);
108
- const derived = Math.floor(validatedContextSize * 0.9);
109
- return Math.max(DEFAULT_CHUNK_SIZE_BYTES, derived);
110
- }
111
-
112
- export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
113
- const validatedSource = z.string().parse(source);
114
- const validatedMaxBytes = z.number().int().positive().parse(maxBytes);
115
-
116
- if (validatedSource.length === 0) {
117
- return [];
118
- }
119
-
120
- const chunks: string[] = [];
121
- let start = 0;
122
-
123
- while (start < validatedSource.length) {
124
- let end = Math.min(validatedSource.length, start + validatedMaxBytes);
125
-
126
- while (end > start && Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
127
- end -= 1;
128
- }
129
-
130
- if (end <= start) {
131
- end = start + 1;
132
- }
133
-
134
- const splitAt = end === validatedSource.length ? end : findSplitBoundary(validatedSource, start, end);
135
- chunks.push(validatedSource.slice(start, splitAt));
136
- start = splitAt;
137
- }
138
-
139
- return chunks;
140
- }
141
-
142
- function normalizeAiError(error: unknown): Error {
143
- if (!(error instanceof Error)) {
144
- return new Error("AI analysis failed with an unknown error.");
145
- }
146
-
147
- const message = error.message.toLowerCase();
148
- if (message.includes("rate limit")) {
149
- return new Error("Provider rate limit hit during analysis. Please retry in a moment.");
150
- }
151
-
152
- if (message.includes("api key")) {
153
- return new Error("The configured API key was rejected by the provider.");
154
- }
155
-
156
- return error;
75
+ artifactIndex: number;
76
+ artifactCount: number;
77
+ localRag: LocalArtifactRag | null;
157
78
  }
158
79
 
159
- function formatAgentTelemetrySuffix(telemetry: StreamedObjectTelemetry): string {
160
- const tokenCount = telemetry.outputTokens ?? telemetry.estimatedOutputTokens;
161
- const tokenLabel = telemetry.outputTokens !== undefined ? `${tokenCount} tok` : `~${tokenCount} tok`;
162
- const tpsLabel = telemetry.tokensPerSecond !== undefined ? ` ${telemetry.tokensPerSecond} tps` : "";
163
- return ` [${tokenLabel}${tpsLabel}]`;
164
- }
80
+ export { chunkTextByBytes, deriveChunkSizeBytes } from "./analysis-helpers";
165
81
 
166
82
  export class AiBundleAnalyzer {
167
83
  private readonly providerClient: AiProviderClient;
168
84
  private readonly chunkSizeBytes: number;
169
85
  private readonly localRagEnabled: boolean;
86
+ private readonly analysisConcurrency: number;
170
87
  private readonly onProgress: ((event: AnalysisProgressEvent) => void) | undefined;
171
88
 
172
89
  public constructor(options: AnalyzerOptions) {
173
90
  this.providerClient = new AiProviderClient(options.providerConfig);
174
91
  this.chunkSizeBytes = options.chunkSizeBytes ?? deriveChunkSizeBytes(options.providerConfig.modelContextSize);
175
92
  this.localRagEnabled = options.localRag ?? false;
93
+ this.analysisConcurrency = Math.max(1, Math.floor(options.analysisConcurrency ?? 1));
176
94
  this.onProgress = options.onProgress;
177
95
  }
178
96
 
@@ -196,268 +114,214 @@ export class AiBundleAnalyzer {
196
114
 
197
115
  const chunkAnalyses: ChunkAnalysis[] = [];
198
116
  const artifactSummaries: ArtifactSummary[] = [];
117
+ const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
199
118
 
200
- try {
201
- const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
202
-
203
- for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
204
- const artifact = validatedInput.artifacts[artifactIndex]!;
205
- const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
206
- const perArtifactChunkAnalyses: ChunkAnalysis[] = [];
207
-
208
- this.emitProgress({
209
- stage: "artifact",
210
- state: "started",
211
- message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
212
- artifactIndex: artifactIndex + 1,
213
- artifactCount: validatedInput.artifacts.length,
214
- artifactUrl: artifact.url,
215
- });
216
-
217
- for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
218
- this.emitProgress({
219
- stage: "chunk",
220
- state: "started",
221
- message: `Starting chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
222
- artifactIndex: artifactIndex + 1,
223
- artifactCount: validatedInput.artifacts.length,
224
- artifactUrl: artifact.url,
225
- chunkIndex: chunkIndex + 1,
226
- chunkCount: chunks.length,
227
- });
119
+ for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
120
+ const artifact = validatedInput.artifacts[artifactIndex]!;
121
+ const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
228
122
 
229
- const analysis = await this.analyzeChunkWithSwarm({
123
+ this.emitProgress({
124
+ stage: "artifact",
125
+ state: "started",
126
+ message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
127
+ artifactIndex: artifactIndex + 1,
128
+ artifactCount: validatedInput.artifacts.length,
129
+ artifactUrl: artifact.url,
130
+ });
131
+
132
+ const perArtifactChunkAnalyses = await mapWithConcurrency(
133
+ chunks,
134
+ this.analysisConcurrency,
135
+ async (chunk, chunkIndex): Promise<ChunkAnalysis> => {
136
+ const chunkInput: ChunkTaskInput = {
230
137
  pageUrl: validatedInput.pageUrl,
231
138
  artifact,
232
- chunk: chunks[chunkIndex] ?? "",
139
+ chunk,
233
140
  chunkIndex,
234
141
  totalChunks: chunks.length,
235
142
  artifactIndex: artifactIndex + 1,
236
143
  artifactCount: validatedInput.artifacts.length,
237
144
  localRag,
238
- });
239
-
240
- chunkAnalyses.push(analysis);
241
- perArtifactChunkAnalyses.push(analysis);
145
+ };
242
146
 
243
- this.emitProgress({
244
- stage: "chunk",
245
- state: "completed",
246
- message: `Completed chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
247
- artifactIndex: artifactIndex + 1,
248
- artifactCount: validatedInput.artifacts.length,
249
- artifactUrl: artifact.url,
250
- chunkIndex: chunkIndex + 1,
251
- chunkCount: chunks.length,
252
- });
253
- }
254
-
255
- artifactSummaries.push({
256
- url: artifact.url,
257
- type: artifact.type,
258
- chunkCount: chunks.length,
259
- summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
260
- });
261
-
262
- this.emitProgress({
263
- stage: "artifact",
264
- state: "completed",
265
- message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
266
- artifactIndex: artifactIndex + 1,
267
- artifactCount: validatedInput.artifacts.length,
268
- artifactUrl: artifact.url,
269
- });
270
- }
271
-
272
- return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
273
- } catch (error) {
274
- const normalizedError = normalizeAiError(error);
275
- const partialAnalysis = buildAnalysisSnapshot({
276
- overview:
277
- chunkAnalyses.length > 0 || artifactSummaries.length > 0
278
- ? `Partial analysis only. Processing stopped because: ${normalizedError.message}`
279
- : `Analysis aborted before any chunk completed. Cause: ${normalizedError.message}`,
280
- artifactSummaries,
281
- chunkAnalyses,
147
+ this.emitChunkEvent("started", chunkInput);
148
+ const analysis = await this.analyzeChunkWithSwarm(chunkInput);
149
+ this.emitChunkEvent("completed", chunkInput);
150
+ return analysis;
151
+ },
152
+ );
153
+
154
+ chunkAnalyses.push(...perArtifactChunkAnalyses);
155
+ artifactSummaries.push({
156
+ url: artifact.url,
157
+ type: artifact.type,
158
+ chunkCount: chunks.length,
159
+ summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
282
160
  });
283
161
 
284
- throw new PartialAnalysisError(normalizedError.message, partialAnalysis);
162
+ this.emitProgress({
163
+ stage: "artifact",
164
+ state: "completed",
165
+ message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
166
+ artifactIndex: artifactIndex + 1,
167
+ artifactCount: validatedInput.artifacts.length,
168
+ artifactUrl: artifact.url,
169
+ });
285
170
  }
171
+
172
+ return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
173
+ }
174
+
175
+ private emitChunkEvent(state: Extract<AnalysisProgressState, "started" | "completed">, input: ChunkTaskInput): void {
176
+ this.emitProgress({
177
+ stage: "chunk",
178
+ state,
179
+ message: `${state === "started" ? "Starting" : "Completed"} chunk ${input.chunkIndex + 1}/${input.totalChunks} for ${input.artifact.url}`,
180
+ artifactIndex: input.artifactIndex,
181
+ artifactCount: input.artifactCount,
182
+ artifactUrl: input.artifact.url,
183
+ chunkIndex: input.chunkIndex + 1,
184
+ chunkCount: input.totalChunks,
185
+ });
286
186
  }
287
187
 
288
- private async analyzeChunkWithSwarm(input: {
289
- pageUrl: string;
290
- artifact: FormattedArtifact;
291
- chunk: string;
292
- chunkIndex: number;
293
- totalChunks: number;
294
- artifactIndex: number;
295
- artifactCount: number;
296
- localRag: LocalArtifactRag | null;
297
- }): Promise<ChunkAnalysis> {
188
+ private async analyzeChunkWithSwarm(input: ChunkTaskInput): Promise<ChunkAnalysis> {
298
189
  const memory: Partial<Record<SwarmAgentName, AgentMemo | ChunkAnalysis>> = {};
299
190
 
300
191
  for (const agent of SWARM_AGENT_ORDER) {
301
- this.emitProgress({
302
- stage: "agent",
303
- state: "started",
304
- message: `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`,
305
- artifactIndex: input.artifactIndex,
306
- artifactCount: input.artifactCount,
307
- artifactUrl: input.artifact.url,
308
- chunkIndex: input.chunkIndex + 1,
309
- chunkCount: input.totalChunks,
310
- agent,
311
- });
192
+ this.emitAgentEvent("started", agent, input, `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`);
193
+
194
+ try {
195
+ if (agent === "synthesizer") {
196
+ const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
197
+ memory[agent] = synthesized.object;
198
+ this.emitAgentCompletion(agent, input, synthesized.telemetry);
199
+ continue;
200
+ }
312
201
 
313
- if (agent === "synthesizer") {
314
- const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
315
- memory[agent] = synthesized.object;
316
- this.emitProgress({
317
- stage: "agent",
318
- state: "completed",
319
- message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(synthesized.telemetry)}`,
320
- artifactIndex: input.artifactIndex,
321
- artifactCount: input.artifactCount,
322
- artifactUrl: input.artifact.url,
323
- chunkIndex: input.chunkIndex + 1,
324
- chunkCount: input.totalChunks,
325
- agent,
326
- estimatedOutputTokens: synthesized.telemetry.estimatedOutputTokens,
327
- ...(synthesized.telemetry.outputTokens !== undefined ? { outputTokens: synthesized.telemetry.outputTokens } : {}),
328
- ...(synthesized.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: synthesized.telemetry.tokensPerSecond } : {}),
329
- });
330
- } else {
331
202
  const memo = await this.runMemoAgent(agent, input, memory, this.getRetrievedContext(agent, input, memory));
332
203
  memory[agent] = memo.object;
333
- this.emitProgress({
334
- stage: "agent",
335
- state: "completed",
336
- message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(memo.telemetry)}`,
337
- artifactIndex: input.artifactIndex,
338
- artifactCount: input.artifactCount,
339
- artifactUrl: input.artifact.url,
340
- chunkIndex: input.chunkIndex + 1,
341
- chunkCount: input.totalChunks,
204
+ this.emitAgentCompletion(agent, input, memo.telemetry);
205
+ } catch (error) {
206
+ const normalizedError = normalizeAiError(error);
207
+ memory[agent] =
208
+ agent === "synthesizer"
209
+ ? createFallbackChunkAnalysis({ artifactUrl: input.artifact.url, memory, error: normalizedError })
210
+ : createFallbackAgentMemo(agent, normalizedError);
211
+
212
+ this.emitAgentEvent(
213
+ "completed",
342
214
  agent,
343
- estimatedOutputTokens: memo.telemetry.estimatedOutputTokens,
344
- ...(memo.telemetry.outputTokens !== undefined ? { outputTokens: memo.telemetry.outputTokens } : {}),
345
- ...(memo.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: memo.telemetry.tokensPerSecond } : {}),
346
- });
215
+ input,
216
+ `${agent} agent fell back ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${normalizedError.message}`,
217
+ );
347
218
  }
348
219
  }
349
220
 
350
221
  return chunkAnalysisSchema.parse(memory.synthesizer);
351
222
  }
352
223
 
224
+ private emitAgentCompletion(agent: SwarmAgentName, input: ChunkTaskInput, telemetry: StreamedObjectTelemetry): void {
225
+ this.emitAgentEvent(
226
+ "completed",
227
+ agent,
228
+ input,
229
+ `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
230
+ telemetry,
231
+ );
232
+ }
233
+
234
+ private emitAgentEvent(
235
+ state: AnalysisProgressState,
236
+ agent: SwarmAgentName,
237
+ input: ChunkTaskInput,
238
+ message: string,
239
+ telemetry?: StreamedObjectTelemetry,
240
+ ): void {
241
+ this.emitProgress({
242
+ stage: "agent",
243
+ state,
244
+ message,
245
+ artifactIndex: input.artifactIndex,
246
+ artifactCount: input.artifactCount,
247
+ artifactUrl: input.artifact.url,
248
+ chunkIndex: input.chunkIndex + 1,
249
+ chunkCount: input.totalChunks,
250
+ agent,
251
+ ...(telemetry !== undefined ? { estimatedOutputTokens: telemetry.estimatedOutputTokens } : {}),
252
+ ...(telemetry?.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
253
+ ...(telemetry?.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
254
+ });
255
+ }
256
+
353
257
  private async runMemoAgent(
354
258
  agent: Exclude<SwarmAgentName, "synthesizer">,
355
- input: {
356
- pageUrl: string;
357
- artifact: FormattedArtifact;
358
- chunk: string;
359
- chunkIndex: number;
360
- totalChunks: number;
361
- artifactIndex: number;
362
- artifactCount: number;
363
- },
259
+ input: ChunkTaskInput,
364
260
  memory: Partial<Record<SwarmAgentName, unknown>>,
365
261
  retrievedContext: string[],
366
262
  ): Promise<{ object: AgentMemo; telemetry: StreamedObjectTelemetry }> {
367
263
  return generateObjectFromStream({
368
264
  model: this.providerClient.getModel(),
369
265
  system: getSwarmAgentPrompt(agent),
370
- prompt: createPromptEnvelope({
371
- pageUrl: input.pageUrl,
372
- artifact: input.artifact,
373
- chunk: input.chunk,
374
- chunkIndex: input.chunkIndex,
375
- totalChunks: input.totalChunks,
376
- memory,
377
- retrievedContext,
378
- }),
266
+ prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
379
267
  schema: agentMemoSchema,
380
268
  contract: [
381
269
  "JSON contract:",
382
270
  '{"role":"string","summary":"string","observations":["string"],"evidence":["string"],"nextQuestions":["string"]}',
383
271
  ].join("\n"),
272
+ attempts: 4,
384
273
  maxRetries: 2,
385
- providerOptions: {
386
- openai: {
387
- store: false,
388
- },
389
- },
390
- onProgress: (telemetry) => {
391
- this.emitProgress({
392
- stage: "agent",
393
- state: "streaming",
394
- message: `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
395
- artifactIndex: input.artifactIndex,
396
- artifactCount: input.artifactCount,
397
- artifactUrl: input.artifact.url,
398
- chunkIndex: input.chunkIndex + 1,
399
- chunkCount: input.totalChunks,
274
+ providerOptions: { openai: { store: false } },
275
+ onRetry: (attempt, error) =>
276
+ this.emitAgentEvent(
277
+ "streaming",
400
278
  agent,
401
- estimatedOutputTokens: telemetry.estimatedOutputTokens,
402
- ...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
403
- ...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
404
- });
405
- },
279
+ input,
280
+ `${agent} agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
281
+ ),
282
+ onProgress: (telemetry) =>
283
+ this.emitAgentEvent(
284
+ "streaming",
285
+ agent,
286
+ input,
287
+ `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
288
+ telemetry,
289
+ ),
406
290
  });
407
291
  }
408
292
 
409
293
  private async runSynthesisAgent(
410
- input: {
411
- pageUrl: string;
412
- artifact: FormattedArtifact;
413
- chunk: string;
414
- chunkIndex: number;
415
- totalChunks: number;
416
- artifactIndex: number;
417
- artifactCount: number;
418
- },
294
+ input: ChunkTaskInput,
419
295
  memory: Partial<Record<SwarmAgentName, unknown>>,
420
296
  retrievedContext: string[],
421
297
  ): Promise<{ object: ChunkAnalysis; telemetry: StreamedObjectTelemetry }> {
422
298
  return generateObjectFromStream({
423
299
  model: this.providerClient.getModel(),
424
300
  system: getSwarmAgentPrompt("synthesizer"),
425
- prompt: createPromptEnvelope({
426
- pageUrl: input.pageUrl,
427
- artifact: input.artifact,
428
- chunk: input.chunk,
429
- chunkIndex: input.chunkIndex,
430
- totalChunks: input.totalChunks,
431
- memory,
432
- retrievedContext,
433
- }),
301
+ prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
434
302
  schema: chunkAnalysisSchema,
435
303
  contract: [
436
304
  "JSON contract:",
437
305
  '{"entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"summary":"string","notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
438
306
  ].join("\n"),
307
+ attempts: 4,
439
308
  maxRetries: 2,
440
- providerOptions: {
441
- openai: {
442
- store: false,
443
- },
444
- },
445
- onProgress: (telemetry) => {
446
- this.emitProgress({
447
- stage: "agent",
448
- state: "streaming",
449
- message: `synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
450
- artifactIndex: input.artifactIndex,
451
- artifactCount: input.artifactCount,
452
- artifactUrl: input.artifact.url,
453
- chunkIndex: input.chunkIndex + 1,
454
- chunkCount: input.totalChunks,
455
- agent: "synthesizer",
456
- estimatedOutputTokens: telemetry.estimatedOutputTokens,
457
- ...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
458
- ...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
459
- });
460
- },
309
+ providerOptions: { openai: { store: false } },
310
+ onRetry: (attempt, error) =>
311
+ this.emitAgentEvent(
312
+ "streaming",
313
+ "synthesizer",
314
+ input,
315
+ `synthesizer agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
316
+ ),
317
+ onProgress: (telemetry) =>
318
+ this.emitAgentEvent(
319
+ "streaming",
320
+ "synthesizer",
321
+ input,
322
+ `synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
323
+ telemetry,
324
+ ),
461
325
  });
462
326
  }
463
327
 
@@ -474,27 +338,15 @@ export class AiBundleAnalyzer {
474
338
  "You are the lead synthesis agent for the final report.",
475
339
  "Merge artifact summaries and chunk analyses into a coherent site-level reverse-engineering map with the strongest evidence available.",
476
340
  ].join(" "),
477
- prompt: [
478
- `Target page: ${pageUrl}`,
479
- "Artifact summaries:",
480
- JSON.stringify(artifactSummaries, null, 2),
481
- "Chunk analyses:",
482
- JSON.stringify(chunkAnalyses, null, 2),
483
- ].join("\n\n"),
484
- schema: finalAnalysisSchema.omit({
485
- artifactSummaries: true,
486
- analyzedChunkCount: true,
487
- }),
341
+ prompt: [`Target page: ${pageUrl}`, "Artifact summaries:", JSON.stringify(artifactSummaries, null, 2), "Chunk analyses:", JSON.stringify(chunkAnalyses, null, 2)].join("\n\n"),
342
+ schema: finalAnalysisSchema.omit({ artifactSummaries: true, analyzedChunkCount: true }),
488
343
  contract: [
489
344
  "JSON contract:",
490
345
  '{"overview":"string","entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
491
346
  ].join("\n"),
347
+ attempts: 4,
492
348
  maxRetries: 2,
493
- providerOptions: {
494
- openai: {
495
- store: false,
496
- },
497
- },
349
+ providerOptions: { openai: { store: false } },
498
350
  });
499
351
 
500
352
  return finalAnalysisSchema.parse({
@@ -517,11 +369,7 @@ export class AiBundleAnalyzer {
517
369
 
518
370
  private getRetrievedContext(
519
371
  agent: SwarmAgentName,
520
- input: {
521
- artifact: FormattedArtifact;
522
- chunk: string;
523
- localRag: LocalArtifactRag | null;
524
- },
372
+ input: Pick<ChunkTaskInput, "artifact" | "chunk" | "localRag">,
525
373
  memory: Partial<Record<SwarmAgentName, unknown>>,
526
374
  ): string[] {
527
375
  if (!input.localRag) {
@@ -536,10 +384,7 @@ export class AiBundleAnalyzer {
536
384
  synthesizer: "entry points call graph restored names investigation tips risks runtime relationships architecture summary",
537
385
  };
538
386
 
539
- const memoryText = Object.values(memory)
540
- .map((entry) => JSON.stringify(entry))
541
- .join(" ");
542
-
387
+ const memoryText = Object.values(memory).map((entry) => JSON.stringify(entry)).join(" ");
543
388
  return input.localRag.query({
544
389
  artifactUrl: input.artifact.url,
545
390
  query: `${agentKeywords[agent]} ${input.chunk} ${memoryText}`.slice(0, 6000),
package/lib/ai-json.ts CHANGED
@@ -120,17 +120,55 @@ export async function generateObjectFromStream<TOutput>(input: {
120
120
  prompt: string;
121
121
  schema: z.ZodType<TOutput>;
122
122
  contract: string;
123
+ attempts?: number;
123
124
  maxRetries?: number;
124
125
  providerOptions?: Record<string, unknown>;
125
126
  onProgress?: (telemetry: StreamedObjectTelemetry) => void;
127
+ onRetry?: (attempt: number, error: Error) => void;
126
128
  }): Promise<StreamedObjectResult<TOutput>> {
129
+ const attempts = Math.max(1, Math.floor(input.attempts ?? 3));
130
+ let lastError: Error | undefined;
131
+
132
+ for (let attempt = 1; attempt <= attempts; attempt += 1) {
133
+ try {
134
+ return await streamSingleObjectAttempt(input, attempt);
135
+ } catch (error) {
136
+ lastError = error instanceof Error ? error : new Error("Streaming object generation failed.");
137
+ if (attempt >= attempts) {
138
+ throw lastError;
139
+ }
140
+
141
+ input.onRetry?.(attempt + 1, lastError);
142
+ }
143
+ }
144
+
145
+ throw lastError ?? new Error("Streaming object generation failed.");
146
+ }
147
+
148
+ async function streamSingleObjectAttempt<TOutput>(
149
+ input: {
150
+ model: unknown;
151
+ system: string;
152
+ prompt: string;
153
+ schema: z.ZodType<TOutput>;
154
+ contract: string;
155
+ maxRetries?: number;
156
+ providerOptions?: Record<string, unknown>;
157
+ onProgress?: (telemetry: StreamedObjectTelemetry) => void;
158
+ },
159
+ attempt: number,
160
+ ): Promise<StreamedObjectResult<TOutput>> {
127
161
  let streamedText = "";
128
162
  const startedAt = Date.now();
129
163
  let lastProgressAt = 0;
164
+ const repairHint =
165
+ attempt > 1
166
+ ? "\nPrevious attempt failed because the JSON was malformed or incomplete. Return a syntactically valid JSON object this time."
167
+ : "";
130
168
 
131
169
  const result = streamText({
132
170
  model: input.model as never,
133
- system: formatJsonSystemPrompt(input.system, input.contract),
171
+ system: `${formatJsonSystemPrompt(input.system, input.contract)}${repairHint}`,
134
172
  prompt: input.prompt,
135
173
  maxRetries: input.maxRetries ?? 2,
136
174
  ...(input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {}),
@@ -158,6 +196,7 @@ export async function generateObjectFromStream<TOutput>(input: {
158
196
  } catch {
159
197
  usage = undefined;
160
198
  }
199
+
161
200
  const elapsedMs = Date.now() - startedAt;
162
201
  const estimatedOutputTokens = estimateTokenCountFromText(streamedText);
163
202
  const outputTokens = usage?.outputTokens ?? undefined;
@@ -0,0 +1,39 @@
1
+ import type { AgentMemo, ChunkAnalysis } from "./analysis-schema";
2
+ import type { SwarmAgentName } from "./swarm-prompts";
3
+
4
/**
 * Builds a placeholder memo for a swarm agent whose analysis could not be produced.
 *
 * The memo carries no observations or evidence; its summary records the failure
 * message and its single follow-up question suggests a manual retry.
 *
 * @param agent - Name of the failed agent (any swarm agent except the synthesizer).
 * @param error - The error whose message is embedded in the summary.
 * @returns A memo with empty findings describing the failure.
 */
export function createFallbackAgentMemo(agent: Exclude<SwarmAgentName, "synthesizer">, error: Error): AgentMemo {
  const failureSummary = `${agent} agent failed after retries: ${error.message}`;
  const manualRetryHint = `Retry ${agent} analysis for this chunk manually if the finding is important.`;

  const memo: AgentMemo = {
    role: agent,
    summary: failureSummary,
    observations: [],
    evidence: [],
    nextQuestions: [manualRetryHint],
  };

  return memo;
}
13
+
14
/**
 * Builds a placeholder chunk analysis when synthesis for a chunk failed.
 *
 * Any agent memos already present in `memory` contribute their trimmed summaries,
 * joined with single spaces; when none are usable the summary falls back to a
 * message naming the artifact URL and the error. All structural findings are empty.
 *
 * `memory` values are typed `unknown`, so each entry is checked at runtime: only
 * non-null objects whose `summary` is a string are used. Entries with a missing or
 * non-string `summary` are ignored instead of crashing the fallback path.
 *
 * @param input.artifactUrl - URL of the artifact the chunk belongs to.
 * @param input.memory - Per-agent results collected so far (possibly partial).
 * @param input.error - The error that triggered the fallback.
 * @returns A chunk analysis with empty findings and explanatory tips.
 */
export function createFallbackChunkAnalysis(input: {
  artifactUrl: string;
  memory: Partial<Record<SwarmAgentName, unknown>>;
  error: Error;
}): ChunkAnalysis {
  const memoSummaries: string[] = [];

  for (const entry of Object.values(input.memory)) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }

    // Verify the field is really a string before calling string methods on it.
    const summary = (entry as { summary?: unknown }).summary;
    if (typeof summary !== "string") {
      continue;
    }

    const trimmed = summary.trim();
    if (trimmed.length > 0) {
      memoSummaries.push(trimmed);
    }
  }

  return {
    entryPoints: [],
    initializationFlow: [],
    callGraph: [],
    restoredNames: [],
    summary:
      memoSummaries.join(" ").trim() ||
      `Chunk analysis for ${input.artifactUrl} fell back after retries: ${input.error.message}`,
    notableLibraries: [],
    investigationTips: [
      `Chunk synthesis fell back after retries: ${input.error.message}`,
      "Re-run with lower concurrency or inspect this chunk manually if it is critical.",
    ],
    risks: [],
  };
}
@@ -0,0 +1,108 @@
1
+ import { Buffer } from "buffer";
2
+ import { z } from "zod";
3
+
4
+ import type { StreamedObjectTelemetry } from "./ai-json";
5
+ import type { FormattedArtifact } from "./formatter";
6
+
7
+ export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
8
+
9
/**
 * Assembles the text prompt sent for one chunk of an artifact.
 *
 * The prompt is a list of sections separated by blank lines: page and artifact
 * metadata, the 1-based chunk position, an optional formatting note, the
 * JSON-serialized swarm memory (when truthy), any retrieved context segments,
 * and finally the chunk itself inside a fenced `text` block.
 *
 * @param input.pageUrl - URL of the page being analysed.
 * @param input.artifact - Artifact metadata (URL, type, origin, formatting note).
 * @param input.chunk - The chunk text to embed.
 * @param input.chunkIndex - Zero-based index of this chunk.
 * @param input.totalChunks - Total number of chunks for the artifact.
 * @param input.memory - Optional memory object, pretty-printed as JSON.
 * @param input.retrievedContext - Optional retrieved segments, numbered from 1.
 * @returns The assembled prompt string.
 */
export function createPromptEnvelope(input: {
  pageUrl: string;
  artifact: FormattedArtifact;
  chunk: string;
  chunkIndex: number;
  totalChunks: number;
  memory?: unknown;
  retrievedContext?: string[];
}): string {
  const { artifact, retrievedContext } = input;

  let formattingSection = "Formatting note: none";
  if (artifact.formattingNote) {
    formattingSection = `Formatting note: ${artifact.formattingNote}`;
  }

  let memorySection = "Swarm memory: none yet";
  if (input.memory) {
    memorySection = `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}`;
  }

  let evidenceSection = "Local RAG evidence: none";
  if (retrievedContext && retrievedContext.length > 0) {
    const numberedSegments = retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`);
    evidenceSection = `Local RAG evidence:\n${numberedSegments.join("\n\n")}`;
  }

  const sections: string[] = [
    `Target page: ${input.pageUrl}`,
    `Artifact URL: ${artifact.url}`,
    `Artifact type: ${artifact.type}`,
    `Discovered from: ${artifact.discoveredFrom}`,
    `Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
    formattingSection,
    memorySection,
    evidenceSection,
    "Artifact content:",
    "```text",
    input.chunk,
    "```",
  ];

  return sections.join("\n\n");
}
35
+
36
/**
 * Picks a split position inside `[start, end)` that lands just after a natural
 * delimiter (newline, `;`, `}`, space or `,`) when one exists near the end.
 *
 * Only the trailing part of the window is searched: positions below
 * `start + max(1, floor(60% of the window))` are never considered. The search
 * runs backwards from `end`, so the delimiter closest to `end` wins.
 *
 * @param source - Text being split.
 * @param start - Inclusive start index of the window.
 * @param end - Exclusive end index of the window.
 * @returns The index right after the chosen delimiter, or `end` if none is found.
 */
function findSplitBoundary(source: string, start: number, end: number): number {
  const delimiters = ["\n", ";", "}", " ", ","];
  const windowLength = end - start;
  const lowestAllowedIndex = start + Math.max(1, Math.floor(windowLength * 0.6));

  // `boundary` is the candidate split position; the character examined sits just before it.
  let boundary = end;
  while (boundary > lowestAllowedIndex) {
    const previous = source[boundary - 1];
    if (previous !== undefined && previous !== "" && delimiters.includes(previous)) {
      return boundary;
    }
    boundary -= 1;
  }

  return end;
}
49
+
50
/**
 * Derives the chunk size to use from a model context size.
 *
 * The result is 90% of the validated context size (rounded down), but never
 * less than `DEFAULT_CHUNK_SIZE_BYTES`.
 *
 * @param modelContextSize - Context size; must be a positive integer.
 * @returns The chunk size in bytes.
 * @throws If `modelContextSize` fails the positive-integer schema check.
 */
export function deriveChunkSizeBytes(modelContextSize: number): number {
  const contextSize = z.number().int().positive().parse(modelContextSize);
  const scaledSize = Math.floor(contextSize * 0.9);

  if (scaledSize > DEFAULT_CHUNK_SIZE_BYTES) {
    return scaledSize;
  }
  return DEFAULT_CHUNK_SIZE_BYTES;
}
55
+
56
/**
 * Splits `source` into consecutive chunks whose UTF-8 encoding is at most
 * `maxBytes` each, preferring to cut right after a natural delimiter.
 *
 * Concatenating the returned chunks reproduces `source` exactly. A chunk may
 * exceed `maxBytes` only when a single code point is itself larger than the
 * limit, because every iteration must consume at least one code point.
 *
 * Compared with a naive "shrink by one code unit and re-measure" loop, this
 * implementation:
 * - never cuts between the two halves of a UTF-16 surrogate pair, which would
 *   leave lone surrogates that encode as U+FFFD and corrupt the text;
 * - sizes an oversized window with one forward pass over its code points
 *   instead of re-measuring the whole slice after every one-unit shrink.
 *
 * @param source - Text to split.
 * @param maxBytes - Maximum UTF-8 size of a chunk; must be a positive integer.
 * @returns The chunks in order; an empty array for empty input.
 * @throws If `source` or `maxBytes` fails schema validation.
 */
export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
  const validatedSource = z.string().parse(source);
  const validatedMaxBytes = z.number().int().positive().parse(maxBytes);
  const totalLength = validatedSource.length;

  if (totalLength === 0) {
    return [];
  }

  const chunks: string[] = [];
  let start = 0;

  while (start < totalLength) {
    // Every code unit encodes to at least one byte, so a chunk can never hold
    // more than `maxBytes` code units; this is the widest possible window.
    let end = Math.min(totalLength, start + validatedMaxBytes);

    // Fast path: the widest window already fits (e.g. pure ASCII). Otherwise
    // walk forward code point by code point until the byte budget is spent.
    if (Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
      let usedBytes = 0;
      let cursor = start;

      while (cursor < end) {
        const codePoint = validatedSource.codePointAt(cursor) ?? 0;
        // UTF-8 width of the code point; a lone surrogate is encoded as U+FFFD (3 bytes).
        const encodedBytes = codePoint < 0x80 ? 1 : codePoint < 0x800 ? 2 : codePoint < 0x10000 ? 3 : 4;
        if (usedBytes + encodedBytes > validatedMaxBytes) {
          break;
        }
        usedBytes += encodedBytes;
        cursor += codePoint > 0xffff ? 2 : 1;
      }

      end = cursor;
    }

    // Do not cut between a high surrogate and its low surrogate.
    if (end > start && end < totalLength) {
      const unitBefore = validatedSource.charCodeAt(end - 1);
      const unitAfter = validatedSource.charCodeAt(end);
      const cutsPair = unitBefore >= 0xd800 && unitBefore <= 0xdbff && unitAfter >= 0xdc00 && unitAfter <= 0xdfff;
      if (cutsPair) {
        end -= 1;
      }
    }

    // Guarantee progress: take one whole code point even if it exceeds the limit.
    if (end <= start) {
      const codePoint = validatedSource.codePointAt(start) ?? 0;
      end = start + (codePoint > 0xffff ? 2 : 1);
    }

    const splitAt = end === totalLength ? end : findSplitBoundary(validatedSource, start, end);
    chunks.push(validatedSource.slice(start, splitAt));
    start = splitAt;
  }

  return chunks;
}
85
+
86
/**
 * Converts an arbitrary thrown value into an `Error` with a user-facing message.
 *
 * - Non-`Error` values become a generic "unknown error".
 * - Messages containing "rate limit" or "api key" (case-insensitive, checked in
 *   that order) are replaced with a fixed explanatory error.
 * - Any other `Error` is returned unchanged (same instance).
 *
 * @param error - The caught value.
 * @returns The normalized error.
 */
export function normalizeAiError(error: unknown): Error {
  if (error instanceof Error) {
    const lowered = error.message.toLowerCase();

    // Ordered: the first matching needle decides the replacement message.
    const knownFailures: ReadonlyArray<readonly [string, string]> = [
      ["rate limit", "Provider rate limit hit during analysis. Please retry in a moment."],
      ["api key", "The configured API key was rejected by the provider."],
    ];

    for (const [needle, replacement] of knownFailures) {
      if (lowered.includes(needle)) {
        return new Error(replacement);
      }
    }

    return error;
  }

  return new Error("AI analysis failed with an unknown error.");
}
102
+
103
/**
 * Renders streaming telemetry as a bracketed suffix such as ` [120 tok 42 tps]`.
 *
 * The reported output-token count is used when present; otherwise the estimated
 * count is shown with a leading `~`. The tokens-per-second part is appended only
 * when that value is defined.
 *
 * @param telemetry - Telemetry snapshot to render.
 * @returns The suffix, starting with a space.
 */
export function formatAgentTelemetrySuffix(telemetry: StreamedObjectTelemetry): string {
  const { outputTokens, estimatedOutputTokens, tokensPerSecond } = telemetry;

  const approximateMarker = outputTokens === undefined ? "~" : "";
  const shownTokens = outputTokens ?? estimatedOutputTokens;

  const parts = [`${approximateMarker}${shownTokens} tok`];
  if (tokensPerSecond !== undefined) {
    parts.push(`${tokensPerSecond} tps`);
  }

  return ` [${parts.join(" ")}]`;
}
package/lib/cli-args.ts CHANGED
@@ -19,6 +19,7 @@ const rawCliArgsSchema = z.object({
19
19
  baseURL: z.string().url().optional(),
20
20
  model: z.string().min(1).optional(),
21
21
  contextSize: z.number().int().positive().optional(),
22
+ analysisConcurrency: z.number().int().positive().optional(),
22
23
  maxPages: z.number().int().positive().optional(),
23
24
  maxArtifacts: z.number().int().positive().optional(),
24
25
  maxDepth: z.number().int().nonnegative().optional(),
@@ -58,13 +59,14 @@ const optionMap = new Map<string, keyof CliArgs>([
58
59
  ["--base-url", "baseURL"],
59
60
  ["--model", "model"],
60
61
  ["--context-size", "contextSize"],
62
+ ["--analysis-concurrency", "analysisConcurrency"],
61
63
  ["--max-pages", "maxPages"],
62
64
  ["--max-artifacts", "maxArtifacts"],
63
65
  ["--max-depth", "maxDepth"],
64
66
  ]);
65
67
 
66
68
  const booleanKeys = new Set<keyof CliArgs>(["help", "version", "headless", "reconfigure", "listModels", "localRag", "verboseAgents"]);
67
- const numberKeys = new Set<keyof CliArgs>(["contextSize", "maxPages", "maxArtifacts", "maxDepth"]);
69
+ const numberKeys = new Set<keyof CliArgs>(["contextSize", "analysisConcurrency", "maxPages", "maxArtifacts", "maxDepth"]);
68
70
 
69
71
  function normalizeValue(key: keyof CliArgs, value: string): unknown {
70
72
  if (numberKeys.has(key)) {
@@ -154,6 +156,7 @@ export function renderHelpText(): string {
154
156
  " --base-url <url> Base URL for the provider",
155
157
  " --model <id> Model identifier",
156
158
  " --context-size <tokens> Model context window, for example 128000 or 512000",
159
+ " --analysis-concurrency <n> Parallel chunk analyses per artifact",
157
160
  " --list-models Fetch and print models using the resolved provider config",
158
161
  " --local-rag Enable local lexical RAG for oversized artifacts",
159
162
  " --reconfigure Force interactive provider reconfiguration",
@@ -0,0 +1,25 @@
1
/**
 * Maps `items` through an async `mapper`, running at most `concurrency` calls at
 * a time, and returns the results in input order.
 *
 * `concurrency` is floored and clamped to at least 1. A `NaN` value is treated
 * as 1; without that guard no worker lane would be created and the mapper would
 * silently never run.
 *
 * If any mapper call fails, the returned promise rejects with that error and no
 * further items are started. Calls already in flight are left to settle on their
 * own.
 *
 * @param items - Inputs to process.
 * @param concurrency - Maximum number of simultaneous mapper calls.
 * @param mapper - Async function applied to each item with its index.
 * @returns Results positioned at the index of their input item.
 */
export async function mapWithConcurrency<TInput, TOutput>(
  items: readonly TInput[],
  concurrency: number,
  mapper: (item: TInput, index: number) => Promise<TOutput>,
): Promise<TOutput[]> {
  const requestedLanes = Math.floor(concurrency);
  // Math.max(1, NaN) is NaN, so NaN has to be handled explicitly.
  const laneCount = Number.isNaN(requestedLanes) ? 1 : Math.max(1, requestedLanes);
  const results = new Array<TOutput>(items.length);
  let cursor = 0;
  let failed = false;

  // Each lane repeatedly claims the next unprocessed index until the input is
  // exhausted or some lane has failed.
  const runLane = async (): Promise<void> => {
    while (!failed) {
      const currentIndex = cursor;
      cursor += 1;

      if (currentIndex >= items.length) {
        return;
      }

      try {
        results[currentIndex] = await mapper(items[currentIndex]!, currentIndex);
      } catch (error) {
        // Stop the other lanes from starting new work for a call that already failed.
        failed = true;
        throw error;
      }
    }
  };

  const lanes = Array.from({ length: Math.min(laneCount, items.length) }, () => runLane());
  await Promise.all(lanes);
  return results;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redstone-md/mapr",
3
- "version": "0.0.4-alpha",
3
+ "version": "0.0.5-alpha",
4
4
  "type": "module",
5
5
  "description": "Bun-native CLI/TUI for reverse-engineering frontend websites, bundles, WASM, and service workers",
6
6
  "license": "SEE LICENSE IN LICENSE",