@redstone-md/mapr 0.0.3-alpha → 0.0.5-alpha

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
package/README.md CHANGED
@@ -16,6 +16,7 @@ This repository is public for source visibility and collaboration. The license r
16
16
  - Same-origin crawler with bounded page count and crawl depth
17
17
  - JS bundle, worker, service worker, WASM, and source-map discovery
18
18
  - Iframe-aware crawling for same-origin embedded pages
19
+ - Streaming AI generation with live throughput updates in the TUI
19
20
  - Local RAG mode for multi-megabyte bundles
20
21
  - Partial-report persistence when analysis fails mid-run
21
22
  - Headless automation mode for CI or batch workflows
@@ -41,7 +42,7 @@ Mapr does not analyze images, fonts, audio, video, PDFs, archives, or other pres
41
42
  - Built-in OpenAI-compatible presets for BlackBox AI, Nvidia NIM, and OnlySQ
42
43
  - Automatic model context-size detection from provider model metadata when available
43
44
  - Headless CLI mode for automation
44
- - Live crawler and swarm progress with agent-level tracking and progress bars
45
+ - Live crawler and swarm progress with agent-level tracking, progress bars, and streaming TPS estimates
45
46
 
46
47
  ## Install
47
48
 
@@ -66,7 +67,7 @@ npx @redstone-md/mapr --help
66
67
  4. Crawl the target website, same-origin iframe pages, and discovered code artifacts with bounded page count and crawl depth
67
68
  5. Format analyzable content where possible
68
69
  6. Optionally build a local lexical RAG index for oversized artifacts
69
- 7. Run a communicating swarm of analysis agents over chunked artifact content with structured-output fallback for providers that only support plain text
70
+ 7. Run a communicating swarm of analysis agents over chunked artifact content through streaming JSON generation so long-running requests keep producing output
70
71
  8. Generate a Markdown report in the current working directory
71
72
 
72
73
  ## Provider Presets
@@ -123,7 +124,7 @@ Mapr uses a communicating agent swarm per chunk:
123
124
  - `security`: identifies risks, persistence, caching, and operator tips
124
125
  - `synthesizer`: merges the upstream notes into the final chunk analysis
125
126
 
126
- Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts, and swarm agent/chunk execution.
127
+ Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts, swarm agent/chunk execution, and live token-per-second estimates during provider streaming.
127
128
 
128
129
  ## Large Bundle Handling
129
130
 
package/index.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env bun
2
2
 
3
- import { cancel, confirm, intro, isCancel, log, outro, spinner, text } from "@clack/prompts";
3
+ import { cancel, confirm, intro, isCancel, log, outro, select, spinner, text } from "@clack/prompts";
4
4
  import pc from "picocolors";
5
5
  import packageJson from "./package.json";
6
6
 
@@ -14,6 +14,9 @@ import { ReportWriter } from "./lib/reporter";
14
14
  import { BundleScraper } from "./lib/scraper";
15
15
  import { SWARM_AGENT_ORDER } from "./lib/swarm-prompts";
16
16
 
17
+ process.env.AI_SDK_LOG_WARNINGS = "false";
18
+ (globalThis as typeof globalThis & { AI_SDK_LOG_WARNINGS?: boolean }).AI_SDK_LOG_WARNINGS = false;
19
+
17
20
  function exitIfCancelled<T>(value: T): T {
18
21
  if (isCancel(value)) {
19
22
  cancel("Operation cancelled.");
@@ -62,6 +65,30 @@ async function resolveTargetUrl(headless: boolean, prefilledUrl?: string): Promi
62
65
  );
63
66
  }
64
67
 
68
+ async function resolveAnalysisConcurrency(headless: boolean, prefilledValue: number | undefined, totalChunks: number): Promise<number> {
69
+ if (prefilledValue !== undefined) {
70
+ return prefilledValue;
71
+ }
72
+
73
+ if (headless || totalChunks <= 1) {
74
+ return 1;
75
+ }
76
+
77
+ return Number(
78
+ exitIfCancelled(
79
+ await select({
80
+ message: "Analysis concurrency",
81
+ initialValue: 2,
82
+ options: [
83
+ { value: 1, label: "1 lane", hint: "Most stable" },
84
+ { value: 2, label: "2 lanes", hint: "Recommended" },
85
+ { value: 4, label: "4 lanes", hint: "Aggressive" },
86
+ ],
87
+ }),
88
+ ),
89
+ );
90
+ }
91
+
65
92
  async function run(): Promise<void> {
66
93
  const args = parseCliArgs(process.argv.slice(2));
67
94
 
@@ -147,15 +174,17 @@ async function run(): Promise<void> {
147
174
  sum + chunkTextByBytes(artifact.formattedContent || artifact.content, deriveChunkSizeBytes(config.modelContextSize)).length,
148
175
  0,
149
176
  );
177
+ const analysisConcurrency = await resolveAnalysisConcurrency(headless, args.analysisConcurrency, totalChunks);
150
178
  const totalAgentTasks = Math.max(1, totalChunks * SWARM_AGENT_ORDER.length);
151
179
  let completedAgentTasks = 0;
152
180
 
153
181
  const analysisStep = spinner({ indicator: "timer" });
154
- analysisStep.start(formatAnalysisProgress(0, totalAgentTasks, "Starting swarm analysis"));
182
+ analysisStep.start(formatAnalysisProgress(0, totalAgentTasks, `Starting swarm analysis (${analysisConcurrency} lane${analysisConcurrency === 1 ? "" : "s"})`));
155
183
 
156
184
  const analyzer = new AiBundleAnalyzer({
157
185
  providerConfig: config,
158
186
  localRag: args.localRag,
187
+ analysisConcurrency,
159
188
  onProgress(event) {
160
189
  if (event.stage === "agent" && event.state === "completed") {
161
190
  completedAgentTasks += 1;
@@ -224,6 +253,7 @@ async function run(): Promise<void> {
224
253
  `${pc.bold("Target:")} ${scrapeResult.pageUrl}`,
225
254
  `${pc.bold("Provider:")} ${config.providerName} (${config.model})`,
226
255
  `${pc.bold("Context size:")} ${config.modelContextSize.toLocaleString()} tokens`,
256
+ `${pc.bold("Concurrency:")} ${analysisConcurrency}`,
227
257
  `${pc.bold("Local RAG:")} ${args.localRag ? "enabled" : "disabled"}`,
228
258
  `${pc.bold("Pages:")} ${scrapeResult.htmlPages.length}`,
229
259
  `${pc.bold("Artifacts:")} ${formattedArtifacts.length}`,
@@ -1,26 +1,28 @@
1
1
  import { z } from "zod";
2
- import { Buffer } from "buffer";
3
2
 
3
+ import type { AgentMemo, ArtifactSummary, BundleAnalysis, ChunkAnalysis } from "./analysis-schema";
4
4
  import {
5
5
  agentMemoSchema,
6
- artifactSummarySchema,
7
6
  buildAnalysisSnapshot,
8
7
  chunkAnalysisSchema,
9
8
  finalAnalysisSchema,
10
- type AgentMemo,
11
- type ArtifactSummary,
12
- type BundleAnalysis,
13
- type ChunkAnalysis,
14
9
  PartialAnalysisError,
15
10
  } from "./analysis-schema";
16
- import { generateObjectWithTextFallback } from "./ai-json";
11
+ import { createFallbackAgentMemo, createFallbackChunkAnalysis } from "./analysis-fallback";
12
+ import {
13
+ chunkTextByBytes,
14
+ createPromptEnvelope,
15
+ deriveChunkSizeBytes,
16
+ formatAgentTelemetrySuffix,
17
+ normalizeAiError,
18
+ } from "./analysis-helpers";
19
+ import { generateObjectFromStream, type StreamedObjectTelemetry } from "./ai-json";
17
20
  import { artifactTypeSchema } from "./artifacts";
18
21
  import type { FormattedArtifact } from "./formatter";
19
22
  import { LocalArtifactRag } from "./local-rag";
23
+ import { mapWithConcurrency } from "./promise-pool";
20
24
  import { AiProviderClient, type AiProviderConfig } from "./provider";
21
- import { SWARM_AGENT_ORDER, getGlobalMissionPrompt, getSwarmAgentPrompt, type SwarmAgentName } from "./swarm-prompts";
22
-
23
- export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
25
+ import { getGlobalMissionPrompt, getSwarmAgentPrompt, SWARM_AGENT_ORDER, type SwarmAgentName } from "./swarm-prompts";
24
26
 
25
27
  const analyzeInputSchema = z.object({
26
28
  pageUrl: z.string().url(),
@@ -37,8 +39,9 @@ const analyzeInputSchema = z.object({
37
39
  }),
38
40
  ),
39
41
  });
42
+
40
43
  export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
41
- export type AnalysisProgressState = "started" | "completed";
44
+ export type AnalysisProgressState = "started" | "streaming" | "completed";
42
45
 
43
46
  export interface AnalysisProgressEvent {
44
47
  stage: AnalysisProgressStage;
@@ -50,119 +53,44 @@ export interface AnalysisProgressEvent {
50
53
  chunkIndex?: number;
51
54
  chunkCount?: number;
52
55
  agent?: SwarmAgentName;
56
+ estimatedOutputTokens?: number;
57
+ outputTokens?: number;
58
+ tokensPerSecond?: number;
53
59
  }
54
60
 
55
61
  interface AnalyzerOptions {
56
62
  providerConfig: AiProviderConfig;
57
63
  chunkSizeBytes?: number;
58
64
  localRag?: boolean;
65
+ analysisConcurrency?: number;
59
66
  onProgress?: (event: AnalysisProgressEvent) => void;
60
67
  }
61
68
 
62
- function createPromptEnvelope(input: {
69
+ interface ChunkTaskInput {
63
70
  pageUrl: string;
64
71
  artifact: FormattedArtifact;
65
72
  chunk: string;
66
73
  chunkIndex: number;
67
74
  totalChunks: number;
68
- memory?: unknown;
69
- retrievedContext?: string[];
70
- }): string {
71
- return [
72
- `Target page: ${input.pageUrl}`,
73
- `Artifact URL: ${input.artifact.url}`,
74
- `Artifact type: ${input.artifact.type}`,
75
- `Discovered from: ${input.artifact.discoveredFrom}`,
76
- `Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
77
- input.artifact.formattingNote ? `Formatting note: ${input.artifact.formattingNote}` : "Formatting note: none",
78
- input.memory ? `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}` : "Swarm memory: none yet",
79
- input.retrievedContext && input.retrievedContext.length > 0
80
- ? `Local RAG evidence:\n${input.retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`).join("\n\n")}`
81
- : "Local RAG evidence: none",
82
- "Artifact content:",
83
- "```text",
84
- input.chunk,
85
- "```",
86
- ].join("\n\n");
87
- }
88
-
89
- function findSplitBoundary(source: string, start: number, end: number): number {
90
- const minimumPreferredIndex = start + Math.max(1, Math.floor((end - start) * 0.6));
91
- const preferredDelimiters = new Set(["\n", ";", "}", " ", ","]);
92
-
93
- for (let cursor = end - 1; cursor >= minimumPreferredIndex; cursor -= 1) {
94
- const character = source[cursor];
95
- if (character && preferredDelimiters.has(character)) {
96
- return cursor + 1;
97
- }
98
- }
99
-
100
- return end;
101
- }
102
-
103
- export function deriveChunkSizeBytes(modelContextSize: number): number {
104
- const validatedContextSize = z.number().int().positive().parse(modelContextSize);
105
- const derived = Math.floor(validatedContextSize * 0.9);
106
- return Math.max(DEFAULT_CHUNK_SIZE_BYTES, derived);
107
- }
108
-
109
- export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
110
- const validatedSource = z.string().parse(source);
111
- const validatedMaxBytes = z.number().int().positive().parse(maxBytes);
112
-
113
- if (validatedSource.length === 0) {
114
- return [];
115
- }
116
-
117
- const chunks: string[] = [];
118
- let start = 0;
119
-
120
- while (start < validatedSource.length) {
121
- let end = Math.min(validatedSource.length, start + validatedMaxBytes);
122
-
123
- while (end > start && Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
124
- end -= 1;
125
- }
126
-
127
- if (end <= start) {
128
- end = start + 1;
129
- }
130
-
131
- const splitAt = end === validatedSource.length ? end : findSplitBoundary(validatedSource, start, end);
132
- chunks.push(validatedSource.slice(start, splitAt));
133
- start = splitAt;
134
- }
135
-
136
- return chunks;
75
+ artifactIndex: number;
76
+ artifactCount: number;
77
+ localRag: LocalArtifactRag | null;
137
78
  }
138
79
 
139
- function normalizeAiError(error: unknown): Error {
140
- if (!(error instanceof Error)) {
141
- return new Error("AI analysis failed with an unknown error.");
142
- }
143
-
144
- const message = error.message.toLowerCase();
145
- if (message.includes("rate limit")) {
146
- return new Error("Provider rate limit hit during analysis. Please retry in a moment.");
147
- }
148
-
149
- if (message.includes("api key")) {
150
- return new Error("The configured API key was rejected by the provider.");
151
- }
152
-
153
- return error;
154
- }
80
+ export { chunkTextByBytes, deriveChunkSizeBytes } from "./analysis-helpers";
155
81
 
156
82
  export class AiBundleAnalyzer {
157
83
  private readonly providerClient: AiProviderClient;
158
84
  private readonly chunkSizeBytes: number;
159
85
  private readonly localRagEnabled: boolean;
86
+ private readonly analysisConcurrency: number;
160
87
  private readonly onProgress: ((event: AnalysisProgressEvent) => void) | undefined;
161
88
 
162
89
  public constructor(options: AnalyzerOptions) {
163
90
  this.providerClient = new AiProviderClient(options.providerConfig);
164
91
  this.chunkSizeBytes = options.chunkSizeBytes ?? deriveChunkSizeBytes(options.providerConfig.modelContextSize);
165
92
  this.localRagEnabled = options.localRag ?? false;
93
+ this.analysisConcurrency = Math.max(1, Math.floor(options.analysisConcurrency ?? 1));
166
94
  this.onProgress = options.onProgress;
167
95
  }
168
96
 
@@ -186,216 +114,214 @@ export class AiBundleAnalyzer {
186
114
 
187
115
  const chunkAnalyses: ChunkAnalysis[] = [];
188
116
  const artifactSummaries: ArtifactSummary[] = [];
117
+ const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
189
118
 
190
- try {
191
- const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
192
-
193
- for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
194
- const artifact = validatedInput.artifacts[artifactIndex]!;
195
- const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
196
- const perArtifactChunkAnalyses: ChunkAnalysis[] = [];
197
-
198
- this.emitProgress({
199
- stage: "artifact",
200
- state: "started",
201
- message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
202
- artifactIndex: artifactIndex + 1,
203
- artifactCount: validatedInput.artifacts.length,
204
- artifactUrl: artifact.url,
205
- });
206
-
207
- for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
208
- this.emitProgress({
209
- stage: "chunk",
210
- state: "started",
211
- message: `Starting chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
212
- artifactIndex: artifactIndex + 1,
213
- artifactCount: validatedInput.artifacts.length,
214
- artifactUrl: artifact.url,
215
- chunkIndex: chunkIndex + 1,
216
- chunkCount: chunks.length,
217
- });
119
+ for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
120
+ const artifact = validatedInput.artifacts[artifactIndex]!;
121
+ const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
218
122
 
219
- const analysis = await this.analyzeChunkWithSwarm({
123
+ this.emitProgress({
124
+ stage: "artifact",
125
+ state: "started",
126
+ message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
127
+ artifactIndex: artifactIndex + 1,
128
+ artifactCount: validatedInput.artifacts.length,
129
+ artifactUrl: artifact.url,
130
+ });
131
+
132
+ const perArtifactChunkAnalyses = await mapWithConcurrency(
133
+ chunks,
134
+ this.analysisConcurrency,
135
+ async (chunk, chunkIndex): Promise<ChunkAnalysis> => {
136
+ const chunkInput: ChunkTaskInput = {
220
137
  pageUrl: validatedInput.pageUrl,
221
138
  artifact,
222
- chunk: chunks[chunkIndex] ?? "",
139
+ chunk,
223
140
  chunkIndex,
224
141
  totalChunks: chunks.length,
225
142
  artifactIndex: artifactIndex + 1,
226
143
  artifactCount: validatedInput.artifacts.length,
227
144
  localRag,
228
- });
229
-
230
- chunkAnalyses.push(analysis);
231
- perArtifactChunkAnalyses.push(analysis);
232
-
233
- this.emitProgress({
234
- stage: "chunk",
235
- state: "completed",
236
- message: `Completed chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
237
- artifactIndex: artifactIndex + 1,
238
- artifactCount: validatedInput.artifacts.length,
239
- artifactUrl: artifact.url,
240
- chunkIndex: chunkIndex + 1,
241
- chunkCount: chunks.length,
242
- });
243
- }
244
-
245
- artifactSummaries.push({
246
- url: artifact.url,
247
- type: artifact.type,
248
- chunkCount: chunks.length,
249
- summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
250
- });
251
-
252
- this.emitProgress({
253
- stage: "artifact",
254
- state: "completed",
255
- message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
256
- artifactIndex: artifactIndex + 1,
257
- artifactCount: validatedInput.artifacts.length,
258
- artifactUrl: artifact.url,
259
- });
260
- }
145
+ };
261
146
 
262
- return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
263
- } catch (error) {
264
- const normalizedError = normalizeAiError(error);
265
- const partialAnalysis = buildAnalysisSnapshot({
266
- overview:
267
- chunkAnalyses.length > 0 || artifactSummaries.length > 0
268
- ? `Partial analysis only. Processing stopped because: ${normalizedError.message}`
269
- : `Analysis aborted before any chunk completed. Cause: ${normalizedError.message}`,
270
- artifactSummaries,
271
- chunkAnalyses,
147
+ this.emitChunkEvent("started", chunkInput);
148
+ const analysis = await this.analyzeChunkWithSwarm(chunkInput);
149
+ this.emitChunkEvent("completed", chunkInput);
150
+ return analysis;
151
+ },
152
+ );
153
+
154
+ chunkAnalyses.push(...perArtifactChunkAnalyses);
155
+ artifactSummaries.push({
156
+ url: artifact.url,
157
+ type: artifact.type,
158
+ chunkCount: chunks.length,
159
+ summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
272
160
  });
273
161
 
274
- throw new PartialAnalysisError(normalizedError.message, partialAnalysis);
162
+ this.emitProgress({
163
+ stage: "artifact",
164
+ state: "completed",
165
+ message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
166
+ artifactIndex: artifactIndex + 1,
167
+ artifactCount: validatedInput.artifacts.length,
168
+ artifactUrl: artifact.url,
169
+ });
275
170
  }
171
+
172
+ return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
173
+ }
174
+
175
+ private emitChunkEvent(state: Extract<AnalysisProgressState, "started" | "completed">, input: ChunkTaskInput): void {
176
+ this.emitProgress({
177
+ stage: "chunk",
178
+ state,
179
+ message: `${state === "started" ? "Starting" : "Completed"} chunk ${input.chunkIndex + 1}/${input.totalChunks} for ${input.artifact.url}`,
180
+ artifactIndex: input.artifactIndex,
181
+ artifactCount: input.artifactCount,
182
+ artifactUrl: input.artifact.url,
183
+ chunkIndex: input.chunkIndex + 1,
184
+ chunkCount: input.totalChunks,
185
+ });
276
186
  }
277
187
 
278
- private async analyzeChunkWithSwarm(input: {
279
- pageUrl: string;
280
- artifact: FormattedArtifact;
281
- chunk: string;
282
- chunkIndex: number;
283
- totalChunks: number;
284
- artifactIndex: number;
285
- artifactCount: number;
286
- localRag: LocalArtifactRag | null;
287
- }): Promise<ChunkAnalysis> {
188
+ private async analyzeChunkWithSwarm(input: ChunkTaskInput): Promise<ChunkAnalysis> {
288
189
  const memory: Partial<Record<SwarmAgentName, AgentMemo | ChunkAnalysis>> = {};
289
190
 
290
191
  for (const agent of SWARM_AGENT_ORDER) {
291
- this.emitProgress({
292
- stage: "agent",
293
- state: "started",
294
- message: `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`,
295
- artifactIndex: input.artifactIndex,
296
- artifactCount: input.artifactCount,
297
- artifactUrl: input.artifact.url,
298
- chunkIndex: input.chunkIndex + 1,
299
- chunkCount: input.totalChunks,
300
- agent,
301
- });
192
+ this.emitAgentEvent("started", agent, input, `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`);
193
+
194
+ try {
195
+ if (agent === "synthesizer") {
196
+ const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
197
+ memory[agent] = synthesized.object;
198
+ this.emitAgentCompletion(agent, input, synthesized.telemetry);
199
+ continue;
200
+ }
302
201
 
303
- if (agent === "synthesizer") {
304
- const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
305
- memory[agent] = synthesized;
306
- } else {
307
202
  const memo = await this.runMemoAgent(agent, input, memory, this.getRetrievedContext(agent, input, memory));
308
- memory[agent] = memo;
203
+ memory[agent] = memo.object;
204
+ this.emitAgentCompletion(agent, input, memo.telemetry);
205
+ } catch (error) {
206
+ const normalizedError = normalizeAiError(error);
207
+ memory[agent] =
208
+ agent === "synthesizer"
209
+ ? createFallbackChunkAnalysis({ artifactUrl: input.artifact.url, memory, error: normalizedError })
210
+ : createFallbackAgentMemo(agent, normalizedError);
211
+
212
+ this.emitAgentEvent(
213
+ "completed",
214
+ agent,
215
+ input,
216
+ `${agent} agent fell back ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${normalizedError.message}`,
217
+ );
309
218
  }
310
-
311
- this.emitProgress({
312
- stage: "agent",
313
- state: "completed",
314
- message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`,
315
- artifactIndex: input.artifactIndex,
316
- artifactCount: input.artifactCount,
317
- artifactUrl: input.artifact.url,
318
- chunkIndex: input.chunkIndex + 1,
319
- chunkCount: input.totalChunks,
320
- agent,
321
- });
322
219
  }
323
220
 
324
221
  return chunkAnalysisSchema.parse(memory.synthesizer);
325
222
  }
326
223
 
224
+ private emitAgentCompletion(agent: SwarmAgentName, input: ChunkTaskInput, telemetry: StreamedObjectTelemetry): void {
225
+ this.emitAgentEvent(
226
+ "completed",
227
+ agent,
228
+ input,
229
+ `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
230
+ telemetry,
231
+ );
232
+ }
233
+
234
+ private emitAgentEvent(
235
+ state: AnalysisProgressState,
236
+ agent: SwarmAgentName,
237
+ input: ChunkTaskInput,
238
+ message: string,
239
+ telemetry?: StreamedObjectTelemetry,
240
+ ): void {
241
+ this.emitProgress({
242
+ stage: "agent",
243
+ state,
244
+ message,
245
+ artifactIndex: input.artifactIndex,
246
+ artifactCount: input.artifactCount,
247
+ artifactUrl: input.artifact.url,
248
+ chunkIndex: input.chunkIndex + 1,
249
+ chunkCount: input.totalChunks,
250
+ agent,
251
+ ...(telemetry !== undefined ? { estimatedOutputTokens: telemetry.estimatedOutputTokens } : {}),
252
+ ...(telemetry?.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
253
+ ...(telemetry?.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
254
+ });
255
+ }
256
+
327
257
  private async runMemoAgent(
328
258
  agent: Exclude<SwarmAgentName, "synthesizer">,
329
- input: {
330
- pageUrl: string;
331
- artifact: FormattedArtifact;
332
- chunk: string;
333
- chunkIndex: number;
334
- totalChunks: number;
335
- },
259
+ input: ChunkTaskInput,
336
260
  memory: Partial<Record<SwarmAgentName, unknown>>,
337
261
  retrievedContext: string[],
338
- ): Promise<AgentMemo> {
339
- return generateObjectWithTextFallback({
262
+ ): Promise<{ object: AgentMemo; telemetry: StreamedObjectTelemetry }> {
263
+ return generateObjectFromStream({
340
264
  model: this.providerClient.getModel(),
341
265
  system: getSwarmAgentPrompt(agent),
342
- prompt: createPromptEnvelope({
343
- pageUrl: input.pageUrl,
344
- artifact: input.artifact,
345
- chunk: input.chunk,
346
- chunkIndex: input.chunkIndex,
347
- totalChunks: input.totalChunks,
348
- memory,
349
- retrievedContext,
350
- }),
266
+ prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
351
267
  schema: agentMemoSchema,
352
268
  contract: [
353
269
  "JSON contract:",
354
270
  '{"role":"string","summary":"string","observations":["string"],"evidence":["string"],"nextQuestions":["string"]}',
355
271
  ].join("\n"),
272
+ attempts: 4,
356
273
  maxRetries: 2,
357
- providerOptions: {
358
- openai: {
359
- store: false,
360
- },
361
- },
274
+ providerOptions: { openai: { store: false } },
275
+ onRetry: (attempt, error) =>
276
+ this.emitAgentEvent(
277
+ "streaming",
278
+ agent,
279
+ input,
280
+ `${agent} agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
281
+ ),
282
+ onProgress: (telemetry) =>
283
+ this.emitAgentEvent(
284
+ "streaming",
285
+ agent,
286
+ input,
287
+ `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
288
+ telemetry,
289
+ ),
362
290
  });
363
291
  }
364
292
 
365
293
  private async runSynthesisAgent(
366
- input: {
367
- pageUrl: string;
368
- artifact: FormattedArtifact;
369
- chunk: string;
370
- chunkIndex: number;
371
- totalChunks: number;
372
- },
294
+ input: ChunkTaskInput,
373
295
  memory: Partial<Record<SwarmAgentName, unknown>>,
374
296
  retrievedContext: string[],
375
- ): Promise<ChunkAnalysis> {
376
- return generateObjectWithTextFallback({
297
+ ): Promise<{ object: ChunkAnalysis; telemetry: StreamedObjectTelemetry }> {
298
+ return generateObjectFromStream({
377
299
  model: this.providerClient.getModel(),
378
300
  system: getSwarmAgentPrompt("synthesizer"),
379
- prompt: createPromptEnvelope({
380
- pageUrl: input.pageUrl,
381
- artifact: input.artifact,
382
- chunk: input.chunk,
383
- chunkIndex: input.chunkIndex,
384
- totalChunks: input.totalChunks,
385
- memory,
386
- retrievedContext,
387
- }),
301
+ prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
388
302
  schema: chunkAnalysisSchema,
389
303
  contract: [
390
304
  "JSON contract:",
391
305
  '{"entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"summary":"string","notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
392
306
  ].join("\n"),
307
+ attempts: 4,
393
308
  maxRetries: 2,
394
- providerOptions: {
395
- openai: {
396
- store: false,
397
- },
398
- },
309
+ providerOptions: { openai: { store: false } },
310
+ onRetry: (attempt, error) =>
311
+ this.emitAgentEvent(
312
+ "streaming",
313
+ "synthesizer",
314
+ input,
315
+ `synthesizer agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
316
+ ),
317
+ onProgress: (telemetry) =>
318
+ this.emitAgentEvent(
319
+ "streaming",
320
+ "synthesizer",
321
+ input,
322
+ `synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
323
+ telemetry,
324
+ ),
399
325
  });
400
326
  }
401
327
 
@@ -405,38 +331,26 @@ export class AiBundleAnalyzer {
405
331
  chunkAnalyses: ChunkAnalysis[],
406
332
  ): Promise<BundleAnalysis> {
407
333
  try {
408
- const result = await generateObjectWithTextFallback({
334
+ const result = await generateObjectFromStream({
409
335
  model: this.providerClient.getModel(),
410
336
  system: [
411
337
  getGlobalMissionPrompt(),
412
338
  "You are the lead synthesis agent for the final report.",
413
339
  "Merge artifact summaries and chunk analyses into a coherent site-level reverse-engineering map with the strongest evidence available.",
414
340
  ].join(" "),
415
- prompt: [
416
- `Target page: ${pageUrl}`,
417
- "Artifact summaries:",
418
- JSON.stringify(artifactSummaries, null, 2),
419
- "Chunk analyses:",
420
- JSON.stringify(chunkAnalyses, null, 2),
421
- ].join("\n\n"),
422
- schema: finalAnalysisSchema.omit({
423
- artifactSummaries: true,
424
- analyzedChunkCount: true,
425
- }),
341
+ prompt: [`Target page: ${pageUrl}`, "Artifact summaries:", JSON.stringify(artifactSummaries, null, 2), "Chunk analyses:", JSON.stringify(chunkAnalyses, null, 2)].join("\n\n"),
342
+ schema: finalAnalysisSchema.omit({ artifactSummaries: true, analyzedChunkCount: true }),
426
343
  contract: [
427
344
  "JSON contract:",
428
345
  '{"overview":"string","entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
429
346
  ].join("\n"),
347
+ attempts: 4,
430
348
  maxRetries: 2,
431
- providerOptions: {
432
- openai: {
433
- store: false,
434
- },
435
- },
349
+ providerOptions: { openai: { store: false } },
436
350
  });
437
351
 
438
352
  return finalAnalysisSchema.parse({
439
- ...result,
353
+ ...result.object,
440
354
  artifactSummaries,
441
355
  analyzedChunkCount: chunkAnalyses.length,
442
356
  });
@@ -455,11 +369,7 @@ export class AiBundleAnalyzer {
455
369
 
456
370
  private getRetrievedContext(
457
371
  agent: SwarmAgentName,
458
- input: {
459
- artifact: FormattedArtifact;
460
- chunk: string;
461
- localRag: LocalArtifactRag | null;
462
- },
372
+ input: Pick<ChunkTaskInput, "artifact" | "chunk" | "localRag">,
463
373
  memory: Partial<Record<SwarmAgentName, unknown>>,
464
374
  ): string[] {
465
375
  if (!input.localRag) {
@@ -474,10 +384,7 @@ export class AiBundleAnalyzer {
474
384
  synthesizer: "entry points call graph restored names investigation tips risks runtime relationships architecture summary",
475
385
  };
476
386
 
477
- const memoryText = Object.values(memory)
478
- .map((entry) => JSON.stringify(entry))
479
- .join(" ");
480
-
387
+ const memoryText = Object.values(memory).map((entry) => JSON.stringify(entry)).join(" ");
481
388
  return input.localRag.query({
482
389
  artifactUrl: input.artifact.url,
483
390
  query: `${agentKeywords[agent]} ${input.chunk} ${memoryText}`.slice(0, 6000),
package/lib/ai-json.ts CHANGED
@@ -1,7 +1,21 @@
1
- import { generateText, Output } from "ai";
1
+ import { streamText } from "ai";
2
2
  import { z } from "zod";
3
3
 
4
4
  const jsonFencePattern = /^```(?:json)?\s*([\s\S]*?)\s*```$/i;
5
+ const STREAM_PROGRESS_INTERVAL_MS = 750;
6
+ const ESTIMATED_CHARS_PER_TOKEN = 4;
7
+
8
+ export interface StreamedObjectTelemetry {
9
+ elapsedMs: number;
10
+ estimatedOutputTokens: number;
11
+ outputTokens?: number;
12
+ tokensPerSecond?: number;
13
+ }
14
+
15
+ export interface StreamedObjectResult<TOutput> {
16
+ object: TOutput;
17
+ telemetry: StreamedObjectTelemetry;
18
+ }
5
19
 
6
20
  function extractBalancedJsonSlice(source: string): string | null {
7
21
  const startIndex = source.search(/[\[{]/);
@@ -15,7 +29,6 @@ function extractBalancedJsonSlice(source: string): string | null {
15
29
 
16
30
  for (let index = startIndex; index < source.length; index += 1) {
17
31
  const character = source[index];
18
-
19
32
  if (!character) {
20
33
  continue;
21
34
  }
@@ -53,6 +66,33 @@ function extractBalancedJsonSlice(source: string): string | null {
53
66
  return null;
54
67
  }
55
68
 
69
/**
 * Builds the system prompt for JSON-only generation.
 *
 * The caller's system text comes first, then three fixed "JSON only" rules,
 * then the output contract; every part is separated by a single newline.
 */
function formatJsonSystemPrompt(system: string, contract: string): string {
  const jsonOnlyRules =
    "Return only one valid JSON object.\n" +
    "Do not wrap the JSON in markdown fences.\n" +
    "Do not add explanations before or after the JSON.";

  return `${system}\n${jsonOnlyRules}\n${contract}`;
}
78
+
79
/**
 * Converts a token count and an elapsed time into a tokens-per-second rate.
 *
 * @returns The rate rounded to one decimal place, or `undefined` when there are
 *   no tokens yet or fewer than 250 ms have elapsed.
 */
function calculateTokensPerSecond(tokenCount: number, elapsedMs: number): number | undefined {
  const nothingProduced = tokenCount <= 0;
  const tooEarly = elapsedMs < 250;
  if (nothingProduced || tooEarly) {
    return undefined;
  }

  const elapsedSeconds = elapsedMs / 1000;
  const rate = tokenCount / elapsedSeconds;
  return Number(rate.toFixed(1));
}
86
+
87
/**
 * Approximates a token count from text length.
 *
 * Surrounding whitespace is ignored. Blank input yields 0; any other input
 * yields at least 1, computed as the trimmed length divided by
 * `ESTIMATED_CHARS_PER_TOKEN`, rounded up.
 */
export function estimateTokenCountFromText(source: string): number {
  const meaningfulLength = source.trim().length;
  return meaningfulLength === 0 ? 0 : Math.max(1, Math.ceil(meaningfulLength / ESTIMATED_CHARS_PER_TOKEN));
}
95
+
56
96
  export function extractJsonFromText(source: string): unknown {
57
97
  const trimmed = source.trim();
58
98
  if (!trimmed) {
@@ -74,61 +114,104 @@ export function extractJsonFromText(source: string): unknown {
74
114
  }
75
115
  }
76
116
 
77
- export function shouldFallbackToTextJson(error: unknown): boolean {
78
- if (!(error instanceof Error)) {
79
- return false;
80
- }
81
-
82
- const message = error.message.toLowerCase();
83
- return (
84
- message.includes("responseformat") ||
85
- message.includes("structured output") ||
86
- message.includes("structuredoutputs") ||
87
- message.includes("response did not match schema") ||
88
- message.includes("no object generated")
89
- );
90
- }
91
-
92
- export async function generateObjectWithTextFallback<TOutput>(input: {
117
/**
 * Generates a schema-validated object by streaming JSON text from the model,
 * starting a fresh streaming attempt whenever the previous one fails.
 *
 * @param input.attempts Maximum number of streaming attempts (default 3). The
 *   value is floored and clamped to at least 1; `NaN` falls back to the default.
 * @param input.onProgress Forwarded to each attempt to receive telemetry updates.
 * @param input.onRetry Called before a new attempt with the number of the
 *   upcoming attempt and the error that ended the previous one.
 * @returns The parsed object and the telemetry of the attempt that succeeded.
 * @throws The error from the final attempt once every attempt has failed.
 *   Thrown values that are not `Error` instances are replaced by a generic `Error`.
 */
export async function generateObjectFromStream<TOutput>(input: {
  model: unknown;
  system: string;
  prompt: string;
  schema: z.ZodType<TOutput>;
  contract: string;
  attempts?: number;
  maxRetries?: number;
  providerOptions?: Record<string, unknown>;
  onProgress?: (telemetry: StreamedObjectTelemetry) => void;
  onRetry?: (attempt: number, error: Error) => void;
}): Promise<StreamedObjectResult<TOutput>> {
  const defaultAttempts = 3;
  const requestedAttempts = Math.floor(input.attempts ?? defaultAttempts);
  // NaN fails every comparison, so an unguarded NaN would skip the loop below
  // entirely and report a failure without ever contacting the model.
  const attempts = Number.isNaN(requestedAttempts) ? defaultAttempts : Math.max(1, requestedAttempts);

  for (let attempt = 1; ; attempt += 1) {
    try {
      return await streamSingleObjectAttempt(input, attempt);
    } catch (error) {
      const failure = error instanceof Error ? error : new Error("Streaming object generation failed.");
      if (attempt >= attempts) {
        throw failure;
      }

      // Report the number of the attempt that is about to start.
      input.onRetry?.(attempt + 1, failure);
    }
  }
}
147
+
148
/**
 * Runs one streaming generation attempt and parses the accumulated text.
 *
 * While text arrives, `onProgress` receives an estimated-token snapshot at most
 * once per `STREAM_PROGRESS_INTERVAL_MS`. After the stream ends, a final
 * snapshot is sent that also carries the usage-reported output token count when
 * the usage lookup succeeds. From the second attempt onwards a repair hint is
 * appended to the system prompt.
 *
 * @param attempt 1-based attempt number.
 * @throws Whatever the stream, JSON extraction, or schema validation throws.
 */
async function streamSingleObjectAttempt<TOutput>(
  input: {
    model: unknown;
    system: string;
    prompt: string;
    schema: z.ZodType<TOutput>;
    contract: string;
    maxRetries?: number;
    providerOptions?: Record<string, unknown>;
    onProgress?: (telemetry: StreamedObjectTelemetry) => void;
  },
  attempt: number,
): Promise<StreamedObjectResult<TOutput>> {
  const beganAt = Date.now();
  let received = "";
  let previousReportAt = 0;

  let systemPrompt = formatJsonSystemPrompt(input.system, input.contract);
  if (attempt > 1) {
    systemPrompt +=
      "\nPrevious attempt failed because the JSON was malformed or incomplete. Return a syntactically valid JSON object this time.";
  }

  const result = streamText({
    model: input.model as never,
    system: systemPrompt,
    prompt: input.prompt,
    maxRetries: input.maxRetries ?? 2,
    ...(input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {}),
  });

  for await (const piece of result.textStream) {
    received += piece;

    const tick = Date.now();
    const reportDue = tick - previousReportAt >= STREAM_PROGRESS_INTERVAL_MS;
    if (input.onProgress === undefined || !reportDue) {
      continue;
    }

    const approxTokens = estimateTokenCountFromText(received);
    const rate = calculateTokensPerSecond(approxTokens, tick - beganAt);
    const snapshot: StreamedObjectTelemetry = {
      elapsedMs: tick - beganAt,
      estimatedOutputTokens: approxTokens,
    };
    if (rate !== undefined) {
      snapshot.tokensPerSecond = rate;
    }

    input.onProgress(snapshot);
    previousReportAt = tick;
  }

  // Usage data is optional telemetry: a failed lookup must not fail the attempt.
  let usage: Awaited<typeof result.usage> | undefined;
  try {
    usage = await result.usage;
  } catch {
    usage = undefined;
  }

  const totalElapsedMs = Date.now() - beganAt;
  const approxTotalTokens = estimateTokenCountFromText(received);
  const reportedTokens = usage?.outputTokens ?? undefined;
  const finalRate = calculateTokensPerSecond(reportedTokens ?? approxTotalTokens, totalElapsedMs);

  const telemetry: StreamedObjectTelemetry = {
    elapsedMs: totalElapsedMs,
    estimatedOutputTokens: approxTotalTokens,
  };
  if (reportedTokens !== undefined) {
    telemetry.outputTokens = reportedTokens;
  }
  if (finalRate !== undefined) {
    telemetry.tokensPerSecond = finalRate;
  }

  // The final snapshot is delivered before parsing, so it is sent even when the
  // streamed text turns out not to be valid JSON.
  input.onProgress?.(telemetry);

  const parsed = input.schema.parse(extractJsonFromText(received));
  return { object: parsed, telemetry };
}
@@ -0,0 +1,39 @@
1
+ import type { AgentMemo, ChunkAnalysis } from "./analysis-schema";
2
+ import type { SwarmAgentName } from "./swarm-prompts";
3
+
4
/**
 * Builds a placeholder memo for an agent whose analysis failed.
 *
 * The memo has no observations or evidence; its summary records the failure
 * message and its single next question suggests a manual retry.
 */
export function createFallbackAgentMemo(agent: Exclude<SwarmAgentName, "synthesizer">, error: Error): AgentMemo {
  const failureSummary = `${agent} agent failed after retries: ${error.message}`;
  const manualRetryHint = `Retry ${agent} analysis for this chunk manually if the finding is important.`;

  const memo: AgentMemo = {
    role: agent,
    summary: failureSummary,
    observations: [],
    evidence: [],
    nextQuestions: [manualRetryHint],
  };
  return memo;
}
13
+
14
/**
 * Builds a placeholder chunk analysis for a chunk whose synthesis failed.
 *
 * The summary is assembled from the non-empty `summary` strings found in the
 * swarm memory; when there are none it falls back to a message naming the
 * artifact and the error. All structural findings are left empty and the
 * investigation tips record the failure.
 *
 * Memory entries are typed `unknown`, so only entries that are objects carrying
 * a string `summary` are used. Anything else is skipped instead of throwing,
 * because this function is itself the error-recovery path.
 */
export function createFallbackChunkAnalysis(input: {
  artifactUrl: string;
  memory: Partial<Record<SwarmAgentName, unknown>>;
  error: Error;
}): ChunkAnalysis {
  const memoSummaries: string[] = [];
  for (const entry of Object.values(input.memory)) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }

    const summary = (entry as { summary?: unknown }).summary;
    if (typeof summary !== "string") {
      continue;
    }

    const trimmed = summary.trim();
    if (trimmed.length > 0) {
      memoSummaries.push(trimmed);
    }
  }

  return {
    entryPoints: [],
    initializationFlow: [],
    callGraph: [],
    restoredNames: [],
    summary:
      memoSummaries.join(" ").trim() ||
      `Chunk analysis for ${input.artifactUrl} fell back after retries: ${input.error.message}`,
    notableLibraries: [],
    investigationTips: [
      `Chunk synthesis fell back after retries: ${input.error.message}`,
      "Re-run with lower concurrency or inspect this chunk manually if it is critical.",
    ],
    risks: [],
  };
}
@@ -0,0 +1,108 @@
1
+ import { Buffer } from "buffer";
2
+ import { z } from "zod";
3
+
4
+ import type { StreamedObjectTelemetry } from "./ai-json";
5
+ import type { FormattedArtifact } from "./formatter";
6
+
7
+ export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
8
+
9
/**
 * Assembles the user prompt for one artifact chunk.
 *
 * Sections are emitted in a fixed order and joined by blank lines: page and
 * artifact metadata, the 1-based chunk position, the formatting note, the swarm
 * memory (pretty-printed JSON), numbered local RAG segments, and finally the
 * chunk itself inside a fenced text block. Missing optional sections are
 * replaced by an explicit "none" line.
 */
export function createPromptEnvelope(input: {
  pageUrl: string;
  artifact: FormattedArtifact;
  chunk: string;
  chunkIndex: number;
  totalChunks: number;
  memory?: unknown;
  retrievedContext?: string[];
}): string {
  const { artifact } = input;
  const sections: string[] = [
    `Target page: ${input.pageUrl}`,
    `Artifact URL: ${artifact.url}`,
    `Artifact type: ${artifact.type}`,
    `Discovered from: ${artifact.discoveredFrom}`,
    `Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
  ];

  if (artifact.formattingNote) {
    sections.push(`Formatting note: ${artifact.formattingNote}`);
  } else {
    sections.push("Formatting note: none");
  }

  if (input.memory) {
    sections.push(`Swarm memory:\n${JSON.stringify(input.memory, null, 2)}`);
  } else {
    sections.push("Swarm memory: none yet");
  }

  const segments = input.retrievedContext;
  if (segments && segments.length > 0) {
    const numbered = segments.map((segment, index) => `Segment ${index + 1}:\n${segment}`);
    sections.push(`Local RAG evidence:\n${numbered.join("\n\n")}`);
  } else {
    sections.push("Local RAG evidence: none");
  }

  sections.push("Artifact content:", "```text", input.chunk, "```");
  return sections.join("\n\n");
}
35
+
36
/**
 * Picks a split position for the window `[start, end)` of `source`.
 *
 * Scans backwards from `end - 1` for a newline, semicolon, closing brace, space
 * or comma, but only within roughly the last 40% of the window so chunks do not
 * become much shorter than requested.
 *
 * @returns The index just past the delimiter that was found, or `end` when the
 *   scanned part of the window contains no delimiter.
 */
function findSplitBoundary(source: string, start: number, end: number): number {
  const windowLength = end - start;
  const earliestAllowed = start + Math.max(1, Math.floor(windowLength * 0.6));

  let probe = end - 1;
  while (probe >= earliestAllowed) {
    switch (source[probe]) {
      case "\n":
      case ";":
      case "}":
      case " ":
      case ",":
        return probe + 1;
      default:
        probe -= 1;
    }
  }

  return end;
}
49
+
50
/**
 * Derives the chunk size from the model context size.
 *
 * @returns 90% of the context size, rounded down, but never less than
 *   `DEFAULT_CHUNK_SIZE_BYTES`.
 * @throws A zod validation error when `modelContextSize` is not a positive integer.
 */
export function deriveChunkSizeBytes(modelContextSize: number): number {
  const contextSize = z.number().int().positive().parse(modelContextSize);
  const ninetyPercent = Math.floor(contextSize * 0.9);
  return ninetyPercent > DEFAULT_CHUNK_SIZE_BYTES ? ninetyPercent : DEFAULT_CHUNK_SIZE_BYTES;
}
55
+
56
/**
 * Finds the largest end index for a chunk starting at `start` whose UTF-8
 * encoding fits in `maxBytes`, without cutting a surrogate pair in half.
 * Always returns an index greater than `start` so the caller makes progress.
 */
function findCodePointSafeEnd(source: string, start: number, maxBytes: number): number {
  const optimisticEnd = Math.min(source.length, start + maxBytes);
  let end = optimisticEnd;

  // Fast path: one UTF-16 unit per byte of budget fits whenever the window is
  // ASCII. Otherwise walk forward once, code point by code point, rather than
  // re-encoding a shrinking slice after every single-character step back.
  if (Buffer.byteLength(source.slice(start, optimisticEnd), "utf8") > maxBytes) {
    end = start;
    let usedBytes = 0;
    while (end < source.length) {
      const codePoint = source.codePointAt(end) ?? 0;
      // UTF-8 width; a lone surrogate is encoded as U+FFFD, which is 3 bytes.
      const encodedBytes = codePoint <= 0x7f ? 1 : codePoint <= 0x7ff ? 2 : codePoint <= 0xffff ? 3 : 4;
      if (usedBytes + encodedBytes > maxBytes) {
        break;
      }
      usedBytes += encodedBytes;
      end += codePoint > 0xffff ? 2 : 1;
    }
  }

  // Step back if the boundary sits between a high and a low surrogate.
  if (end > start && end < source.length) {
    const before = source.charCodeAt(end - 1);
    const after = source.charCodeAt(end);
    if (before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff) {
      end -= 1;
    }
  }

  // The budget is smaller than the first code point: emit that code point whole.
  if (end <= start) {
    const firstCodePoint = source.codePointAt(start) ?? 0;
    end = start + (firstCodePoint > 0xffff ? 2 : 1);
  }

  return end;
}

/**
 * Splits `source` into consecutive chunks of at most `maxBytes` UTF-8 bytes.
 *
 * Each chunk is cut at a nearby delimiter when `findSplitBoundary` finds one.
 * Concatenating the chunks reproduces `source` exactly. A chunk exceeds
 * `maxBytes` only when a single code point is larger than the budget, and a
 * surrogate pair is never divided between two chunks.
 *
 * @param source Text to split; an empty string yields an empty array.
 * @param maxBytes Byte budget per chunk; defaults to `DEFAULT_CHUNK_SIZE_BYTES`.
 * @throws A zod validation error when `source` is not a string or `maxBytes`
 *   is not a positive integer.
 */
export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
  const validatedSource = z.string().parse(source);
  const validatedMaxBytes = z.number().int().positive().parse(maxBytes);

  if (validatedSource.length === 0) {
    return [];
  }

  const chunks: string[] = [];
  let start = 0;

  while (start < validatedSource.length) {
    const end = findCodePointSafeEnd(validatedSource, start, validatedMaxBytes);
    // A delimiter is always a single UTF-16 unit, so a delimiter-based split
    // cannot land inside a surrogate pair either.
    const splitAt = end === validatedSource.length ? end : findSplitBoundary(validatedSource, start, end);
    chunks.push(validatedSource.slice(start, splitAt));
    start = splitAt;
  }

  return chunks;
}
85
+
86
/**
 * Maps an arbitrary thrown value to an `Error` with a user-facing message.
 *
 * Messages mentioning "rate limit" or "api key" (case-insensitive) are replaced
 * by fixed explanations, other `Error` instances are returned unchanged, and
 * non-`Error` values become a generic failure.
 */
export function normalizeAiError(error: unknown): Error {
  if (error instanceof Error) {
    const lowered = error.message.toLowerCase();

    if (lowered.includes("rate limit")) {
      return new Error("Provider rate limit hit during analysis. Please retry in a moment.");
    }

    if (lowered.includes("api key")) {
      return new Error("The configured API key was rejected by the provider.");
    }

    return error;
  }

  return new Error("AI analysis failed with an unknown error.");
}
102
+
103
/**
 * Renders telemetry as a bracketed suffix such as ` [~120 tok 35.2 tps]`.
 *
 * The reported output token count is shown when present; otherwise the
 * estimate is shown with a `~` prefix. The rate is appended only when it is
 * available.
 */
export function formatAgentTelemetrySuffix(telemetry: StreamedObjectTelemetry): string {
  const { outputTokens, estimatedOutputTokens, tokensPerSecond } = telemetry;
  const shownCount = outputTokens ?? estimatedOutputTokens;
  const approximateMarker = outputTokens === undefined ? "~" : "";

  let suffix = ` [${approximateMarker}${shownCount} tok`;
  if (tokensPerSecond !== undefined) {
    suffix += ` ${tokensPerSecond} tps`;
  }

  return `${suffix}]`;
}
package/lib/cli-args.ts CHANGED
@@ -19,6 +19,7 @@ const rawCliArgsSchema = z.object({
19
19
  baseURL: z.string().url().optional(),
20
20
  model: z.string().min(1).optional(),
21
21
  contextSize: z.number().int().positive().optional(),
22
+ analysisConcurrency: z.number().int().positive().optional(),
22
23
  maxPages: z.number().int().positive().optional(),
23
24
  maxArtifacts: z.number().int().positive().optional(),
24
25
  maxDepth: z.number().int().nonnegative().optional(),
@@ -58,13 +59,14 @@ const optionMap = new Map<string, keyof CliArgs>([
58
59
  ["--base-url", "baseURL"],
59
60
  ["--model", "model"],
60
61
  ["--context-size", "contextSize"],
62
+ ["--analysis-concurrency", "analysisConcurrency"],
61
63
  ["--max-pages", "maxPages"],
62
64
  ["--max-artifacts", "maxArtifacts"],
63
65
  ["--max-depth", "maxDepth"],
64
66
  ]);
65
67
 
66
68
  const booleanKeys = new Set<keyof CliArgs>(["help", "version", "headless", "reconfigure", "listModels", "localRag", "verboseAgents"]);
67
- const numberKeys = new Set<keyof CliArgs>(["contextSize", "maxPages", "maxArtifacts", "maxDepth"]);
69
+ const numberKeys = new Set<keyof CliArgs>(["contextSize", "analysisConcurrency", "maxPages", "maxArtifacts", "maxDepth"]);
68
70
 
69
71
  function normalizeValue(key: keyof CliArgs, value: string): unknown {
70
72
  if (numberKeys.has(key)) {
@@ -154,6 +156,7 @@ export function renderHelpText(): string {
154
156
  " --base-url <url> Base URL for the provider",
155
157
  " --model <id> Model identifier",
156
158
  " --context-size <tokens> Model context window, for example 128000 or 512000",
159
+ " --analysis-concurrency <n> Parallel chunk analyses per artifact",
157
160
  " --list-models Fetch and print models using the resolved provider config",
158
161
  " --local-rag Enable local lexical RAG for oversized artifacts",
159
162
  " --reconfigure Force interactive provider reconfiguration",
@@ -0,0 +1,25 @@
1
/**
 * Maps `items` through an async `mapper`, running at most `concurrency`
 * mapper calls at a time.
 *
 * @param items Inputs to map; the array is not modified.
 * @param concurrency Maximum number of mapper calls in flight. The value is
 *   floored and clamped to at least 1; `NaN` is treated as 1.
 * @param mapper Receives each item and its index.
 * @returns Results in the same order as `items`.
 * @throws The first rejection raised by `mapper`. Workers that are already
 *   running are not cancelled when that happens.
 */
export async function mapWithConcurrency<TInput, TOutput>(
  items: readonly TInput[],
  concurrency: number,
  mapper: (item: TInput, index: number) => Promise<TOutput>,
): Promise<TOutput[]> {
  const requestedConcurrency = Math.floor(concurrency);
  // Math.max(1, NaN) is NaN, which would create zero workers and hand back an
  // array of holes without ever calling the mapper.
  const workerLimit = Number.isNaN(requestedConcurrency) ? 1 : Math.max(1, requestedConcurrency);

  const results = new Array<TOutput>(items.length);
  let cursor = 0;

  const runWorker = async (): Promise<void> => {
    // Claiming an index is synchronous, so two workers never take the same item.
    while (cursor < items.length) {
      const currentIndex = cursor;
      cursor += 1;
      results[currentIndex] = await mapper(items[currentIndex] as TInput, currentIndex);
    }
  };

  const workerCount = Math.min(workerLimit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redstone-md/mapr",
3
- "version": "0.0.3-alpha",
3
+ "version": "0.0.5-alpha",
4
4
  "type": "module",
5
5
  "description": "Bun-native CLI/TUI for reverse-engineering frontend websites, bundles, WASM, and service workers",
6
6
  "license": "SEE LICENSE IN LICENSE",