@redstone-md/mapr 0.0.3-alpha → 0.0.4-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -16,6 +16,7 @@ This repository is public for source visibility and collaboration. The license r
16
16
  - Same-origin crawler with bounded page count and crawl depth
17
17
  - JS bundle, worker, service worker, WASM, and source-map discovery
18
18
  - Iframe-aware crawling for same-origin embedded pages
19
+ - Streaming AI generation with live throughput updates in the TUI
19
20
  - Local RAG mode for multi-megabyte bundles
20
21
  - Partial-report persistence when analysis fails mid-run
21
22
  - Headless automation mode for CI or batch workflows
@@ -41,7 +42,7 @@ Mapr does not analyze images, fonts, audio, video, PDFs, archives, or other pres
41
42
  - Built-in OpenAI-compatible presets for BlackBox AI, Nvidia NIM, and OnlySQ
42
43
  - Automatic model context-size detection from provider model metadata when available
43
44
  - Headless CLI mode for automation
44
- - Live crawler and swarm progress with agent-level tracking and progress bars
45
+ - Live crawler and swarm progress with agent-level tracking, progress bars, and streaming TPS estimates
45
46
 
46
47
  ## Install
47
48
 
@@ -66,7 +67,7 @@ npx @redstone-md/mapr --help
66
67
  4. Crawl the target website, same-origin iframe pages, and discovered code artifacts with bounded page count and crawl depth
67
68
  5. Format analyzable content where possible
68
69
  6. Optionally build a local lexical RAG index for oversized artifacts
69
- 7. Run a communicating swarm of analysis agents over chunked artifact content with structured-output fallback for providers that only support plain text
70
+ 7. Run a communicating swarm of analysis agents over chunked artifact content through streaming JSON generation so long-running requests keep producing output
70
71
  8. Generate a Markdown report in the current working directory
71
72
 
72
73
  ## Provider Presets
@@ -123,7 +124,7 @@ Mapr uses a communicating agent swarm per chunk:
123
124
  - `security`: identifies risks, persistence, caching, and operator tips
124
125
  - `synthesizer`: merges the upstream notes into the final chunk analysis
125
126
 
126
- Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts, and swarm agent/chunk execution.
127
+ Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts, swarm agent/chunk execution, and live token-per-second estimates during provider streaming.
127
128
 
128
129
  ## Large Bundle Handling
129
130
 
@@ -13,7 +13,7 @@ import {
13
13
  type ChunkAnalysis,
14
14
  PartialAnalysisError,
15
15
  } from "./analysis-schema";
16
- import { generateObjectWithTextFallback } from "./ai-json";
16
+ import { generateObjectFromStream, type StreamedObjectTelemetry } from "./ai-json";
17
17
  import { artifactTypeSchema } from "./artifacts";
18
18
  import type { FormattedArtifact } from "./formatter";
19
19
  import { LocalArtifactRag } from "./local-rag";
@@ -38,7 +38,7 @@ const analyzeInputSchema = z.object({
38
38
  ),
39
39
  });
40
40
  export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
41
- export type AnalysisProgressState = "started" | "completed";
41
+ export type AnalysisProgressState = "started" | "streaming" | "completed";
42
42
 
43
43
  export interface AnalysisProgressEvent {
44
44
  stage: AnalysisProgressStage;
@@ -50,6 +50,9 @@ export interface AnalysisProgressEvent {
50
50
  chunkIndex?: number;
51
51
  chunkCount?: number;
52
52
  agent?: SwarmAgentName;
53
+ estimatedOutputTokens?: number;
54
+ outputTokens?: number;
55
+ tokensPerSecond?: number;
53
56
  }
54
57
 
55
58
  interface AnalyzerOptions {
@@ -153,6 +156,13 @@ function normalizeAiError(error: unknown): Error {
153
156
  return error;
154
157
  }
155
158
 
159
// Builds the bracketed telemetry suffix appended to agent progress messages,
// e.g. " [123 tok 45.6 tps]" or " [~123 tok]" when only an estimate exists.
+ function formatAgentTelemetrySuffix(telemetry: StreamedObjectTelemetry): string {
160
+ const tokenCount = telemetry.outputTokens ?? telemetry.estimatedOutputTokens; // prefer the reported count, fall back to the estimate
161
+ const tokenLabel = telemetry.outputTokens !== undefined ? `${tokenCount} tok` : `~${tokenCount} tok`; // "~" marks an estimated count
162
+ const tpsLabel = telemetry.tokensPerSecond !== undefined ? ` ${telemetry.tokensPerSecond} tps` : ""; // throughput is omitted when not available
163
+ return ` [${tokenLabel}${tpsLabel}]`;
164
+ }
165
+
156
166
  export class AiBundleAnalyzer {
157
167
  private readonly providerClient: AiProviderClient;
158
168
  private readonly chunkSizeBytes: number;
@@ -302,23 +312,39 @@ export class AiBundleAnalyzer {
302
312
 
303
313
  if (agent === "synthesizer") {
304
314
  const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
305
- memory[agent] = synthesized;
315
+ memory[agent] = synthesized.object;
316
+ this.emitProgress({
317
+ stage: "agent",
318
+ state: "completed",
319
+ message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(synthesized.telemetry)}`,
320
+ artifactIndex: input.artifactIndex,
321
+ artifactCount: input.artifactCount,
322
+ artifactUrl: input.artifact.url,
323
+ chunkIndex: input.chunkIndex + 1,
324
+ chunkCount: input.totalChunks,
325
+ agent,
326
+ estimatedOutputTokens: synthesized.telemetry.estimatedOutputTokens,
327
+ ...(synthesized.telemetry.outputTokens !== undefined ? { outputTokens: synthesized.telemetry.outputTokens } : {}),
328
+ ...(synthesized.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: synthesized.telemetry.tokensPerSecond } : {}),
329
+ });
306
330
  } else {
307
331
  const memo = await this.runMemoAgent(agent, input, memory, this.getRetrievedContext(agent, input, memory));
308
- memory[agent] = memo;
332
+ memory[agent] = memo.object;
333
+ this.emitProgress({
334
+ stage: "agent",
335
+ state: "completed",
336
+ message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(memo.telemetry)}`,
337
+ artifactIndex: input.artifactIndex,
338
+ artifactCount: input.artifactCount,
339
+ artifactUrl: input.artifact.url,
340
+ chunkIndex: input.chunkIndex + 1,
341
+ chunkCount: input.totalChunks,
342
+ agent,
343
+ estimatedOutputTokens: memo.telemetry.estimatedOutputTokens,
344
+ ...(memo.telemetry.outputTokens !== undefined ? { outputTokens: memo.telemetry.outputTokens } : {}),
345
+ ...(memo.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: memo.telemetry.tokensPerSecond } : {}),
346
+ });
309
347
  }
310
-
311
- this.emitProgress({
312
- stage: "agent",
313
- state: "completed",
314
- message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`,
315
- artifactIndex: input.artifactIndex,
316
- artifactCount: input.artifactCount,
317
- artifactUrl: input.artifact.url,
318
- chunkIndex: input.chunkIndex + 1,
319
- chunkCount: input.totalChunks,
320
- agent,
321
- });
322
348
  }
323
349
 
324
350
  return chunkAnalysisSchema.parse(memory.synthesizer);
@@ -332,11 +358,13 @@ export class AiBundleAnalyzer {
332
358
  chunk: string;
333
359
  chunkIndex: number;
334
360
  totalChunks: number;
361
+ artifactIndex: number;
362
+ artifactCount: number;
335
363
  },
336
364
  memory: Partial<Record<SwarmAgentName, unknown>>,
337
365
  retrievedContext: string[],
338
- ): Promise<AgentMemo> {
339
- return generateObjectWithTextFallback({
366
+ ): Promise<{ object: AgentMemo; telemetry: StreamedObjectTelemetry }> {
367
+ return generateObjectFromStream({
340
368
  model: this.providerClient.getModel(),
341
369
  system: getSwarmAgentPrompt(agent),
342
370
  prompt: createPromptEnvelope({
@@ -359,6 +387,22 @@ export class AiBundleAnalyzer {
359
387
  store: false,
360
388
  },
361
389
  },
390
+ onProgress: (telemetry) => {
391
+ this.emitProgress({
392
+ stage: "agent",
393
+ state: "streaming",
394
+ message: `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
395
+ artifactIndex: input.artifactIndex,
396
+ artifactCount: input.artifactCount,
397
+ artifactUrl: input.artifact.url,
398
+ chunkIndex: input.chunkIndex + 1,
399
+ chunkCount: input.totalChunks,
400
+ agent,
401
+ estimatedOutputTokens: telemetry.estimatedOutputTokens,
402
+ ...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
403
+ ...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
404
+ });
405
+ },
362
406
  });
363
407
  }
364
408
 
@@ -369,11 +413,13 @@ export class AiBundleAnalyzer {
369
413
  chunk: string;
370
414
  chunkIndex: number;
371
415
  totalChunks: number;
416
+ artifactIndex: number;
417
+ artifactCount: number;
372
418
  },
373
419
  memory: Partial<Record<SwarmAgentName, unknown>>,
374
420
  retrievedContext: string[],
375
- ): Promise<ChunkAnalysis> {
376
- return generateObjectWithTextFallback({
421
+ ): Promise<{ object: ChunkAnalysis; telemetry: StreamedObjectTelemetry }> {
422
+ return generateObjectFromStream({
377
423
  model: this.providerClient.getModel(),
378
424
  system: getSwarmAgentPrompt("synthesizer"),
379
425
  prompt: createPromptEnvelope({
@@ -396,6 +442,22 @@ export class AiBundleAnalyzer {
396
442
  store: false,
397
443
  },
398
444
  },
445
+ onProgress: (telemetry) => {
446
+ this.emitProgress({
447
+ stage: "agent",
448
+ state: "streaming",
449
+ message: `synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
450
+ artifactIndex: input.artifactIndex,
451
+ artifactCount: input.artifactCount,
452
+ artifactUrl: input.artifact.url,
453
+ chunkIndex: input.chunkIndex + 1,
454
+ chunkCount: input.totalChunks,
455
+ agent: "synthesizer",
456
+ estimatedOutputTokens: telemetry.estimatedOutputTokens,
457
+ ...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
458
+ ...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
459
+ });
460
+ },
399
461
  });
400
462
  }
401
463
 
@@ -405,7 +467,7 @@ export class AiBundleAnalyzer {
405
467
  chunkAnalyses: ChunkAnalysis[],
406
468
  ): Promise<BundleAnalysis> {
407
469
  try {
408
- const result = await generateObjectWithTextFallback({
470
+ const result = await generateObjectFromStream({
409
471
  model: this.providerClient.getModel(),
410
472
  system: [
411
473
  getGlobalMissionPrompt(),
@@ -436,7 +498,7 @@ export class AiBundleAnalyzer {
436
498
  });
437
499
 
438
500
  return finalAnalysisSchema.parse({
439
- ...result,
501
+ ...result.object,
440
502
  artifactSummaries,
441
503
  analyzedChunkCount: chunkAnalyses.length,
442
504
  });
package/lib/ai-json.ts CHANGED
@@ -1,7 +1,21 @@
1
- import { generateText, Output } from "ai";
1
+ import { streamText } from "ai";
2
2
  import { z } from "zod";
3
3
 
4
4
  const jsonFencePattern = /^```(?:json)?\s*([\s\S]*?)\s*```$/i;
5
// Minimum gap, in milliseconds, between two onProgress calls made while text parts are still arriving.
+ const STREAM_PROGRESS_INTERVAL_MS = 750;
6
// Divisor that turns a trimmed character count into a rough token estimate.
+ const ESTIMATED_CHARS_PER_TOKEN = 4;
7
+
8
// Throughput snapshot passed to onProgress during streaming and returned with the final result.
+ export interface StreamedObjectTelemetry {
9
+ elapsedMs: number; // milliseconds elapsed since the streaming call started (Date.now() difference)
10
+ estimatedOutputTokens: number; // estimate derived from the length of the text streamed so far
11
+ outputTokens?: number; // taken from the stream's usage data; only set on the final snapshot when usage is available
12
+ tokensPerSecond?: number; // omitted when no rate could be calculated (no tokens yet or under 250 ms elapsed)
13
+ }
14
+
15
// Return shape of generateObjectFromStream: the parsed value plus the final telemetry snapshot.
+ export interface StreamedObjectResult<TOutput> {
16
+ object: TOutput; // result of parsing the JSON extracted from the streamed text with the caller's schema
17
+ telemetry: StreamedObjectTelemetry; // snapshot computed after the text stream has ended
18
+ }
5
19
 
6
20
  function extractBalancedJsonSlice(source: string): string | null {
7
21
  const startIndex = source.search(/[\[{]/);
@@ -15,7 +29,6 @@ function extractBalancedJsonSlice(source: string): string | null {
15
29
 
16
30
  for (let index = startIndex; index < source.length; index += 1) {
17
31
  const character = source[index];
18
-
19
32
  if (!character) {
20
33
  continue;
21
34
  }
@@ -53,6 +66,33 @@ function extractBalancedJsonSlice(source: string): string | null {
53
66
  return null;
54
67
  }
55
68
 
69
// Composes the system prompt for JSON-only generation: the caller's system text,
// three fixed "JSON only" instructions, then the contract text, joined by newlines.
+ function formatJsonSystemPrompt(system: string, contract: string): string {
70
+ return [
71
+ system,
72
+ "Return only one valid JSON object.",
73
+ "Do not wrap the JSON in markdown fences.",
74
+ "Do not add explanations before or after the JSON.",
75
+ contract,
76
+ ].join("\n");
77
+ }
78
+
79
// Converts a token count and an elapsed time in milliseconds into tokens per second,
// rounded to one decimal place. Returns undefined when no rate should be reported.
+ function calculateTokensPerSecond(tokenCount: number, elapsedMs: number): number | undefined {
80
+ if (tokenCount <= 0 || elapsedMs < 250) { // no tokens yet, or less than 250 ms elapsed: report no rate
81
+ return undefined;
82
+ }
83
+
84
+ return Number((tokenCount / (elapsedMs / 1000)).toFixed(1)); // toFixed(1) rounds; Number() converts the string back
85
+ }
86
+
87
// Estimates a token count from text length: trimmed length divided by
// ESTIMATED_CHARS_PER_TOKEN, rounded up. Returns 0 for empty or whitespace-only input.
+ export function estimateTokenCountFromText(source: string): number {
88
+ const trimmed = source.trim();
89
+ if (trimmed.length === 0) {
90
+ return 0;
91
+ }
92
+
93
+ return Math.max(1, Math.ceil(trimmed.length / ESTIMATED_CHARS_PER_TOKEN)); // any non-empty text counts as at least one token
94
+ }
95
+
56
96
  export function extractJsonFromText(source: string): unknown {
57
97
  const trimmed = source.trim();
58
98
  if (!trimmed) {
@@ -74,22 +114,7 @@ export function extractJsonFromText(source: string): unknown {
74
114
  }
75
115
  }
76
116
 
77
- export function shouldFallbackToTextJson(error: unknown): boolean {
78
- if (!(error instanceof Error)) {
79
- return false;
80
- }
81
-
82
- const message = error.message.toLowerCase();
83
- return (
84
- message.includes("responseformat") ||
85
- message.includes("structured output") ||
86
- message.includes("structuredoutputs") ||
87
- message.includes("response did not match schema") ||
88
- message.includes("no object generated")
89
- );
90
- }
91
-
92
- export async function generateObjectWithTextFallback<TOutput>(input: {
117
+ export async function generateObjectFromStream<TOutput>(input: {
93
118
  model: unknown;
94
119
  system: string;
95
120
  prompt: string;
@@ -97,38 +122,57 @@ export async function generateObjectWithTextFallback<TOutput>(input: {
97
122
  contract: string;
98
123
  maxRetries?: number;
99
124
  providerOptions?: Record<string, unknown>;
100
- }): Promise<TOutput> {
101
- try {
102
- const structuredResult = await generateText({
103
- model: input.model as never,
104
- system: input.system,
105
- prompt: input.prompt,
106
- output: Output.object({ schema: input.schema }),
107
- maxRetries: input.maxRetries ?? 2,
108
- ...(input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {}),
109
- });
110
-
111
- return input.schema.parse(structuredResult.output);
112
- } catch (error) {
113
- if (!shouldFallbackToTextJson(error)) {
114
- throw error;
115
- }
116
- }
125
+ onProgress?: (telemetry: StreamedObjectTelemetry) => void;
126
+ }): Promise<StreamedObjectResult<TOutput>> {
127
+ let streamedText = "";
128
+ const startedAt = Date.now();
129
+ let lastProgressAt = 0;
117
130
 
118
- const textResult = await generateText({
131
+ const result = streamText({
119
132
  model: input.model as never,
120
- system: [
121
- input.system,
122
- "Return only one valid JSON object.",
123
- "Do not wrap the JSON in markdown fences.",
124
- "Do not add explanations before or after the JSON.",
125
- input.contract,
126
- ].join("\n"),
133
+ system: formatJsonSystemPrompt(input.system, input.contract),
127
134
  prompt: input.prompt,
128
- output: Output.text(),
129
135
  maxRetries: input.maxRetries ?? 2,
130
136
  ...(input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {}),
131
137
  });
132
138
 
133
- return input.schema.parse(extractJsonFromText(textResult.output));
139
+ for await (const textPart of result.textStream) {
140
+ streamedText += textPart;
141
+
142
+ const now = Date.now();
143
+ if (input.onProgress !== undefined && now - lastProgressAt >= STREAM_PROGRESS_INTERVAL_MS) {
144
+ const estimatedOutputTokens = estimateTokenCountFromText(streamedText);
145
+ const tokensPerSecond = calculateTokensPerSecond(estimatedOutputTokens, now - startedAt);
146
+ input.onProgress({
147
+ elapsedMs: now - startedAt,
148
+ estimatedOutputTokens,
149
+ ...(tokensPerSecond !== undefined ? { tokensPerSecond } : {}),
150
+ });
151
+ lastProgressAt = now;
152
+ }
153
+ }
154
+
155
+ let usage: Awaited<typeof result.usage> | undefined;
156
+ try {
157
+ usage = await result.usage;
158
+ } catch {
159
+ usage = undefined;
160
+ }
161
+ const elapsedMs = Date.now() - startedAt;
162
+ const estimatedOutputTokens = estimateTokenCountFromText(streamedText);
163
+ const outputTokens = usage?.outputTokens ?? undefined;
164
+ const tokensPerSecond = calculateTokensPerSecond(outputTokens ?? estimatedOutputTokens, elapsedMs);
165
+ const telemetry: StreamedObjectTelemetry = {
166
+ elapsedMs,
167
+ estimatedOutputTokens,
168
+ ...(outputTokens !== undefined ? { outputTokens } : {}),
169
+ ...(tokensPerSecond !== undefined ? { tokensPerSecond } : {}),
170
+ };
171
+
172
+ input.onProgress?.(telemetry);
173
+
174
+ return {
175
+ object: input.schema.parse(extractJsonFromText(streamedText)),
176
+ telemetry,
177
+ };
134
178
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redstone-md/mapr",
3
- "version": "0.0.3-alpha",
3
+ "version": "0.0.4-alpha",
4
4
  "type": "module",
5
5
  "description": "Bun-native CLI/TUI for reverse-engineering frontend websites, bundles, WASM, and service workers",
6
6
  "license": "SEE LICENSE IN LICENSE",