@redstone-md/mapr 0.0.3-alpha → 0.0.4-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/lib/ai-analyzer.ts +84 -22
- package/lib/ai-json.ts +89 -45
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -16,6 +16,7 @@ This repository is public for source visibility and collaboration. The license r
|
|
|
16
16
|
- Same-origin crawler with bounded page count and crawl depth
|
|
17
17
|
- JS bundle, worker, service worker, WASM, and source-map discovery
|
|
18
18
|
- Iframe-aware crawling for same-origin embedded pages
|
|
19
|
+
- Streaming AI generation with live throughput updates in the TUI
|
|
19
20
|
- Local RAG mode for multi-megabyte bundles
|
|
20
21
|
- Partial-report persistence when analysis fails mid-run
|
|
21
22
|
- Headless automation mode for CI or batch workflows
|
|
@@ -41,7 +42,7 @@ Mapr does not analyze images, fonts, audio, video, PDFs, archives, or other pres
|
|
|
41
42
|
- Built-in OpenAI-compatible presets for BlackBox AI, Nvidia NIM, and OnlySQ
|
|
42
43
|
- Automatic model context-size detection from provider model metadata when available
|
|
43
44
|
- Headless CLI mode for automation
|
|
44
|
-
- Live crawler and swarm progress with agent-level tracking and
|
|
45
|
+
- Live crawler and swarm progress with agent-level tracking, progress bars, and streaming TPS estimates
|
|
45
46
|
|
|
46
47
|
## Install
|
|
47
48
|
|
|
@@ -66,7 +67,7 @@ npx @redstone-md/mapr --help
|
|
|
66
67
|
4. Crawl the target website, same-origin iframe pages, and discovered code artifacts with bounded page count and crawl depth
|
|
67
68
|
5. Format analyzable content where possible
|
|
68
69
|
6. Optionally build a local lexical RAG index for oversized artifacts
|
|
69
|
-
7. Run a communicating swarm of analysis agents over chunked artifact content
|
|
70
|
+
7. Run a communicating swarm of analysis agents over chunked artifact content through streaming JSON generation so long-running requests keep producing output
|
|
70
71
|
8. Generate a Markdown report in the current working directory
|
|
71
72
|
|
|
72
73
|
## Provider Presets
|
|
@@ -123,7 +124,7 @@ Mapr uses a communicating agent swarm per chunk:
|
|
|
123
124
|
- `security`: identifies risks, persistence, caching, and operator tips
|
|
124
125
|
- `synthesizer`: merges the upstream notes into the final chunk analysis
|
|
125
126
|
|
|
126
|
-
Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts,
|
|
127
|
+
Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts, swarm agent/chunk execution, and live token-per-second estimates during provider streaming.
|
|
127
128
|
|
|
128
129
|
## Large Bundle Handling
|
|
129
130
|
|
package/lib/ai-analyzer.ts
CHANGED
|
@@ -13,7 +13,7 @@ import {
|
|
|
13
13
|
type ChunkAnalysis,
|
|
14
14
|
PartialAnalysisError,
|
|
15
15
|
} from "./analysis-schema";
|
|
16
|
-
import {
|
|
16
|
+
import { generateObjectFromStream, type StreamedObjectTelemetry } from "./ai-json";
|
|
17
17
|
import { artifactTypeSchema } from "./artifacts";
|
|
18
18
|
import type { FormattedArtifact } from "./formatter";
|
|
19
19
|
import { LocalArtifactRag } from "./local-rag";
|
|
@@ -38,7 +38,7 @@ const analyzeInputSchema = z.object({
|
|
|
38
38
|
),
|
|
39
39
|
});
|
|
40
40
|
export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
|
|
41
|
-
export type AnalysisProgressState = "started" | "completed";
|
|
41
|
+
export type AnalysisProgressState = "started" | "streaming" | "completed";
|
|
42
42
|
|
|
43
43
|
export interface AnalysisProgressEvent {
|
|
44
44
|
stage: AnalysisProgressStage;
|
|
@@ -50,6 +50,9 @@ export interface AnalysisProgressEvent {
|
|
|
50
50
|
chunkIndex?: number;
|
|
51
51
|
chunkCount?: number;
|
|
52
52
|
agent?: SwarmAgentName;
|
|
53
|
+
estimatedOutputTokens?: number;
|
|
54
|
+
outputTokens?: number;
|
|
55
|
+
tokensPerSecond?: number;
|
|
53
56
|
}
|
|
54
57
|
|
|
55
58
|
interface AnalyzerOptions {
|
|
@@ -153,6 +156,13 @@ function normalizeAiError(error: unknown): Error {
|
|
|
153
156
|
return error;
|
|
154
157
|
}
|
|
155
158
|
|
|
159
|
+
/**
 * Builds the bracketed telemetry suffix appended to agent progress messages.
 *
 * The token count comes from `outputTokens` when it is present; otherwise the
 * estimated count is used and prefixed with `~` to mark it as an estimate.
 * The throughput segment is appended only when `tokensPerSecond` is present.
 *
 * @param telemetry - Streaming measurements for the current agent request.
 * @returns A string such as ` [120 tok 48.2 tps]` or ` [~37 tok]` (leading space included).
 */
function formatAgentTelemetrySuffix(telemetry: StreamedObjectTelemetry): string {
  const { outputTokens, estimatedOutputTokens, tokensPerSecond } = telemetry;

  const isReported = outputTokens !== undefined;
  const shownCount = outputTokens ?? estimatedOutputTokens;

  let suffix = ` [${isReported ? "" : "~"}${shownCount} tok`;
  if (tokensPerSecond !== undefined) {
    suffix += ` ${tokensPerSecond} tps`;
  }

  return `${suffix}]`;
}
|
|
165
|
+
|
|
156
166
|
export class AiBundleAnalyzer {
|
|
157
167
|
private readonly providerClient: AiProviderClient;
|
|
158
168
|
private readonly chunkSizeBytes: number;
|
|
@@ -302,23 +312,39 @@ export class AiBundleAnalyzer {
|
|
|
302
312
|
|
|
303
313
|
if (agent === "synthesizer") {
|
|
304
314
|
const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
|
|
305
|
-
memory[agent] = synthesized;
|
|
315
|
+
memory[agent] = synthesized.object;
|
|
316
|
+
this.emitProgress({
|
|
317
|
+
stage: "agent",
|
|
318
|
+
state: "completed",
|
|
319
|
+
message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(synthesized.telemetry)}`,
|
|
320
|
+
artifactIndex: input.artifactIndex,
|
|
321
|
+
artifactCount: input.artifactCount,
|
|
322
|
+
artifactUrl: input.artifact.url,
|
|
323
|
+
chunkIndex: input.chunkIndex + 1,
|
|
324
|
+
chunkCount: input.totalChunks,
|
|
325
|
+
agent,
|
|
326
|
+
estimatedOutputTokens: synthesized.telemetry.estimatedOutputTokens,
|
|
327
|
+
...(synthesized.telemetry.outputTokens !== undefined ? { outputTokens: synthesized.telemetry.outputTokens } : {}),
|
|
328
|
+
...(synthesized.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: synthesized.telemetry.tokensPerSecond } : {}),
|
|
329
|
+
});
|
|
306
330
|
} else {
|
|
307
331
|
const memo = await this.runMemoAgent(agent, input, memory, this.getRetrievedContext(agent, input, memory));
|
|
308
|
-
memory[agent] = memo;
|
|
332
|
+
memory[agent] = memo.object;
|
|
333
|
+
this.emitProgress({
|
|
334
|
+
stage: "agent",
|
|
335
|
+
state: "completed",
|
|
336
|
+
message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(memo.telemetry)}`,
|
|
337
|
+
artifactIndex: input.artifactIndex,
|
|
338
|
+
artifactCount: input.artifactCount,
|
|
339
|
+
artifactUrl: input.artifact.url,
|
|
340
|
+
chunkIndex: input.chunkIndex + 1,
|
|
341
|
+
chunkCount: input.totalChunks,
|
|
342
|
+
agent,
|
|
343
|
+
estimatedOutputTokens: memo.telemetry.estimatedOutputTokens,
|
|
344
|
+
...(memo.telemetry.outputTokens !== undefined ? { outputTokens: memo.telemetry.outputTokens } : {}),
|
|
345
|
+
...(memo.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: memo.telemetry.tokensPerSecond } : {}),
|
|
346
|
+
});
|
|
309
347
|
}
|
|
310
|
-
|
|
311
|
-
this.emitProgress({
|
|
312
|
-
stage: "agent",
|
|
313
|
-
state: "completed",
|
|
314
|
-
message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`,
|
|
315
|
-
artifactIndex: input.artifactIndex,
|
|
316
|
-
artifactCount: input.artifactCount,
|
|
317
|
-
artifactUrl: input.artifact.url,
|
|
318
|
-
chunkIndex: input.chunkIndex + 1,
|
|
319
|
-
chunkCount: input.totalChunks,
|
|
320
|
-
agent,
|
|
321
|
-
});
|
|
322
348
|
}
|
|
323
349
|
|
|
324
350
|
return chunkAnalysisSchema.parse(memory.synthesizer);
|
|
@@ -332,11 +358,13 @@ export class AiBundleAnalyzer {
|
|
|
332
358
|
chunk: string;
|
|
333
359
|
chunkIndex: number;
|
|
334
360
|
totalChunks: number;
|
|
361
|
+
artifactIndex: number;
|
|
362
|
+
artifactCount: number;
|
|
335
363
|
},
|
|
336
364
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
337
365
|
retrievedContext: string[],
|
|
338
|
-
): Promise<AgentMemo> {
|
|
339
|
-
return
|
|
366
|
+
): Promise<{ object: AgentMemo; telemetry: StreamedObjectTelemetry }> {
|
|
367
|
+
return generateObjectFromStream({
|
|
340
368
|
model: this.providerClient.getModel(),
|
|
341
369
|
system: getSwarmAgentPrompt(agent),
|
|
342
370
|
prompt: createPromptEnvelope({
|
|
@@ -359,6 +387,22 @@ export class AiBundleAnalyzer {
|
|
|
359
387
|
store: false,
|
|
360
388
|
},
|
|
361
389
|
},
|
|
390
|
+
onProgress: (telemetry) => {
|
|
391
|
+
this.emitProgress({
|
|
392
|
+
stage: "agent",
|
|
393
|
+
state: "streaming",
|
|
394
|
+
message: `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
395
|
+
artifactIndex: input.artifactIndex,
|
|
396
|
+
artifactCount: input.artifactCount,
|
|
397
|
+
artifactUrl: input.artifact.url,
|
|
398
|
+
chunkIndex: input.chunkIndex + 1,
|
|
399
|
+
chunkCount: input.totalChunks,
|
|
400
|
+
agent,
|
|
401
|
+
estimatedOutputTokens: telemetry.estimatedOutputTokens,
|
|
402
|
+
...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
|
|
403
|
+
...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
|
|
404
|
+
});
|
|
405
|
+
},
|
|
362
406
|
});
|
|
363
407
|
}
|
|
364
408
|
|
|
@@ -369,11 +413,13 @@ export class AiBundleAnalyzer {
|
|
|
369
413
|
chunk: string;
|
|
370
414
|
chunkIndex: number;
|
|
371
415
|
totalChunks: number;
|
|
416
|
+
artifactIndex: number;
|
|
417
|
+
artifactCount: number;
|
|
372
418
|
},
|
|
373
419
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
374
420
|
retrievedContext: string[],
|
|
375
|
-
): Promise<ChunkAnalysis> {
|
|
376
|
-
return
|
|
421
|
+
): Promise<{ object: ChunkAnalysis; telemetry: StreamedObjectTelemetry }> {
|
|
422
|
+
return generateObjectFromStream({
|
|
377
423
|
model: this.providerClient.getModel(),
|
|
378
424
|
system: getSwarmAgentPrompt("synthesizer"),
|
|
379
425
|
prompt: createPromptEnvelope({
|
|
@@ -396,6 +442,22 @@ export class AiBundleAnalyzer {
|
|
|
396
442
|
store: false,
|
|
397
443
|
},
|
|
398
444
|
},
|
|
445
|
+
onProgress: (telemetry) => {
|
|
446
|
+
this.emitProgress({
|
|
447
|
+
stage: "agent",
|
|
448
|
+
state: "streaming",
|
|
449
|
+
message: `synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
450
|
+
artifactIndex: input.artifactIndex,
|
|
451
|
+
artifactCount: input.artifactCount,
|
|
452
|
+
artifactUrl: input.artifact.url,
|
|
453
|
+
chunkIndex: input.chunkIndex + 1,
|
|
454
|
+
chunkCount: input.totalChunks,
|
|
455
|
+
agent: "synthesizer",
|
|
456
|
+
estimatedOutputTokens: telemetry.estimatedOutputTokens,
|
|
457
|
+
...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
|
|
458
|
+
...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
|
|
459
|
+
});
|
|
460
|
+
},
|
|
399
461
|
});
|
|
400
462
|
}
|
|
401
463
|
|
|
@@ -405,7 +467,7 @@ export class AiBundleAnalyzer {
|
|
|
405
467
|
chunkAnalyses: ChunkAnalysis[],
|
|
406
468
|
): Promise<BundleAnalysis> {
|
|
407
469
|
try {
|
|
408
|
-
const result = await
|
|
470
|
+
const result = await generateObjectFromStream({
|
|
409
471
|
model: this.providerClient.getModel(),
|
|
410
472
|
system: [
|
|
411
473
|
getGlobalMissionPrompt(),
|
|
@@ -436,7 +498,7 @@ export class AiBundleAnalyzer {
|
|
|
436
498
|
});
|
|
437
499
|
|
|
438
500
|
return finalAnalysisSchema.parse({
|
|
439
|
-
...result,
|
|
501
|
+
...result.object,
|
|
440
502
|
artifactSummaries,
|
|
441
503
|
analyzedChunkCount: chunkAnalyses.length,
|
|
442
504
|
});
|
package/lib/ai-json.ts
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { streamText } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
|
|
4
4
|
const jsonFencePattern = /^```(?:json)?\s*([\s\S]*?)\s*```$/i;
|
|
5
|
+
const STREAM_PROGRESS_INTERVAL_MS = 750;
|
|
6
|
+
const ESTIMATED_CHARS_PER_TOKEN = 4;
|
|
7
|
+
|
|
8
|
+
/**
 * Throughput measurements for one streamed generation request, reported both
 * while the stream is in flight and once more after it finishes.
 */
export interface StreamedObjectTelemetry {
  /** Milliseconds elapsed since the request was started. */
  elapsedMs: number;
  /** Output token count estimated from the length of the text streamed so far. */
  estimatedOutputTokens: number;
  /** Output token count taken from the provider's usage data; absent when none is available. */
  outputTokens?: number;
  /** Tokens per second; absent when no rate was computed for this measurement. */
  tokensPerSecond?: number;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Result of a streamed generation request: the parsed object together with
 * the telemetry measured for that request.
 */
export interface StreamedObjectResult<TOutput> {
  /** The generated object after JSON extraction and schema parsing. */
  object: TOutput;
  /** Final timing and token measurements for the request. */
  telemetry: StreamedObjectTelemetry;
}
|
|
5
19
|
|
|
6
20
|
function extractBalancedJsonSlice(source: string): string | null {
|
|
7
21
|
const startIndex = source.search(/[\[{]/);
|
|
@@ -15,7 +29,6 @@ function extractBalancedJsonSlice(source: string): string | null {
|
|
|
15
29
|
|
|
16
30
|
for (let index = startIndex; index < source.length; index += 1) {
|
|
17
31
|
const character = source[index];
|
|
18
|
-
|
|
19
32
|
if (!character) {
|
|
20
33
|
continue;
|
|
21
34
|
}
|
|
@@ -53,6 +66,33 @@ function extractBalancedJsonSlice(source: string): string | null {
|
|
|
53
66
|
return null;
|
|
54
67
|
}
|
|
55
68
|
|
|
69
|
+
/**
 * Composes the system prompt for a JSON-only generation request.
 *
 * The caller's system prompt comes first, followed by three fixed output
 * rules, followed by the JSON contract. All parts are joined with newlines.
 *
 * @param system - The base system prompt.
 * @param contract - Description of the JSON shape the model must return.
 * @returns The combined, newline-separated system prompt.
 */
function formatJsonSystemPrompt(system: string, contract: string): string {
  const jsonOnlyRules = [
    "Return only one valid JSON object.",
    "Do not wrap the JSON in markdown fences.",
    "Do not add explanations before or after the JSON.",
  ];

  return [system, ...jsonOnlyRules, contract].join("\n");
}
|
|
78
|
+
|
|
79
|
+
/**
 * Converts a token count and an elapsed time into a tokens-per-second rate.
 *
 * @param tokenCount - Number of output tokens produced so far (reported or estimated).
 * @param elapsedMs - Milliseconds elapsed since generation started.
 * @returns The rate rounded to one decimal place, or `undefined` when no
 *   meaningful rate can be computed: a non-finite input, no tokens yet, or
 *   fewer than 250 ms elapsed.
 */
function calculateTokensPerSecond(tokenCount: number, elapsedMs: number): number | undefined {
  // Every comparison against NaN is false, so a NaN input would pass the
  // range check below and surface as a "NaN" rate; reject non-finite values first.
  if (!Number.isFinite(tokenCount) || !Number.isFinite(elapsedMs)) {
    return undefined;
  }

  // No rate is reported without tokens or before 250 ms have elapsed.
  if (tokenCount <= 0 || elapsedMs < 250) {
    return undefined;
  }

  // toFixed yields a string; convert back so callers receive a number.
  return Number((tokenCount / (elapsedMs / 1000)).toFixed(1));
}
|
|
86
|
+
|
|
87
|
+
/**
 * Estimates how many tokens a piece of text represents from its length.
 *
 * Surrounding whitespace is ignored. The trimmed length is divided by
 * `ESTIMATED_CHARS_PER_TOKEN` and rounded up.
 *
 * @param source - The text to measure.
 * @returns `0` for empty or whitespace-only text, otherwise an estimate of at least `1`.
 */
export function estimateTokenCountFromText(source: string): number {
  const characterCount = source.trim().length;

  return characterCount === 0
    ? 0
    : Math.max(1, Math.ceil(characterCount / ESTIMATED_CHARS_PER_TOKEN));
}
|
|
95
|
+
|
|
56
96
|
export function extractJsonFromText(source: string): unknown {
|
|
57
97
|
const trimmed = source.trim();
|
|
58
98
|
if (!trimmed) {
|
|
@@ -74,22 +114,7 @@ export function extractJsonFromText(source: string): unknown {
|
|
|
74
114
|
}
|
|
75
115
|
}
|
|
76
116
|
|
|
77
|
-
export function
|
|
78
|
-
if (!(error instanceof Error)) {
|
|
79
|
-
return false;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
const message = error.message.toLowerCase();
|
|
83
|
-
return (
|
|
84
|
-
message.includes("responseformat") ||
|
|
85
|
-
message.includes("structured output") ||
|
|
86
|
-
message.includes("structuredoutputs") ||
|
|
87
|
-
message.includes("response did not match schema") ||
|
|
88
|
-
message.includes("no object generated")
|
|
89
|
-
);
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
export async function generateObjectWithTextFallback<TOutput>(input: {
|
|
117
|
+
export async function generateObjectFromStream<TOutput>(input: {
|
|
93
118
|
model: unknown;
|
|
94
119
|
system: string;
|
|
95
120
|
prompt: string;
|
|
@@ -97,38 +122,57 @@ export async function generateObjectWithTextFallback<TOutput>(input: {
|
|
|
97
122
|
contract: string;
|
|
98
123
|
maxRetries?: number;
|
|
99
124
|
providerOptions?: Record<string, unknown>;
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
prompt: input.prompt,
|
|
106
|
-
output: Output.object({ schema: input.schema }),
|
|
107
|
-
maxRetries: input.maxRetries ?? 2,
|
|
108
|
-
...(input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {}),
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
return input.schema.parse(structuredResult.output);
|
|
112
|
-
} catch (error) {
|
|
113
|
-
if (!shouldFallbackToTextJson(error)) {
|
|
114
|
-
throw error;
|
|
115
|
-
}
|
|
116
|
-
}
|
|
125
|
+
onProgress?: (telemetry: StreamedObjectTelemetry) => void;
|
|
126
|
+
}): Promise<StreamedObjectResult<TOutput>> {
|
|
127
|
+
let streamedText = "";
|
|
128
|
+
const startedAt = Date.now();
|
|
129
|
+
let lastProgressAt = 0;
|
|
117
130
|
|
|
118
|
-
const
|
|
131
|
+
const result = streamText({
|
|
119
132
|
model: input.model as never,
|
|
120
|
-
system:
|
|
121
|
-
input.system,
|
|
122
|
-
"Return only one valid JSON object.",
|
|
123
|
-
"Do not wrap the JSON in markdown fences.",
|
|
124
|
-
"Do not add explanations before or after the JSON.",
|
|
125
|
-
input.contract,
|
|
126
|
-
].join("\n"),
|
|
133
|
+
system: formatJsonSystemPrompt(input.system, input.contract),
|
|
127
134
|
prompt: input.prompt,
|
|
128
|
-
output: Output.text(),
|
|
129
135
|
maxRetries: input.maxRetries ?? 2,
|
|
130
136
|
...(input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {}),
|
|
131
137
|
});
|
|
132
138
|
|
|
133
|
-
|
|
139
|
+
for await (const textPart of result.textStream) {
|
|
140
|
+
streamedText += textPart;
|
|
141
|
+
|
|
142
|
+
const now = Date.now();
|
|
143
|
+
if (input.onProgress !== undefined && now - lastProgressAt >= STREAM_PROGRESS_INTERVAL_MS) {
|
|
144
|
+
const estimatedOutputTokens = estimateTokenCountFromText(streamedText);
|
|
145
|
+
const tokensPerSecond = calculateTokensPerSecond(estimatedOutputTokens, now - startedAt);
|
|
146
|
+
input.onProgress({
|
|
147
|
+
elapsedMs: now - startedAt,
|
|
148
|
+
estimatedOutputTokens,
|
|
149
|
+
...(tokensPerSecond !== undefined ? { tokensPerSecond } : {}),
|
|
150
|
+
});
|
|
151
|
+
lastProgressAt = now;
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
let usage: Awaited<typeof result.usage> | undefined;
|
|
156
|
+
try {
|
|
157
|
+
usage = await result.usage;
|
|
158
|
+
} catch {
|
|
159
|
+
usage = undefined;
|
|
160
|
+
}
|
|
161
|
+
const elapsedMs = Date.now() - startedAt;
|
|
162
|
+
const estimatedOutputTokens = estimateTokenCountFromText(streamedText);
|
|
163
|
+
const outputTokens = usage?.outputTokens ?? undefined;
|
|
164
|
+
const tokensPerSecond = calculateTokensPerSecond(outputTokens ?? estimatedOutputTokens, elapsedMs);
|
|
165
|
+
const telemetry: StreamedObjectTelemetry = {
|
|
166
|
+
elapsedMs,
|
|
167
|
+
estimatedOutputTokens,
|
|
168
|
+
...(outputTokens !== undefined ? { outputTokens } : {}),
|
|
169
|
+
...(tokensPerSecond !== undefined ? { tokensPerSecond } : {}),
|
|
170
|
+
};
|
|
171
|
+
|
|
172
|
+
input.onProgress?.(telemetry);
|
|
173
|
+
|
|
174
|
+
return {
|
|
175
|
+
object: input.schema.parse(extractJsonFromText(streamedText)),
|
|
176
|
+
telemetry,
|
|
177
|
+
};
|
|
134
178
|
}
|
package/package.json
CHANGED