@redstone-md/mapr 0.0.3-alpha → 0.0.5-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/index.ts +32 -2
- package/lib/ai-analyzer.ts +189 -282
- package/lib/ai-json.ts +126 -43
- package/lib/analysis-fallback.ts +39 -0
- package/lib/analysis-helpers.ts +108 -0
- package/lib/cli-args.ts +4 -1
- package/lib/promise-pool.ts +25 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -16,6 +16,7 @@ This repository is public for source visibility and collaboration. The license r
|
|
|
16
16
|
- Same-origin crawler with bounded page count and crawl depth
|
|
17
17
|
- JS bundle, worker, service worker, WASM, and source-map discovery
|
|
18
18
|
- Iframe-aware crawling for same-origin embedded pages
|
|
19
|
+
- Streaming AI generation with live throughput updates in the TUI
|
|
19
20
|
- Local RAG mode for multi-megabyte bundles
|
|
20
21
|
- Partial-report persistence when analysis fails mid-run
|
|
21
22
|
- Headless automation mode for CI or batch workflows
|
|
@@ -41,7 +42,7 @@ Mapr does not analyze images, fonts, audio, video, PDFs, archives, or other pres
|
|
|
41
42
|
- Built-in OpenAI-compatible presets for BlackBox AI, Nvidia NIM, and OnlySQ
|
|
42
43
|
- Automatic model context-size detection from provider model metadata when available
|
|
43
44
|
- Headless CLI mode for automation
|
|
44
|
-
- Live crawler and swarm progress with agent-level tracking and
|
|
45
|
+
- Live crawler and swarm progress with agent-level tracking, progress bars, and streaming TPS estimates
|
|
45
46
|
|
|
46
47
|
## Install
|
|
47
48
|
|
|
@@ -66,7 +67,7 @@ npx @redstone-md/mapr --help
|
|
|
66
67
|
4. Crawl the target website, same-origin iframe pages, and discovered code artifacts with bounded page count and crawl depth
|
|
67
68
|
5. Format analyzable content where possible
|
|
68
69
|
6. Optionally build a local lexical RAG index for oversized artifacts
|
|
69
|
-
7. Run a communicating swarm of analysis agents over chunked artifact content
|
|
70
|
+
7. Run a communicating swarm of analysis agents over chunked artifact content through streaming JSON generation so long-running requests keep producing output
|
|
70
71
|
8. Generate a Markdown report in the current working directory
|
|
71
72
|
|
|
72
73
|
## Provider Presets
|
|
@@ -123,7 +124,7 @@ Mapr uses a communicating agent swarm per chunk:
|
|
|
123
124
|
- `security`: identifies risks, persistence, caching, and operator tips
|
|
124
125
|
- `synthesizer`: merges the upstream notes into the final chunk analysis
|
|
125
126
|
|
|
126
|
-
Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts,
|
|
127
|
+
Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts, swarm agent/chunk execution, and live token-per-second estimates during provider streaming.
|
|
127
128
|
|
|
128
129
|
## Large Bundle Handling
|
|
129
130
|
|
package/index.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
|
|
3
|
-
import { cancel, confirm, intro, isCancel, log, outro, spinner, text } from "@clack/prompts";
|
|
3
|
+
import { cancel, confirm, intro, isCancel, log, outro, select, spinner, text } from "@clack/prompts";
|
|
4
4
|
import pc from "picocolors";
|
|
5
5
|
import packageJson from "./package.json";
|
|
6
6
|
|
|
@@ -14,6 +14,9 @@ import { ReportWriter } from "./lib/reporter";
|
|
|
14
14
|
import { BundleScraper } from "./lib/scraper";
|
|
15
15
|
import { SWARM_AGENT_ORDER } from "./lib/swarm-prompts";
|
|
16
16
|
|
|
17
|
+
process.env.AI_SDK_LOG_WARNINGS = "false";
|
|
18
|
+
(globalThis as typeof globalThis & { AI_SDK_LOG_WARNINGS?: boolean }).AI_SDK_LOG_WARNINGS = false;
|
|
19
|
+
|
|
17
20
|
function exitIfCancelled<T>(value: T): T {
|
|
18
21
|
if (isCancel(value)) {
|
|
19
22
|
cancel("Operation cancelled.");
|
|
@@ -62,6 +65,30 @@ async function resolveTargetUrl(headless: boolean, prefilledUrl?: string): Promi
|
|
|
62
65
|
);
|
|
63
66
|
}
|
|
64
67
|
|
|
68
|
+
async function resolveAnalysisConcurrency(headless: boolean, prefilledValue: number | undefined, totalChunks: number): Promise<number> {
|
|
69
|
+
if (prefilledValue !== undefined) {
|
|
70
|
+
return prefilledValue;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (headless || totalChunks <= 1) {
|
|
74
|
+
return 1;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return Number(
|
|
78
|
+
exitIfCancelled(
|
|
79
|
+
await select({
|
|
80
|
+
message: "Analysis concurrency",
|
|
81
|
+
initialValue: 2,
|
|
82
|
+
options: [
|
|
83
|
+
{ value: 1, label: "1 lane", hint: "Most stable" },
|
|
84
|
+
{ value: 2, label: "2 lanes", hint: "Recommended" },
|
|
85
|
+
{ value: 4, label: "4 lanes", hint: "Aggressive" },
|
|
86
|
+
],
|
|
87
|
+
}),
|
|
88
|
+
),
|
|
89
|
+
);
|
|
90
|
+
}
|
|
91
|
+
|
|
65
92
|
async function run(): Promise<void> {
|
|
66
93
|
const args = parseCliArgs(process.argv.slice(2));
|
|
67
94
|
|
|
@@ -147,15 +174,17 @@ async function run(): Promise<void> {
|
|
|
147
174
|
sum + chunkTextByBytes(artifact.formattedContent || artifact.content, deriveChunkSizeBytes(config.modelContextSize)).length,
|
|
148
175
|
0,
|
|
149
176
|
);
|
|
177
|
+
const analysisConcurrency = await resolveAnalysisConcurrency(headless, args.analysisConcurrency, totalChunks);
|
|
150
178
|
const totalAgentTasks = Math.max(1, totalChunks * SWARM_AGENT_ORDER.length);
|
|
151
179
|
let completedAgentTasks = 0;
|
|
152
180
|
|
|
153
181
|
const analysisStep = spinner({ indicator: "timer" });
|
|
154
|
-
analysisStep.start(formatAnalysisProgress(0, totalAgentTasks,
|
|
182
|
+
analysisStep.start(formatAnalysisProgress(0, totalAgentTasks, `Starting swarm analysis (${analysisConcurrency} lane${analysisConcurrency === 1 ? "" : "s"})`));
|
|
155
183
|
|
|
156
184
|
const analyzer = new AiBundleAnalyzer({
|
|
157
185
|
providerConfig: config,
|
|
158
186
|
localRag: args.localRag,
|
|
187
|
+
analysisConcurrency,
|
|
159
188
|
onProgress(event) {
|
|
160
189
|
if (event.stage === "agent" && event.state === "completed") {
|
|
161
190
|
completedAgentTasks += 1;
|
|
@@ -224,6 +253,7 @@ async function run(): Promise<void> {
|
|
|
224
253
|
`${pc.bold("Target:")} ${scrapeResult.pageUrl}`,
|
|
225
254
|
`${pc.bold("Provider:")} ${config.providerName} (${config.model})`,
|
|
226
255
|
`${pc.bold("Context size:")} ${config.modelContextSize.toLocaleString()} tokens`,
|
|
256
|
+
`${pc.bold("Concurrency:")} ${analysisConcurrency}`,
|
|
227
257
|
`${pc.bold("Local RAG:")} ${args.localRag ? "enabled" : "disabled"}`,
|
|
228
258
|
`${pc.bold("Pages:")} ${scrapeResult.htmlPages.length}`,
|
|
229
259
|
`${pc.bold("Artifacts:")} ${formattedArtifacts.length}`,
|
package/lib/ai-analyzer.ts
CHANGED
|
@@ -1,26 +1,28 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
import { Buffer } from "buffer";
|
|
3
2
|
|
|
3
|
+
import type { AgentMemo, ArtifactSummary, BundleAnalysis, ChunkAnalysis } from "./analysis-schema";
|
|
4
4
|
import {
|
|
5
5
|
agentMemoSchema,
|
|
6
|
-
artifactSummarySchema,
|
|
7
6
|
buildAnalysisSnapshot,
|
|
8
7
|
chunkAnalysisSchema,
|
|
9
8
|
finalAnalysisSchema,
|
|
10
|
-
type AgentMemo,
|
|
11
|
-
type ArtifactSummary,
|
|
12
|
-
type BundleAnalysis,
|
|
13
|
-
type ChunkAnalysis,
|
|
14
9
|
PartialAnalysisError,
|
|
15
10
|
} from "./analysis-schema";
|
|
16
|
-
import {
|
|
11
|
+
import { createFallbackAgentMemo, createFallbackChunkAnalysis } from "./analysis-fallback";
|
|
12
|
+
import {
|
|
13
|
+
chunkTextByBytes,
|
|
14
|
+
createPromptEnvelope,
|
|
15
|
+
deriveChunkSizeBytes,
|
|
16
|
+
formatAgentTelemetrySuffix,
|
|
17
|
+
normalizeAiError,
|
|
18
|
+
} from "./analysis-helpers";
|
|
19
|
+
import { generateObjectFromStream, type StreamedObjectTelemetry } from "./ai-json";
|
|
17
20
|
import { artifactTypeSchema } from "./artifacts";
|
|
18
21
|
import type { FormattedArtifact } from "./formatter";
|
|
19
22
|
import { LocalArtifactRag } from "./local-rag";
|
|
23
|
+
import { mapWithConcurrency } from "./promise-pool";
|
|
20
24
|
import { AiProviderClient, type AiProviderConfig } from "./provider";
|
|
21
|
-
import {
|
|
22
|
-
|
|
23
|
-
export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
|
|
25
|
+
import { getGlobalMissionPrompt, getSwarmAgentPrompt, SWARM_AGENT_ORDER, type SwarmAgentName } from "./swarm-prompts";
|
|
24
26
|
|
|
25
27
|
const analyzeInputSchema = z.object({
|
|
26
28
|
pageUrl: z.string().url(),
|
|
@@ -37,8 +39,9 @@ const analyzeInputSchema = z.object({
|
|
|
37
39
|
}),
|
|
38
40
|
),
|
|
39
41
|
});
|
|
42
|
+
|
|
40
43
|
export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
|
|
41
|
-
export type AnalysisProgressState = "started" | "completed";
|
|
44
|
+
export type AnalysisProgressState = "started" | "streaming" | "completed";
|
|
42
45
|
|
|
43
46
|
export interface AnalysisProgressEvent {
|
|
44
47
|
stage: AnalysisProgressStage;
|
|
@@ -50,119 +53,44 @@ export interface AnalysisProgressEvent {
|
|
|
50
53
|
chunkIndex?: number;
|
|
51
54
|
chunkCount?: number;
|
|
52
55
|
agent?: SwarmAgentName;
|
|
56
|
+
estimatedOutputTokens?: number;
|
|
57
|
+
outputTokens?: number;
|
|
58
|
+
tokensPerSecond?: number;
|
|
53
59
|
}
|
|
54
60
|
|
|
55
61
|
interface AnalyzerOptions {
|
|
56
62
|
providerConfig: AiProviderConfig;
|
|
57
63
|
chunkSizeBytes?: number;
|
|
58
64
|
localRag?: boolean;
|
|
65
|
+
analysisConcurrency?: number;
|
|
59
66
|
onProgress?: (event: AnalysisProgressEvent) => void;
|
|
60
67
|
}
|
|
61
68
|
|
|
62
|
-
|
|
69
|
+
interface ChunkTaskInput {
|
|
63
70
|
pageUrl: string;
|
|
64
71
|
artifact: FormattedArtifact;
|
|
65
72
|
chunk: string;
|
|
66
73
|
chunkIndex: number;
|
|
67
74
|
totalChunks: number;
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
return [
|
|
72
|
-
`Target page: ${input.pageUrl}`,
|
|
73
|
-
`Artifact URL: ${input.artifact.url}`,
|
|
74
|
-
`Artifact type: ${input.artifact.type}`,
|
|
75
|
-
`Discovered from: ${input.artifact.discoveredFrom}`,
|
|
76
|
-
`Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
|
|
77
|
-
input.artifact.formattingNote ? `Formatting note: ${input.artifact.formattingNote}` : "Formatting note: none",
|
|
78
|
-
input.memory ? `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}` : "Swarm memory: none yet",
|
|
79
|
-
input.retrievedContext && input.retrievedContext.length > 0
|
|
80
|
-
? `Local RAG evidence:\n${input.retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`).join("\n\n")}`
|
|
81
|
-
: "Local RAG evidence: none",
|
|
82
|
-
"Artifact content:",
|
|
83
|
-
"```text",
|
|
84
|
-
input.chunk,
|
|
85
|
-
"```",
|
|
86
|
-
].join("\n\n");
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
function findSplitBoundary(source: string, start: number, end: number): number {
|
|
90
|
-
const minimumPreferredIndex = start + Math.max(1, Math.floor((end - start) * 0.6));
|
|
91
|
-
const preferredDelimiters = new Set(["\n", ";", "}", " ", ","]);
|
|
92
|
-
|
|
93
|
-
for (let cursor = end - 1; cursor >= minimumPreferredIndex; cursor -= 1) {
|
|
94
|
-
const character = source[cursor];
|
|
95
|
-
if (character && preferredDelimiters.has(character)) {
|
|
96
|
-
return cursor + 1;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
return end;
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
export function deriveChunkSizeBytes(modelContextSize: number): number {
|
|
104
|
-
const validatedContextSize = z.number().int().positive().parse(modelContextSize);
|
|
105
|
-
const derived = Math.floor(validatedContextSize * 0.9);
|
|
106
|
-
return Math.max(DEFAULT_CHUNK_SIZE_BYTES, derived);
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
|
|
110
|
-
const validatedSource = z.string().parse(source);
|
|
111
|
-
const validatedMaxBytes = z.number().int().positive().parse(maxBytes);
|
|
112
|
-
|
|
113
|
-
if (validatedSource.length === 0) {
|
|
114
|
-
return [];
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
const chunks: string[] = [];
|
|
118
|
-
let start = 0;
|
|
119
|
-
|
|
120
|
-
while (start < validatedSource.length) {
|
|
121
|
-
let end = Math.min(validatedSource.length, start + validatedMaxBytes);
|
|
122
|
-
|
|
123
|
-
while (end > start && Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
|
|
124
|
-
end -= 1;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
if (end <= start) {
|
|
128
|
-
end = start + 1;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
const splitAt = end === validatedSource.length ? end : findSplitBoundary(validatedSource, start, end);
|
|
132
|
-
chunks.push(validatedSource.slice(start, splitAt));
|
|
133
|
-
start = splitAt;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
return chunks;
|
|
75
|
+
artifactIndex: number;
|
|
76
|
+
artifactCount: number;
|
|
77
|
+
localRag: LocalArtifactRag | null;
|
|
137
78
|
}
|
|
138
79
|
|
|
139
|
-
|
|
140
|
-
if (!(error instanceof Error)) {
|
|
141
|
-
return new Error("AI analysis failed with an unknown error.");
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
const message = error.message.toLowerCase();
|
|
145
|
-
if (message.includes("rate limit")) {
|
|
146
|
-
return new Error("Provider rate limit hit during analysis. Please retry in a moment.");
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
if (message.includes("api key")) {
|
|
150
|
-
return new Error("The configured API key was rejected by the provider.");
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
return error;
|
|
154
|
-
}
|
|
80
|
+
export { chunkTextByBytes, deriveChunkSizeBytes } from "./analysis-helpers";
|
|
155
81
|
|
|
156
82
|
export class AiBundleAnalyzer {
|
|
157
83
|
private readonly providerClient: AiProviderClient;
|
|
158
84
|
private readonly chunkSizeBytes: number;
|
|
159
85
|
private readonly localRagEnabled: boolean;
|
|
86
|
+
private readonly analysisConcurrency: number;
|
|
160
87
|
private readonly onProgress: ((event: AnalysisProgressEvent) => void) | undefined;
|
|
161
88
|
|
|
162
89
|
public constructor(options: AnalyzerOptions) {
|
|
163
90
|
this.providerClient = new AiProviderClient(options.providerConfig);
|
|
164
91
|
this.chunkSizeBytes = options.chunkSizeBytes ?? deriveChunkSizeBytes(options.providerConfig.modelContextSize);
|
|
165
92
|
this.localRagEnabled = options.localRag ?? false;
|
|
93
|
+
this.analysisConcurrency = Math.max(1, Math.floor(options.analysisConcurrency ?? 1));
|
|
166
94
|
this.onProgress = options.onProgress;
|
|
167
95
|
}
|
|
168
96
|
|
|
@@ -186,216 +114,214 @@ export class AiBundleAnalyzer {
|
|
|
186
114
|
|
|
187
115
|
const chunkAnalyses: ChunkAnalysis[] = [];
|
|
188
116
|
const artifactSummaries: ArtifactSummary[] = [];
|
|
117
|
+
const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
|
|
189
118
|
|
|
190
|
-
|
|
191
|
-
const
|
|
192
|
-
|
|
193
|
-
for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
|
|
194
|
-
const artifact = validatedInput.artifacts[artifactIndex]!;
|
|
195
|
-
const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
|
|
196
|
-
const perArtifactChunkAnalyses: ChunkAnalysis[] = [];
|
|
197
|
-
|
|
198
|
-
this.emitProgress({
|
|
199
|
-
stage: "artifact",
|
|
200
|
-
state: "started",
|
|
201
|
-
message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
202
|
-
artifactIndex: artifactIndex + 1,
|
|
203
|
-
artifactCount: validatedInput.artifacts.length,
|
|
204
|
-
artifactUrl: artifact.url,
|
|
205
|
-
});
|
|
206
|
-
|
|
207
|
-
for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
|
|
208
|
-
this.emitProgress({
|
|
209
|
-
stage: "chunk",
|
|
210
|
-
state: "started",
|
|
211
|
-
message: `Starting chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
|
|
212
|
-
artifactIndex: artifactIndex + 1,
|
|
213
|
-
artifactCount: validatedInput.artifacts.length,
|
|
214
|
-
artifactUrl: artifact.url,
|
|
215
|
-
chunkIndex: chunkIndex + 1,
|
|
216
|
-
chunkCount: chunks.length,
|
|
217
|
-
});
|
|
119
|
+
for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
|
|
120
|
+
const artifact = validatedInput.artifacts[artifactIndex]!;
|
|
121
|
+
const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
|
|
218
122
|
|
|
219
|
-
|
|
123
|
+
this.emitProgress({
|
|
124
|
+
stage: "artifact",
|
|
125
|
+
state: "started",
|
|
126
|
+
message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
127
|
+
artifactIndex: artifactIndex + 1,
|
|
128
|
+
artifactCount: validatedInput.artifacts.length,
|
|
129
|
+
artifactUrl: artifact.url,
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
const perArtifactChunkAnalyses = await mapWithConcurrency(
|
|
133
|
+
chunks,
|
|
134
|
+
this.analysisConcurrency,
|
|
135
|
+
async (chunk, chunkIndex): Promise<ChunkAnalysis> => {
|
|
136
|
+
const chunkInput: ChunkTaskInput = {
|
|
220
137
|
pageUrl: validatedInput.pageUrl,
|
|
221
138
|
artifact,
|
|
222
|
-
chunk
|
|
139
|
+
chunk,
|
|
223
140
|
chunkIndex,
|
|
224
141
|
totalChunks: chunks.length,
|
|
225
142
|
artifactIndex: artifactIndex + 1,
|
|
226
143
|
artifactCount: validatedInput.artifacts.length,
|
|
227
144
|
localRag,
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
chunkAnalyses.push(analysis);
|
|
231
|
-
perArtifactChunkAnalyses.push(analysis);
|
|
232
|
-
|
|
233
|
-
this.emitProgress({
|
|
234
|
-
stage: "chunk",
|
|
235
|
-
state: "completed",
|
|
236
|
-
message: `Completed chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
|
|
237
|
-
artifactIndex: artifactIndex + 1,
|
|
238
|
-
artifactCount: validatedInput.artifacts.length,
|
|
239
|
-
artifactUrl: artifact.url,
|
|
240
|
-
chunkIndex: chunkIndex + 1,
|
|
241
|
-
chunkCount: chunks.length,
|
|
242
|
-
});
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
artifactSummaries.push({
|
|
246
|
-
url: artifact.url,
|
|
247
|
-
type: artifact.type,
|
|
248
|
-
chunkCount: chunks.length,
|
|
249
|
-
summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
|
|
250
|
-
});
|
|
251
|
-
|
|
252
|
-
this.emitProgress({
|
|
253
|
-
stage: "artifact",
|
|
254
|
-
state: "completed",
|
|
255
|
-
message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
256
|
-
artifactIndex: artifactIndex + 1,
|
|
257
|
-
artifactCount: validatedInput.artifacts.length,
|
|
258
|
-
artifactUrl: artifact.url,
|
|
259
|
-
});
|
|
260
|
-
}
|
|
145
|
+
};
|
|
261
146
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
147
|
+
this.emitChunkEvent("started", chunkInput);
|
|
148
|
+
const analysis = await this.analyzeChunkWithSwarm(chunkInput);
|
|
149
|
+
this.emitChunkEvent("completed", chunkInput);
|
|
150
|
+
return analysis;
|
|
151
|
+
},
|
|
152
|
+
);
|
|
153
|
+
|
|
154
|
+
chunkAnalyses.push(...perArtifactChunkAnalyses);
|
|
155
|
+
artifactSummaries.push({
|
|
156
|
+
url: artifact.url,
|
|
157
|
+
type: artifact.type,
|
|
158
|
+
chunkCount: chunks.length,
|
|
159
|
+
summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
|
|
272
160
|
});
|
|
273
161
|
|
|
274
|
-
|
|
162
|
+
this.emitProgress({
|
|
163
|
+
stage: "artifact",
|
|
164
|
+
state: "completed",
|
|
165
|
+
message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
166
|
+
artifactIndex: artifactIndex + 1,
|
|
167
|
+
artifactCount: validatedInput.artifacts.length,
|
|
168
|
+
artifactUrl: artifact.url,
|
|
169
|
+
});
|
|
275
170
|
}
|
|
171
|
+
|
|
172
|
+
return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
private emitChunkEvent(state: Extract<AnalysisProgressState, "started" | "completed">, input: ChunkTaskInput): void {
|
|
176
|
+
this.emitProgress({
|
|
177
|
+
stage: "chunk",
|
|
178
|
+
state,
|
|
179
|
+
message: `${state === "started" ? "Starting" : "Completed"} chunk ${input.chunkIndex + 1}/${input.totalChunks} for ${input.artifact.url}`,
|
|
180
|
+
artifactIndex: input.artifactIndex,
|
|
181
|
+
artifactCount: input.artifactCount,
|
|
182
|
+
artifactUrl: input.artifact.url,
|
|
183
|
+
chunkIndex: input.chunkIndex + 1,
|
|
184
|
+
chunkCount: input.totalChunks,
|
|
185
|
+
});
|
|
276
186
|
}
|
|
277
187
|
|
|
278
|
-
private async analyzeChunkWithSwarm(input: {
|
|
279
|
-
pageUrl: string;
|
|
280
|
-
artifact: FormattedArtifact;
|
|
281
|
-
chunk: string;
|
|
282
|
-
chunkIndex: number;
|
|
283
|
-
totalChunks: number;
|
|
284
|
-
artifactIndex: number;
|
|
285
|
-
artifactCount: number;
|
|
286
|
-
localRag: LocalArtifactRag | null;
|
|
287
|
-
}): Promise<ChunkAnalysis> {
|
|
188
|
+
private async analyzeChunkWithSwarm(input: ChunkTaskInput): Promise<ChunkAnalysis> {
|
|
288
189
|
const memory: Partial<Record<SwarmAgentName, AgentMemo | ChunkAnalysis>> = {};
|
|
289
190
|
|
|
290
191
|
for (const agent of SWARM_AGENT_ORDER) {
|
|
291
|
-
this.
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
agent,
|
|
301
|
-
});
|
|
192
|
+
this.emitAgentEvent("started", agent, input, `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`);
|
|
193
|
+
|
|
194
|
+
try {
|
|
195
|
+
if (agent === "synthesizer") {
|
|
196
|
+
const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
|
|
197
|
+
memory[agent] = synthesized.object;
|
|
198
|
+
this.emitAgentCompletion(agent, input, synthesized.telemetry);
|
|
199
|
+
continue;
|
|
200
|
+
}
|
|
302
201
|
|
|
303
|
-
if (agent === "synthesizer") {
|
|
304
|
-
const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
|
|
305
|
-
memory[agent] = synthesized;
|
|
306
|
-
} else {
|
|
307
202
|
const memo = await this.runMemoAgent(agent, input, memory, this.getRetrievedContext(agent, input, memory));
|
|
308
|
-
memory[agent] = memo;
|
|
203
|
+
memory[agent] = memo.object;
|
|
204
|
+
this.emitAgentCompletion(agent, input, memo.telemetry);
|
|
205
|
+
} catch (error) {
|
|
206
|
+
const normalizedError = normalizeAiError(error);
|
|
207
|
+
memory[agent] =
|
|
208
|
+
agent === "synthesizer"
|
|
209
|
+
? createFallbackChunkAnalysis({ artifactUrl: input.artifact.url, memory, error: normalizedError })
|
|
210
|
+
: createFallbackAgentMemo(agent, normalizedError);
|
|
211
|
+
|
|
212
|
+
this.emitAgentEvent(
|
|
213
|
+
"completed",
|
|
214
|
+
agent,
|
|
215
|
+
input,
|
|
216
|
+
`${agent} agent fell back ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${normalizedError.message}`,
|
|
217
|
+
);
|
|
309
218
|
}
|
|
310
|
-
|
|
311
|
-
this.emitProgress({
|
|
312
|
-
stage: "agent",
|
|
313
|
-
state: "completed",
|
|
314
|
-
message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`,
|
|
315
|
-
artifactIndex: input.artifactIndex,
|
|
316
|
-
artifactCount: input.artifactCount,
|
|
317
|
-
artifactUrl: input.artifact.url,
|
|
318
|
-
chunkIndex: input.chunkIndex + 1,
|
|
319
|
-
chunkCount: input.totalChunks,
|
|
320
|
-
agent,
|
|
321
|
-
});
|
|
322
219
|
}
|
|
323
220
|
|
|
324
221
|
return chunkAnalysisSchema.parse(memory.synthesizer);
|
|
325
222
|
}
|
|
326
223
|
|
|
224
|
+
private emitAgentCompletion(agent: SwarmAgentName, input: ChunkTaskInput, telemetry: StreamedObjectTelemetry): void {
|
|
225
|
+
this.emitAgentEvent(
|
|
226
|
+
"completed",
|
|
227
|
+
agent,
|
|
228
|
+
input,
|
|
229
|
+
`${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
230
|
+
telemetry,
|
|
231
|
+
);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
private emitAgentEvent(
|
|
235
|
+
state: AnalysisProgressState,
|
|
236
|
+
agent: SwarmAgentName,
|
|
237
|
+
input: ChunkTaskInput,
|
|
238
|
+
message: string,
|
|
239
|
+
telemetry?: StreamedObjectTelemetry,
|
|
240
|
+
): void {
|
|
241
|
+
this.emitProgress({
|
|
242
|
+
stage: "agent",
|
|
243
|
+
state,
|
|
244
|
+
message,
|
|
245
|
+
artifactIndex: input.artifactIndex,
|
|
246
|
+
artifactCount: input.artifactCount,
|
|
247
|
+
artifactUrl: input.artifact.url,
|
|
248
|
+
chunkIndex: input.chunkIndex + 1,
|
|
249
|
+
chunkCount: input.totalChunks,
|
|
250
|
+
agent,
|
|
251
|
+
...(telemetry !== undefined ? { estimatedOutputTokens: telemetry.estimatedOutputTokens } : {}),
|
|
252
|
+
...(telemetry?.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
|
|
253
|
+
...(telemetry?.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
|
|
254
|
+
});
|
|
255
|
+
}
|
|
256
|
+
|
|
327
257
|
private async runMemoAgent(
|
|
328
258
|
agent: Exclude<SwarmAgentName, "synthesizer">,
|
|
329
|
-
input:
|
|
330
|
-
pageUrl: string;
|
|
331
|
-
artifact: FormattedArtifact;
|
|
332
|
-
chunk: string;
|
|
333
|
-
chunkIndex: number;
|
|
334
|
-
totalChunks: number;
|
|
335
|
-
},
|
|
259
|
+
input: ChunkTaskInput,
|
|
336
260
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
337
261
|
retrievedContext: string[],
|
|
338
|
-
): Promise<AgentMemo> {
|
|
339
|
-
return
|
|
262
|
+
): Promise<{ object: AgentMemo; telemetry: StreamedObjectTelemetry }> {
|
|
263
|
+
return generateObjectFromStream({
|
|
340
264
|
model: this.providerClient.getModel(),
|
|
341
265
|
system: getSwarmAgentPrompt(agent),
|
|
342
|
-
prompt: createPromptEnvelope({
|
|
343
|
-
pageUrl: input.pageUrl,
|
|
344
|
-
artifact: input.artifact,
|
|
345
|
-
chunk: input.chunk,
|
|
346
|
-
chunkIndex: input.chunkIndex,
|
|
347
|
-
totalChunks: input.totalChunks,
|
|
348
|
-
memory,
|
|
349
|
-
retrievedContext,
|
|
350
|
-
}),
|
|
266
|
+
prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
|
|
351
267
|
schema: agentMemoSchema,
|
|
352
268
|
contract: [
|
|
353
269
|
"JSON contract:",
|
|
354
270
|
'{"role":"string","summary":"string","observations":["string"],"evidence":["string"],"nextQuestions":["string"]}',
|
|
355
271
|
].join("\n"),
|
|
272
|
+
attempts: 4,
|
|
356
273
|
maxRetries: 2,
|
|
357
|
-
providerOptions: {
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
274
|
+
providerOptions: { openai: { store: false } },
|
|
275
|
+
onRetry: (attempt, error) =>
|
|
276
|
+
this.emitAgentEvent(
|
|
277
|
+
"streaming",
|
|
278
|
+
agent,
|
|
279
|
+
input,
|
|
280
|
+
`${agent} agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
|
|
281
|
+
),
|
|
282
|
+
onProgress: (telemetry) =>
|
|
283
|
+
this.emitAgentEvent(
|
|
284
|
+
"streaming",
|
|
285
|
+
agent,
|
|
286
|
+
input,
|
|
287
|
+
`${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
288
|
+
telemetry,
|
|
289
|
+
),
|
|
362
290
|
});
|
|
363
291
|
}
|
|
364
292
|
|
|
365
293
|
private async runSynthesisAgent(
|
|
366
|
-
input:
|
|
367
|
-
pageUrl: string;
|
|
368
|
-
artifact: FormattedArtifact;
|
|
369
|
-
chunk: string;
|
|
370
|
-
chunkIndex: number;
|
|
371
|
-
totalChunks: number;
|
|
372
|
-
},
|
|
294
|
+
input: ChunkTaskInput,
|
|
373
295
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
374
296
|
retrievedContext: string[],
|
|
375
|
-
): Promise<ChunkAnalysis> {
|
|
376
|
-
return
|
|
297
|
+
): Promise<{ object: ChunkAnalysis; telemetry: StreamedObjectTelemetry }> {
|
|
298
|
+
return generateObjectFromStream({
|
|
377
299
|
model: this.providerClient.getModel(),
|
|
378
300
|
system: getSwarmAgentPrompt("synthesizer"),
|
|
379
|
-
prompt: createPromptEnvelope({
|
|
380
|
-
pageUrl: input.pageUrl,
|
|
381
|
-
artifact: input.artifact,
|
|
382
|
-
chunk: input.chunk,
|
|
383
|
-
chunkIndex: input.chunkIndex,
|
|
384
|
-
totalChunks: input.totalChunks,
|
|
385
|
-
memory,
|
|
386
|
-
retrievedContext,
|
|
387
|
-
}),
|
|
301
|
+
prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
|
|
388
302
|
schema: chunkAnalysisSchema,
|
|
389
303
|
contract: [
|
|
390
304
|
"JSON contract:",
|
|
391
305
|
'{"entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"summary":"string","notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
|
|
392
306
|
].join("\n"),
|
|
307
|
+
attempts: 4,
|
|
393
308
|
maxRetries: 2,
|
|
394
|
-
providerOptions: {
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
309
|
+
providerOptions: { openai: { store: false } },
|
|
310
|
+
onRetry: (attempt, error) =>
|
|
311
|
+
this.emitAgentEvent(
|
|
312
|
+
"streaming",
|
|
313
|
+
"synthesizer",
|
|
314
|
+
input,
|
|
315
|
+
`synthesizer agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
|
|
316
|
+
),
|
|
317
|
+
onProgress: (telemetry) =>
|
|
318
|
+
this.emitAgentEvent(
|
|
319
|
+
"streaming",
|
|
320
|
+
"synthesizer",
|
|
321
|
+
input,
|
|
322
|
+
`synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
323
|
+
telemetry,
|
|
324
|
+
),
|
|
399
325
|
});
|
|
400
326
|
}
|
|
401
327
|
|
|
@@ -405,38 +331,26 @@ export class AiBundleAnalyzer {
|
|
|
405
331
|
chunkAnalyses: ChunkAnalysis[],
|
|
406
332
|
): Promise<BundleAnalysis> {
|
|
407
333
|
try {
|
|
408
|
-
const result = await
|
|
334
|
+
const result = await generateObjectFromStream({
|
|
409
335
|
model: this.providerClient.getModel(),
|
|
410
336
|
system: [
|
|
411
337
|
getGlobalMissionPrompt(),
|
|
412
338
|
"You are the lead synthesis agent for the final report.",
|
|
413
339
|
"Merge artifact summaries and chunk analyses into a coherent site-level reverse-engineering map with the strongest evidence available.",
|
|
414
340
|
].join(" "),
|
|
415
|
-
prompt: [
|
|
416
|
-
|
|
417
|
-
"Artifact summaries:",
|
|
418
|
-
JSON.stringify(artifactSummaries, null, 2),
|
|
419
|
-
"Chunk analyses:",
|
|
420
|
-
JSON.stringify(chunkAnalyses, null, 2),
|
|
421
|
-
].join("\n\n"),
|
|
422
|
-
schema: finalAnalysisSchema.omit({
|
|
423
|
-
artifactSummaries: true,
|
|
424
|
-
analyzedChunkCount: true,
|
|
425
|
-
}),
|
|
341
|
+
prompt: [`Target page: ${pageUrl}`, "Artifact summaries:", JSON.stringify(artifactSummaries, null, 2), "Chunk analyses:", JSON.stringify(chunkAnalyses, null, 2)].join("\n\n"),
|
|
342
|
+
schema: finalAnalysisSchema.omit({ artifactSummaries: true, analyzedChunkCount: true }),
|
|
426
343
|
contract: [
|
|
427
344
|
"JSON contract:",
|
|
428
345
|
'{"overview":"string","entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
|
|
429
346
|
].join("\n"),
|
|
347
|
+
attempts: 4,
|
|
430
348
|
maxRetries: 2,
|
|
431
|
-
providerOptions: {
|
|
432
|
-
openai: {
|
|
433
|
-
store: false,
|
|
434
|
-
},
|
|
435
|
-
},
|
|
349
|
+
providerOptions: { openai: { store: false } },
|
|
436
350
|
});
|
|
437
351
|
|
|
438
352
|
return finalAnalysisSchema.parse({
|
|
439
|
-
...result,
|
|
353
|
+
...result.object,
|
|
440
354
|
artifactSummaries,
|
|
441
355
|
analyzedChunkCount: chunkAnalyses.length,
|
|
442
356
|
});
|
|
@@ -455,11 +369,7 @@ export class AiBundleAnalyzer {
|
|
|
455
369
|
|
|
456
370
|
private getRetrievedContext(
|
|
457
371
|
agent: SwarmAgentName,
|
|
458
|
-
input:
|
|
459
|
-
artifact: FormattedArtifact;
|
|
460
|
-
chunk: string;
|
|
461
|
-
localRag: LocalArtifactRag | null;
|
|
462
|
-
},
|
|
372
|
+
input: Pick<ChunkTaskInput, "artifact" | "chunk" | "localRag">,
|
|
463
373
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
464
374
|
): string[] {
|
|
465
375
|
if (!input.localRag) {
|
|
@@ -474,10 +384,7 @@ export class AiBundleAnalyzer {
|
|
|
474
384
|
synthesizer: "entry points call graph restored names investigation tips risks runtime relationships architecture summary",
|
|
475
385
|
};
|
|
476
386
|
|
|
477
|
-
const memoryText = Object.values(memory)
|
|
478
|
-
.map((entry) => JSON.stringify(entry))
|
|
479
|
-
.join(" ");
|
|
480
|
-
|
|
387
|
+
const memoryText = Object.values(memory).map((entry) => JSON.stringify(entry)).join(" ");
|
|
481
388
|
return input.localRag.query({
|
|
482
389
|
artifactUrl: input.artifact.url,
|
|
483
390
|
query: `${agentKeywords[agent]} ${input.chunk} ${memoryText}`.slice(0, 6000),
|
package/lib/ai-json.ts
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { streamText } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
|
|
4
4
|
/** Matches a response that is entirely one markdown code fence (optionally tagged `json`) and captures its body. */
const jsonFencePattern = /^```(?:json)?\s*([\s\S]*?)\s*```$/i;

/** Minimum gap, in milliseconds, between two throttled progress callbacks while streaming. */
const STREAM_PROGRESS_INTERVAL_MS = 750;

/** Characters-per-token ratio used when estimating token counts from raw text. */
const ESTIMATED_CHARS_PER_TOKEN = 4;
|
|
7
|
+
|
|
8
|
+
/** Timing and token statistics reported while (and after) a response is streamed. */
export interface StreamedObjectTelemetry {
  /** Milliseconds elapsed since the streaming attempt started. */
  elapsedMs: number;
  /** Token count estimated from the length of the text received so far. */
  estimatedOutputTokens: number;
  /** Output token count taken from the stream's usage data, when it is available. */
  outputTokens?: number;
  /** Throughput in tokens per second; omitted when it cannot be computed yet. */
  tokensPerSecond?: number;
}
|
|
14
|
+
|
|
15
|
+
/** Outcome of a successful streamed generation: the validated object plus its final telemetry. */
export interface StreamedObjectResult<TOutput> {
  /** The parsed, schema-validated object. */
  object: TOutput;
  /** Telemetry captured once the stream finished. */
  telemetry: StreamedObjectTelemetry;
}
|
|
5
19
|
|
|
6
20
|
function extractBalancedJsonSlice(source: string): string | null {
|
|
7
21
|
const startIndex = source.search(/[\[{]/);
|
|
@@ -15,7 +29,6 @@ function extractBalancedJsonSlice(source: string): string | null {
|
|
|
15
29
|
|
|
16
30
|
for (let index = startIndex; index < source.length; index += 1) {
|
|
17
31
|
const character = source[index];
|
|
18
|
-
|
|
19
32
|
if (!character) {
|
|
20
33
|
continue;
|
|
21
34
|
}
|
|
@@ -53,6 +66,33 @@ function extractBalancedJsonSlice(source: string): string | null {
|
|
|
53
66
|
return null;
|
|
54
67
|
}
|
|
55
68
|
|
|
69
|
+
/**
 * Builds the system prompt for JSON-only generation.
 *
 * The caller's system text comes first, then three fixed output rules, then the
 * JSON contract, each on its own line.
 */
function formatJsonSystemPrompt(system: string, contract: string): string {
  const outputRules = [
    "Return only one valid JSON object.",
    "Do not wrap the JSON in markdown fences.",
    "Do not add explanations before or after the JSON.",
  ];

  return `${system}\n${outputRules.join("\n")}\n${contract}`;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Converts a token count and an elapsed time into a throughput figure.
 *
 * @returns Tokens per second rounded to one decimal place, or `undefined` when
 * there are no tokens or less than 250 ms have elapsed.
 */
function calculateTokensPerSecond(tokenCount: number, elapsedMs: number): number | undefined {
  if (elapsedMs < 250 || tokenCount <= 0) {
    return undefined;
  }

  const elapsedSeconds = elapsedMs / 1000;
  const rate = tokenCount / elapsedSeconds;
  return Number(rate.toFixed(1));
}
|
|
86
|
+
|
|
87
|
+
/**
 * Estimates how many tokens a piece of text represents from its trimmed length.
 *
 * @returns 0 for empty or whitespace-only text, otherwise the trimmed length divided
 * by ESTIMATED_CHARS_PER_TOKEN, rounded up and never less than 1.
 */
export function estimateTokenCountFromText(source: string): number {
  const meaningfulLength = source.trim().length;

  return meaningfulLength === 0
    ? 0
    : Math.max(1, Math.ceil(meaningfulLength / ESTIMATED_CHARS_PER_TOKEN));
}
|
|
95
|
+
|
|
56
96
|
export function extractJsonFromText(source: string): unknown {
|
|
57
97
|
const trimmed = source.trim();
|
|
58
98
|
if (!trimmed) {
|
|
@@ -74,61 +114,104 @@ export function extractJsonFromText(source: string): unknown {
|
|
|
74
114
|
}
|
|
75
115
|
}
|
|
76
116
|
|
|
77
|
-
export function
|
|
78
|
-
if (!(error instanceof Error)) {
|
|
79
|
-
return false;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
const message = error.message.toLowerCase();
|
|
83
|
-
return (
|
|
84
|
-
message.includes("responseformat") ||
|
|
85
|
-
message.includes("structured output") ||
|
|
86
|
-
message.includes("structuredoutputs") ||
|
|
87
|
-
message.includes("response did not match schema") ||
|
|
88
|
-
message.includes("no object generated")
|
|
89
|
-
);
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
export async function generateObjectWithTextFallback<TOutput>(input: {
|
|
117
|
+
/**
 * Streams a model response and parses it into a schema-validated object,
 * re-running the whole request when an attempt throws.
 *
 * @param input.attempts Total number of streaming attempts. Defaults to 3, is floored,
 * and is never less than 1. A NaN value falls back to the default.
 * @param input.maxRetries Forwarded to each individual streaming attempt.
 * @param input.onProgress Receives telemetry snapshots from the running attempt.
 * @param input.onRetry Called before a new attempt starts, with the number of the
 * upcoming attempt and the error that ended the previous one.
 * @returns The parsed object together with the telemetry of the successful attempt.
 * @throws The error of the last attempt once every attempt has failed.
 */
export async function generateObjectFromStream<TOutput>(input: {
  model: unknown;
  system: string;
  prompt: string;
  schema: z.ZodType<TOutput>;
  contract: string;
  attempts?: number;
  maxRetries?: number;
  providerOptions?: Record<string, unknown>;
  onProgress?: (telemetry: StreamedObjectTelemetry) => void;
  onRetry?: (attempt: number, error: Error) => void;
}): Promise<StreamedObjectResult<TOutput>> {
  const defaultAttempts = 3;
  const requestedAttempts = input.attempts ?? defaultAttempts;
  // Math.max(1, NaN) is NaN, which would make the loop below run zero times and
  // report a failure without ever contacting the model.
  const attempts = Number.isNaN(requestedAttempts)
    ? defaultAttempts
    : Math.max(1, Math.floor(requestedAttempts));
  let lastError: Error | undefined;

  for (let attempt = 1; attempt <= attempts; attempt += 1) {
    try {
      return await streamSingleObjectAttempt(input, attempt);
    } catch (error) {
      lastError = error instanceof Error ? error : new Error("Streaming object generation failed.");
      if (attempt >= attempts) {
        throw lastError;
      }

      input.onRetry?.(attempt + 1, lastError);
    }
  }

  // Only reachable if the loop exits without returning or throwing; kept so the
  // function has no implicit-return path.
  throw lastError ?? new Error("Streaming object generation failed.");
}
|
|
147
|
+
|
|
148
|
+
/**
 * Runs one streaming generation and parses the accumulated text with `input.schema`.
 *
 * Text deltas are appended as they arrive. While streaming, `onProgress` receives
 * snapshots spaced at least STREAM_PROGRESS_INTERVAL_MS apart; a final snapshot is
 * delivered after the stream ends and before the text is parsed.
 *
 * @param input Model, prompts, schema and optional progress callback for this attempt.
 * @param attempt 1-based attempt number; any value above 1 appends a repair hint to
 * the system prompt.
 * @returns The schema-validated object and the final telemetry.
 * @throws Any error raised while streaming, extracting JSON, or validating the result.
 */
async function streamSingleObjectAttempt<TOutput>(
  input: {
    model: unknown;
    system: string;
    prompt: string;
    schema: z.ZodType<TOutput>;
    contract: string;
    maxRetries?: number;
    providerOptions?: Record<string, unknown>;
    onProgress?: (telemetry: StreamedObjectTelemetry) => void;
  },
  attempt: number,
): Promise<StreamedObjectResult<TOutput>> {
  const startedAt = Date.now();
  let received = "";
  let lastProgressAt = 0;

  let systemPrompt = formatJsonSystemPrompt(input.system, input.contract);
  if (attempt > 1) {
    systemPrompt +=
      "\nPrevious attempt failed because the JSON was malformed or incomplete. Return a syntactically valid JSON object this time.";
  }

  const result = streamText({
    model: input.model as never,
    system: systemPrompt,
    prompt: input.prompt,
    maxRetries: input.maxRetries ?? 2,
    ...(input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {}),
  });

  for await (const delta of result.textStream) {
    received += delta;

    const now = Date.now();
    if (input.onProgress === undefined || now - lastProgressAt < STREAM_PROGRESS_INTERVAL_MS) {
      continue;
    }

    const elapsedSoFar = now - startedAt;
    const estimateSoFar = estimateTokenCountFromText(received);
    const rateSoFar = calculateTokensPerSecond(estimateSoFar, elapsedSoFar);
    const snapshot: StreamedObjectTelemetry = {
      elapsedMs: elapsedSoFar,
      estimatedOutputTokens: estimateSoFar,
    };
    if (rateSoFar !== undefined) {
      snapshot.tokensPerSecond = rateSoFar;
    }

    input.onProgress(snapshot);
    lastProgressAt = now;
  }

  // Usage data is best-effort: if it cannot be read, fall back to the estimate.
  let outputTokens: number | undefined;
  try {
    outputTokens = (await result.usage).outputTokens ?? undefined;
  } catch {
    outputTokens = undefined;
  }

  const elapsedMs = Date.now() - startedAt;
  const estimatedOutputTokens = estimateTokenCountFromText(received);
  const tokensPerSecond = calculateTokensPerSecond(outputTokens ?? estimatedOutputTokens, elapsedMs);

  const telemetry: StreamedObjectTelemetry = { elapsedMs, estimatedOutputTokens };
  if (outputTokens !== undefined) {
    telemetry.outputTokens = outputTokens;
  }
  if (tokensPerSecond !== undefined) {
    telemetry.tokensPerSecond = tokensPerSecond;
  }

  input.onProgress?.(telemetry);

  const parsed = input.schema.parse(extractJsonFromText(received));
  return { object: parsed, telemetry };
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { AgentMemo, ChunkAnalysis } from "./analysis-schema";
|
|
2
|
+
import type { SwarmAgentName } from "./swarm-prompts";
|
|
3
|
+
|
|
4
|
+
/**
 * Builds a placeholder memo for an agent that failed after its retries.
 *
 * The memo records the failure message in `summary`, carries no observations or
 * evidence, and suggests a manual retry as its only follow-up question.
 */
export function createFallbackAgentMemo(agent: Exclude<SwarmAgentName, "synthesizer">, error: Error): AgentMemo {
  const failureSummary = `${agent} agent failed after retries: ${error.message}`;
  const manualRetryHint = `Retry ${agent} analysis for this chunk manually if the finding is important.`;

  const memo: AgentMemo = {
    role: agent,
    summary: failureSummary,
    observations: [],
    evidence: [],
    nextQuestions: [manualRetryHint],
  };
  return memo;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Builds a placeholder chunk analysis used when chunk synthesis fails after retries.
 *
 * The summary is assembled from whatever agent memos in `memory` carry a non-empty
 * string `summary`; when there are none, it describes the failure instead. All
 * structural fields are empty, and the investigation tips record the error.
 *
 * @param input.artifactUrl URL of the artifact the chunk belongs to.
 * @param input.memory Agent outputs collected so far; entries of any shape are tolerated.
 * @param input.error The error that triggered the fallback.
 */
export function createFallbackChunkAnalysis(input: {
  artifactUrl: string;
  memory: Partial<Record<SwarmAgentName, unknown>>;
  error: Error;
}): ChunkAnalysis {
  const memoSummaries: string[] = [];
  for (const entry of Object.values(input.memory)) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }

    // Memory values are untyped: only accept a summary that really is a string, so
    // a malformed memo cannot make the fallback itself throw.
    const summary = (entry as { summary?: unknown }).summary;
    if (typeof summary !== "string") {
      continue;
    }

    const trimmed = summary.trim();
    if (trimmed.length > 0) {
      memoSummaries.push(trimmed);
    }
  }

  return {
    entryPoints: [],
    initializationFlow: [],
    callGraph: [],
    restoredNames: [],
    summary:
      memoSummaries.join(" ").trim() ||
      `Chunk analysis for ${input.artifactUrl} fell back after retries: ${input.error.message}`,
    notableLibraries: [],
    investigationTips: [
      `Chunk synthesis fell back after retries: ${input.error.message}`,
      "Re-run with lower concurrency or inspect this chunk manually if it is critical.",
    ],
    risks: [],
  };
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import { Buffer } from "buffer";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
import type { StreamedObjectTelemetry } from "./ai-json";
|
|
5
|
+
import type { FormattedArtifact } from "./formatter";
|
|
6
|
+
|
|
7
|
+
/** Default chunk budget in bytes (80 KiB); also the lower bound returned by `deriveChunkSizeBytes`. */
export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
|
|
8
|
+
|
|
9
|
+
/**
 * Assembles the user prompt for one artifact chunk.
 *
 * The prompt lists the page and artifact metadata, the 1-based chunk position, the
 * formatting note, the swarm memory (pretty-printed JSON) and any retrieved RAG
 * segments, followed by the chunk inside a ```text fence. Sections are separated by
 * blank lines; absent optional sections are replaced with explicit "none" lines.
 */
export function createPromptEnvelope(input: {
  pageUrl: string;
  artifact: FormattedArtifact;
  chunk: string;
  chunkIndex: number;
  totalChunks: number;
  memory?: unknown;
  retrievedContext?: string[];
}): string {
  const { artifact, retrievedContext } = input;

  let formattingLine = "Formatting note: none";
  if (artifact.formattingNote) {
    formattingLine = `Formatting note: ${artifact.formattingNote}`;
  }

  let memoryLine = "Swarm memory: none yet";
  if (input.memory) {
    memoryLine = `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}`;
  }

  let evidenceLine = "Local RAG evidence: none";
  if (retrievedContext && retrievedContext.length > 0) {
    const segments = retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`);
    evidenceLine = `Local RAG evidence:\n${segments.join("\n\n")}`;
  }

  const sections = [
    `Target page: ${input.pageUrl}`,
    `Artifact URL: ${artifact.url}`,
    `Artifact type: ${artifact.type}`,
    `Discovered from: ${artifact.discoveredFrom}`,
    `Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
    formattingLine,
    memoryLine,
    evidenceLine,
    "Artifact content:",
    "```text",
    input.chunk,
    "```",
  ];

  return sections.join("\n\n");
}
|
|
35
|
+
|
|
36
|
+
/**
 * Picks a split position at or before `end` that falls just after a delimiter.
 *
 * Scans backwards from `end - 1`, but never below the point 60% of the way from
 * `start` to `end` (and at least one character past `start`). Newline, `;`, `}`,
 * space and `,` count as delimiters.
 *
 * @returns The index right after the last delimiter found in that window, or `end`
 * when the window contains none.
 */
function findSplitBoundary(source: string, start: number, end: number): number {
  const delimiters = "\n;} ,";
  const windowSize = Math.max(1, Math.floor((end - start) * 0.6));
  const lowestCandidate = start + windowSize;

  let cursor = end - 1;
  while (cursor >= lowestCandidate) {
    const candidate = source[cursor];
    if (candidate !== undefined && candidate !== "" && delimiters.includes(candidate)) {
      return cursor + 1;
    }
    cursor -= 1;
  }

  return end;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Derives a chunk size from the model's context size.
 *
 * @param modelContextSize Must be a positive integer; validation throws otherwise.
 * @returns 90% of the context size (rounded down), but never less than
 * DEFAULT_CHUNK_SIZE_BYTES.
 */
export function deriveChunkSizeBytes(modelContextSize: number): number {
  const contextSize = z.number().int().positive().parse(modelContextSize);
  const ninetyPercent = Math.floor(contextSize * 0.9);

  return ninetyPercent > DEFAULT_CHUNK_SIZE_BYTES ? ninetyPercent : DEFAULT_CHUNK_SIZE_BYTES;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Splits text into consecutive chunks whose UTF-8 size does not exceed `maxBytes`.
 *
 * Chunks are contiguous slices of the input (concatenating them reproduces it), and
 * a split is moved back to just after a nearby delimiter when `findSplitBoundary`
 * finds one. A chunk never ends between the two halves of a surrogate pair; if a
 * single code point is larger than `maxBytes`, it is emitted whole as its own chunk
 * so the loop always makes progress.
 *
 * @param source Text to split; an empty string yields an empty array.
 * @param maxBytes Positive integer byte budget per chunk.
 * @returns The chunks in source order.
 * @throws When either argument fails validation.
 */
export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
  const validatedSource = z.string().parse(source);
  const validatedMaxBytes = z.number().int().positive().parse(maxBytes);

  if (validatedSource.length === 0) {
    return [];
  }

  const sourceLength = validatedSource.length;
  const chunks: string[] = [];
  let start = 0;

  while (start < sourceLength) {
    // Every UTF-16 code unit encodes to at least one byte, so a chunk can never hold
    // more than maxBytes code units.
    let end = Math.min(sourceLength, start + validatedMaxBytes);

    if (Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
      // Multi-byte content: walk forward one code point at a time and stop before the
      // budget is exceeded. This is linear, unlike shrinking the slice and re-measuring
      // it, and it only ever stops on a code point boundary.
      end = start;
      let usedBytes = 0;
      while (end < sourceLength) {
        const codePoint = validatedSource.codePointAt(end) ?? 0;
        // Lone surrogates fall in the 3-byte range, matching how Buffer encodes them.
        const encodedBytes = codePoint < 0x80 ? 1 : codePoint < 0x800 ? 2 : codePoint < 0x10000 ? 3 : 4;
        if (usedBytes + encodedBytes > validatedMaxBytes) {
          break;
        }
        usedBytes += encodedBytes;
        end += codePoint > 0xffff ? 2 : 1;
      }
    }
    // Otherwise the candidate already fits. When it stops short of the end of the
    // source it holds exactly maxBytes one-byte characters, so it cannot split a pair.

    if (end <= start) {
      // The budget is smaller than the first code point: emit that code point whole.
      end = start + ((validatedSource.codePointAt(start) ?? 0) > 0xffff ? 2 : 1);
    }

    const splitAt = end === sourceLength ? end : findSplitBoundary(validatedSource, start, end);
    chunks.push(validatedSource.slice(start, splitAt));
    start = splitAt;
  }

  return chunks;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Converts an arbitrary thrown value into an `Error` with a user-facing message.
 *
 * Non-`Error` values become a generic "unknown error". Messages mentioning
 * "rate limit" or "api key" (case-insensitive, checked in that order) are replaced
 * with friendlier text; any other `Error` is returned unchanged.
 */
export function normalizeAiError(error: unknown): Error {
  if (error instanceof Error) {
    const lowered = error.message.toLowerCase();

    let friendlyMessage: string | undefined;
    if (lowered.includes("rate limit")) {
      friendlyMessage = "Provider rate limit hit during analysis. Please retry in a moment.";
    } else if (lowered.includes("api key")) {
      friendlyMessage = "The configured API key was rejected by the provider.";
    }

    return friendlyMessage === undefined ? error : new Error(friendlyMessage);
  }

  return new Error("AI analysis failed with an unknown error.");
}
|
|
102
|
+
|
|
103
|
+
/**
 * Renders telemetry as a short bracketed suffix such as ` [~120 tok 35.2 tps]`.
 *
 * The reported output token count is shown when present; otherwise the estimate is
 * shown with a leading `~`. The throughput part is left out when it is unknown.
 */
export function formatAgentTelemetrySuffix(telemetry: StreamedObjectTelemetry): string {
  const { outputTokens, estimatedOutputTokens, tokensPerSecond } = telemetry;

  const approximationMark = outputTokens !== undefined ? "" : "~";
  const shownTokens = outputTokens ?? estimatedOutputTokens;

  const parts = [`${approximationMark}${shownTokens} tok`];
  if (tokensPerSecond !== undefined) {
    parts.push(`${tokensPerSecond} tps`);
  }

  return ` [${parts.join(" ")}]`;
}
|
package/lib/cli-args.ts
CHANGED
|
@@ -19,6 +19,7 @@ const rawCliArgsSchema = z.object({
|
|
|
19
19
|
baseURL: z.string().url().optional(),
|
|
20
20
|
model: z.string().min(1).optional(),
|
|
21
21
|
contextSize: z.number().int().positive().optional(),
|
|
22
|
+
analysisConcurrency: z.number().int().positive().optional(),
|
|
22
23
|
maxPages: z.number().int().positive().optional(),
|
|
23
24
|
maxArtifacts: z.number().int().positive().optional(),
|
|
24
25
|
maxDepth: z.number().int().nonnegative().optional(),
|
|
@@ -58,13 +59,14 @@ const optionMap = new Map<string, keyof CliArgs>([
|
|
|
58
59
|
["--base-url", "baseURL"],
|
|
59
60
|
["--model", "model"],
|
|
60
61
|
["--context-size", "contextSize"],
|
|
62
|
+
["--analysis-concurrency", "analysisConcurrency"],
|
|
61
63
|
["--max-pages", "maxPages"],
|
|
62
64
|
["--max-artifacts", "maxArtifacts"],
|
|
63
65
|
["--max-depth", "maxDepth"],
|
|
64
66
|
]);
|
|
65
67
|
|
|
66
68
|
const booleanKeys = new Set<keyof CliArgs>(["help", "version", "headless", "reconfigure", "listModels", "localRag", "verboseAgents"]);
|
|
67
|
-
const numberKeys = new Set<keyof CliArgs>(["contextSize", "maxPages", "maxArtifacts", "maxDepth"]);
|
|
69
|
+
const numberKeys = new Set<keyof CliArgs>(["contextSize", "analysisConcurrency", "maxPages", "maxArtifacts", "maxDepth"]);
|
|
68
70
|
|
|
69
71
|
function normalizeValue(key: keyof CliArgs, value: string): unknown {
|
|
70
72
|
if (numberKeys.has(key)) {
|
|
@@ -154,6 +156,7 @@ export function renderHelpText(): string {
|
|
|
154
156
|
" --base-url <url> Base URL for the provider",
|
|
155
157
|
" --model <id> Model identifier",
|
|
156
158
|
" --context-size <tokens> Model context window, for example 128000 or 512000",
|
|
159
|
+
" --analysis-concurrency <n> Parallel chunk analyses per artifact",
|
|
157
160
|
" --list-models Fetch and print models using the resolved provider config",
|
|
158
161
|
" --local-rag Enable local lexical RAG for oversized artifacts",
|
|
159
162
|
" --reconfigure Force interactive provider reconfiguration",
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Maps `items` through an async `mapper`, running at most `concurrency` calls at once.
 *
 * Workers pull the next unclaimed index from a shared cursor, so results are stored
 * at the index of their input regardless of completion order.
 *
 * @param items Inputs to process.
 * @param concurrency Maximum number of mapper calls in flight. It is floored and
 * never less than 1; a NaN value is treated as 1.
 * @param mapper Receives each item and its index.
 * @returns The mapped values in input order.
 * @throws Rejects with the first mapper rejection.
 */
export async function mapWithConcurrency<TInput, TOutput>(
  items: readonly TInput[],
  concurrency: number,
  mapper: (item: TInput, index: number) => Promise<TOutput>,
): Promise<TOutput[]> {
  const flooredConcurrency = Math.floor(concurrency);
  // Math.max(1, NaN) is NaN, and a NaN worker count would start no workers at all,
  // resolving with an array of holes without ever calling the mapper.
  const normalizedConcurrency = Number.isNaN(flooredConcurrency) ? 1 : Math.max(1, flooredConcurrency);
  const results = new Array<TOutput>(items.length);
  let cursor = 0;

  const workers = Array.from({ length: Math.min(normalizedConcurrency, items.length) }, async () => {
    while (true) {
      // Claiming an index is synchronous, so two workers never take the same item.
      const currentIndex = cursor;
      cursor += 1;

      if (currentIndex >= items.length) {
        return;
      }

      results[currentIndex] = await mapper(items[currentIndex]!, currentIndex);
    }
  });

  await Promise.all(workers);
  return results;
}
|
package/package.json
CHANGED