@redstone-md/mapr 0.0.4-alpha → 0.0.6-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -0
- package/index.ts +57 -4
- package/lib/ai-analyzer.ts +177 -330
- package/lib/ai-json.ts +40 -1
- package/lib/analysis-fallback.ts +39 -0
- package/lib/analysis-helpers.ts +108 -0
- package/lib/cli-args.ts +20 -2
- package/lib/codex-auth.ts +275 -0
- package/lib/config-prompts.ts +74 -0
- package/lib/config.ts +164 -55
- package/lib/progress.ts +144 -0
- package/lib/promise-pool.ts +25 -0
- package/lib/provider.ts +216 -14
- package/package.json +1 -1
package/lib/ai-analyzer.ts
CHANGED
|
@@ -1,26 +1,28 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
import { Buffer } from "buffer";
|
|
3
2
|
|
|
3
|
+
import type { AgentMemo, ArtifactSummary, BundleAnalysis, ChunkAnalysis } from "./analysis-schema";
|
|
4
4
|
import {
|
|
5
5
|
agentMemoSchema,
|
|
6
|
-
artifactSummarySchema,
|
|
7
6
|
buildAnalysisSnapshot,
|
|
8
7
|
chunkAnalysisSchema,
|
|
9
8
|
finalAnalysisSchema,
|
|
10
|
-
type AgentMemo,
|
|
11
|
-
type ArtifactSummary,
|
|
12
|
-
type BundleAnalysis,
|
|
13
|
-
type ChunkAnalysis,
|
|
14
9
|
PartialAnalysisError,
|
|
15
10
|
} from "./analysis-schema";
|
|
11
|
+
import { createFallbackAgentMemo, createFallbackChunkAnalysis } from "./analysis-fallback";
|
|
12
|
+
import {
|
|
13
|
+
chunkTextByBytes,
|
|
14
|
+
createPromptEnvelope,
|
|
15
|
+
deriveChunkSizeBytes,
|
|
16
|
+
formatAgentTelemetrySuffix,
|
|
17
|
+
normalizeAiError,
|
|
18
|
+
} from "./analysis-helpers";
|
|
16
19
|
import { generateObjectFromStream, type StreamedObjectTelemetry } from "./ai-json";
|
|
17
20
|
import { artifactTypeSchema } from "./artifacts";
|
|
18
21
|
import type { FormattedArtifact } from "./formatter";
|
|
19
22
|
import { LocalArtifactRag } from "./local-rag";
|
|
23
|
+
import { mapWithConcurrency } from "./promise-pool";
|
|
20
24
|
import { AiProviderClient, type AiProviderConfig } from "./provider";
|
|
21
|
-
import {
|
|
22
|
-
|
|
23
|
-
export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
|
|
25
|
+
import { getGlobalMissionPrompt, getSwarmAgentPrompt, SWARM_AGENT_ORDER, type SwarmAgentName } from "./swarm-prompts";
|
|
24
26
|
|
|
25
27
|
const analyzeInputSchema = z.object({
|
|
26
28
|
pageUrl: z.string().url(),
|
|
@@ -37,6 +39,7 @@ const analyzeInputSchema = z.object({
|
|
|
37
39
|
}),
|
|
38
40
|
),
|
|
39
41
|
});
|
|
42
|
+
|
|
40
43
|
export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
|
|
41
44
|
export type AnalysisProgressState = "started" | "streaming" | "completed";
|
|
42
45
|
|
|
@@ -59,120 +62,37 @@ interface AnalyzerOptions {
|
|
|
59
62
|
providerConfig: AiProviderConfig;
|
|
60
63
|
chunkSizeBytes?: number;
|
|
61
64
|
localRag?: boolean;
|
|
65
|
+
analysisConcurrency?: number;
|
|
62
66
|
onProgress?: (event: AnalysisProgressEvent) => void;
|
|
63
67
|
}
|
|
64
68
|
|
|
65
|
-
|
|
69
|
+
interface ChunkTaskInput {
|
|
66
70
|
pageUrl: string;
|
|
67
71
|
artifact: FormattedArtifact;
|
|
68
72
|
chunk: string;
|
|
69
73
|
chunkIndex: number;
|
|
70
74
|
totalChunks: number;
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
return [
|
|
75
|
-
`Target page: ${input.pageUrl}`,
|
|
76
|
-
`Artifact URL: ${input.artifact.url}`,
|
|
77
|
-
`Artifact type: ${input.artifact.type}`,
|
|
78
|
-
`Discovered from: ${input.artifact.discoveredFrom}`,
|
|
79
|
-
`Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
|
|
80
|
-
input.artifact.formattingNote ? `Formatting note: ${input.artifact.formattingNote}` : "Formatting note: none",
|
|
81
|
-
input.memory ? `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}` : "Swarm memory: none yet",
|
|
82
|
-
input.retrievedContext && input.retrievedContext.length > 0
|
|
83
|
-
? `Local RAG evidence:\n${input.retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`).join("\n\n")}`
|
|
84
|
-
: "Local RAG evidence: none",
|
|
85
|
-
"Artifact content:",
|
|
86
|
-
"```text",
|
|
87
|
-
input.chunk,
|
|
88
|
-
"```",
|
|
89
|
-
].join("\n\n");
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
function findSplitBoundary(source: string, start: number, end: number): number {
|
|
93
|
-
const minimumPreferredIndex = start + Math.max(1, Math.floor((end - start) * 0.6));
|
|
94
|
-
const preferredDelimiters = new Set(["\n", ";", "}", " ", ","]);
|
|
95
|
-
|
|
96
|
-
for (let cursor = end - 1; cursor >= minimumPreferredIndex; cursor -= 1) {
|
|
97
|
-
const character = source[cursor];
|
|
98
|
-
if (character && preferredDelimiters.has(character)) {
|
|
99
|
-
return cursor + 1;
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
return end;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
export function deriveChunkSizeBytes(modelContextSize: number): number {
|
|
107
|
-
const validatedContextSize = z.number().int().positive().parse(modelContextSize);
|
|
108
|
-
const derived = Math.floor(validatedContextSize * 0.9);
|
|
109
|
-
return Math.max(DEFAULT_CHUNK_SIZE_BYTES, derived);
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
|
|
113
|
-
const validatedSource = z.string().parse(source);
|
|
114
|
-
const validatedMaxBytes = z.number().int().positive().parse(maxBytes);
|
|
115
|
-
|
|
116
|
-
if (validatedSource.length === 0) {
|
|
117
|
-
return [];
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
const chunks: string[] = [];
|
|
121
|
-
let start = 0;
|
|
122
|
-
|
|
123
|
-
while (start < validatedSource.length) {
|
|
124
|
-
let end = Math.min(validatedSource.length, start + validatedMaxBytes);
|
|
125
|
-
|
|
126
|
-
while (end > start && Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
|
|
127
|
-
end -= 1;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
if (end <= start) {
|
|
131
|
-
end = start + 1;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
const splitAt = end === validatedSource.length ? end : findSplitBoundary(validatedSource, start, end);
|
|
135
|
-
chunks.push(validatedSource.slice(start, splitAt));
|
|
136
|
-
start = splitAt;
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
return chunks;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
function normalizeAiError(error: unknown): Error {
|
|
143
|
-
if (!(error instanceof Error)) {
|
|
144
|
-
return new Error("AI analysis failed with an unknown error.");
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
const message = error.message.toLowerCase();
|
|
148
|
-
if (message.includes("rate limit")) {
|
|
149
|
-
return new Error("Provider rate limit hit during analysis. Please retry in a moment.");
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
if (message.includes("api key")) {
|
|
153
|
-
return new Error("The configured API key was rejected by the provider.");
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
return error;
|
|
75
|
+
artifactIndex: number;
|
|
76
|
+
artifactCount: number;
|
|
77
|
+
localRag: LocalArtifactRag | null;
|
|
157
78
|
}
|
|
158
79
|
|
|
159
|
-
|
|
160
|
-
const tokenCount = telemetry.outputTokens ?? telemetry.estimatedOutputTokens;
|
|
161
|
-
const tokenLabel = telemetry.outputTokens !== undefined ? `${tokenCount} tok` : `~${tokenCount} tok`;
|
|
162
|
-
const tpsLabel = telemetry.tokensPerSecond !== undefined ? ` ${telemetry.tokensPerSecond} tps` : "";
|
|
163
|
-
return ` [${tokenLabel}${tpsLabel}]`;
|
|
164
|
-
}
|
|
80
|
+
export { chunkTextByBytes, deriveChunkSizeBytes } from "./analysis-helpers";
|
|
165
81
|
|
|
166
82
|
export class AiBundleAnalyzer {
|
|
167
83
|
private readonly providerClient: AiProviderClient;
|
|
168
84
|
private readonly chunkSizeBytes: number;
|
|
169
85
|
private readonly localRagEnabled: boolean;
|
|
86
|
+
private readonly analysisConcurrency: number;
|
|
170
87
|
private readonly onProgress: ((event: AnalysisProgressEvent) => void) | undefined;
|
|
88
|
+
private readonly providerOptions: Record<string, unknown>;
|
|
171
89
|
|
|
172
90
|
public constructor(options: AnalyzerOptions) {
|
|
173
91
|
this.providerClient = new AiProviderClient(options.providerConfig);
|
|
174
92
|
this.chunkSizeBytes = options.chunkSizeBytes ?? deriveChunkSizeBytes(options.providerConfig.modelContextSize);
|
|
175
93
|
this.localRagEnabled = options.localRag ?? false;
|
|
94
|
+
this.analysisConcurrency = Math.max(1, Math.floor(options.analysisConcurrency ?? 1));
|
|
95
|
+
this.providerOptions = this.providerClient.getProviderOptions();
|
|
176
96
|
this.onProgress = options.onProgress;
|
|
177
97
|
}
|
|
178
98
|
|
|
@@ -196,268 +116,214 @@ export class AiBundleAnalyzer {
|
|
|
196
116
|
|
|
197
117
|
const chunkAnalyses: ChunkAnalysis[] = [];
|
|
198
118
|
const artifactSummaries: ArtifactSummary[] = [];
|
|
119
|
+
const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
|
|
199
120
|
|
|
200
|
-
|
|
201
|
-
const
|
|
202
|
-
|
|
203
|
-
for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
|
|
204
|
-
const artifact = validatedInput.artifacts[artifactIndex]!;
|
|
205
|
-
const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
|
|
206
|
-
const perArtifactChunkAnalyses: ChunkAnalysis[] = [];
|
|
207
|
-
|
|
208
|
-
this.emitProgress({
|
|
209
|
-
stage: "artifact",
|
|
210
|
-
state: "started",
|
|
211
|
-
message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
212
|
-
artifactIndex: artifactIndex + 1,
|
|
213
|
-
artifactCount: validatedInput.artifacts.length,
|
|
214
|
-
artifactUrl: artifact.url,
|
|
215
|
-
});
|
|
216
|
-
|
|
217
|
-
for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
|
|
218
|
-
this.emitProgress({
|
|
219
|
-
stage: "chunk",
|
|
220
|
-
state: "started",
|
|
221
|
-
message: `Starting chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
|
|
222
|
-
artifactIndex: artifactIndex + 1,
|
|
223
|
-
artifactCount: validatedInput.artifacts.length,
|
|
224
|
-
artifactUrl: artifact.url,
|
|
225
|
-
chunkIndex: chunkIndex + 1,
|
|
226
|
-
chunkCount: chunks.length,
|
|
227
|
-
});
|
|
121
|
+
for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
|
|
122
|
+
const artifact = validatedInput.artifacts[artifactIndex]!;
|
|
123
|
+
const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
|
|
228
124
|
|
|
229
|
-
|
|
125
|
+
this.emitProgress({
|
|
126
|
+
stage: "artifact",
|
|
127
|
+
state: "started",
|
|
128
|
+
message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
129
|
+
artifactIndex: artifactIndex + 1,
|
|
130
|
+
artifactCount: validatedInput.artifacts.length,
|
|
131
|
+
artifactUrl: artifact.url,
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
const perArtifactChunkAnalyses = await mapWithConcurrency(
|
|
135
|
+
chunks,
|
|
136
|
+
this.analysisConcurrency,
|
|
137
|
+
async (chunk, chunkIndex): Promise<ChunkAnalysis> => {
|
|
138
|
+
const chunkInput: ChunkTaskInput = {
|
|
230
139
|
pageUrl: validatedInput.pageUrl,
|
|
231
140
|
artifact,
|
|
232
|
-
chunk
|
|
141
|
+
chunk,
|
|
233
142
|
chunkIndex,
|
|
234
143
|
totalChunks: chunks.length,
|
|
235
144
|
artifactIndex: artifactIndex + 1,
|
|
236
145
|
artifactCount: validatedInput.artifacts.length,
|
|
237
146
|
localRag,
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
chunkAnalyses.push(analysis);
|
|
241
|
-
perArtifactChunkAnalyses.push(analysis);
|
|
147
|
+
};
|
|
242
148
|
|
|
243
|
-
this.
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
url: artifact.url,
|
|
257
|
-
type: artifact.type,
|
|
258
|
-
chunkCount: chunks.length,
|
|
259
|
-
summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
|
|
260
|
-
});
|
|
261
|
-
|
|
262
|
-
this.emitProgress({
|
|
263
|
-
stage: "artifact",
|
|
264
|
-
state: "completed",
|
|
265
|
-
message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
266
|
-
artifactIndex: artifactIndex + 1,
|
|
267
|
-
artifactCount: validatedInput.artifacts.length,
|
|
268
|
-
artifactUrl: artifact.url,
|
|
269
|
-
});
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
|
|
273
|
-
} catch (error) {
|
|
274
|
-
const normalizedError = normalizeAiError(error);
|
|
275
|
-
const partialAnalysis = buildAnalysisSnapshot({
|
|
276
|
-
overview:
|
|
277
|
-
chunkAnalyses.length > 0 || artifactSummaries.length > 0
|
|
278
|
-
? `Partial analysis only. Processing stopped because: ${normalizedError.message}`
|
|
279
|
-
: `Analysis aborted before any chunk completed. Cause: ${normalizedError.message}`,
|
|
280
|
-
artifactSummaries,
|
|
281
|
-
chunkAnalyses,
|
|
149
|
+
this.emitChunkEvent("started", chunkInput);
|
|
150
|
+
const analysis = await this.analyzeChunkWithSwarm(chunkInput);
|
|
151
|
+
this.emitChunkEvent("completed", chunkInput);
|
|
152
|
+
return analysis;
|
|
153
|
+
},
|
|
154
|
+
);
|
|
155
|
+
|
|
156
|
+
chunkAnalyses.push(...perArtifactChunkAnalyses);
|
|
157
|
+
artifactSummaries.push({
|
|
158
|
+
url: artifact.url,
|
|
159
|
+
type: artifact.type,
|
|
160
|
+
chunkCount: chunks.length,
|
|
161
|
+
summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
|
|
282
162
|
});
|
|
283
163
|
|
|
284
|
-
|
|
164
|
+
this.emitProgress({
|
|
165
|
+
stage: "artifact",
|
|
166
|
+
state: "completed",
|
|
167
|
+
message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
168
|
+
artifactIndex: artifactIndex + 1,
|
|
169
|
+
artifactCount: validatedInput.artifacts.length,
|
|
170
|
+
artifactUrl: artifact.url,
|
|
171
|
+
});
|
|
285
172
|
}
|
|
173
|
+
|
|
174
|
+
return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
private emitChunkEvent(state: Extract<AnalysisProgressState, "started" | "completed">, input: ChunkTaskInput): void {
|
|
178
|
+
this.emitProgress({
|
|
179
|
+
stage: "chunk",
|
|
180
|
+
state,
|
|
181
|
+
message: `${state === "started" ? "Starting" : "Completed"} chunk ${input.chunkIndex + 1}/${input.totalChunks} for ${input.artifact.url}`,
|
|
182
|
+
artifactIndex: input.artifactIndex,
|
|
183
|
+
artifactCount: input.artifactCount,
|
|
184
|
+
artifactUrl: input.artifact.url,
|
|
185
|
+
chunkIndex: input.chunkIndex + 1,
|
|
186
|
+
chunkCount: input.totalChunks,
|
|
187
|
+
});
|
|
286
188
|
}
|
|
287
189
|
|
|
288
|
-
private async analyzeChunkWithSwarm(input: {
|
|
289
|
-
pageUrl: string;
|
|
290
|
-
artifact: FormattedArtifact;
|
|
291
|
-
chunk: string;
|
|
292
|
-
chunkIndex: number;
|
|
293
|
-
totalChunks: number;
|
|
294
|
-
artifactIndex: number;
|
|
295
|
-
artifactCount: number;
|
|
296
|
-
localRag: LocalArtifactRag | null;
|
|
297
|
-
}): Promise<ChunkAnalysis> {
|
|
190
|
+
private async analyzeChunkWithSwarm(input: ChunkTaskInput): Promise<ChunkAnalysis> {
|
|
298
191
|
const memory: Partial<Record<SwarmAgentName, AgentMemo | ChunkAnalysis>> = {};
|
|
299
192
|
|
|
300
193
|
for (const agent of SWARM_AGENT_ORDER) {
|
|
301
|
-
this.
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
agent,
|
|
311
|
-
});
|
|
194
|
+
this.emitAgentEvent("started", agent, input, `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`);
|
|
195
|
+
|
|
196
|
+
try {
|
|
197
|
+
if (agent === "synthesizer") {
|
|
198
|
+
const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
|
|
199
|
+
memory[agent] = synthesized.object;
|
|
200
|
+
this.emitAgentCompletion(agent, input, synthesized.telemetry);
|
|
201
|
+
continue;
|
|
202
|
+
}
|
|
312
203
|
|
|
313
|
-
if (agent === "synthesizer") {
|
|
314
|
-
const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
|
|
315
|
-
memory[agent] = synthesized.object;
|
|
316
|
-
this.emitProgress({
|
|
317
|
-
stage: "agent",
|
|
318
|
-
state: "completed",
|
|
319
|
-
message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(synthesized.telemetry)}`,
|
|
320
|
-
artifactIndex: input.artifactIndex,
|
|
321
|
-
artifactCount: input.artifactCount,
|
|
322
|
-
artifactUrl: input.artifact.url,
|
|
323
|
-
chunkIndex: input.chunkIndex + 1,
|
|
324
|
-
chunkCount: input.totalChunks,
|
|
325
|
-
agent,
|
|
326
|
-
estimatedOutputTokens: synthesized.telemetry.estimatedOutputTokens,
|
|
327
|
-
...(synthesized.telemetry.outputTokens !== undefined ? { outputTokens: synthesized.telemetry.outputTokens } : {}),
|
|
328
|
-
...(synthesized.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: synthesized.telemetry.tokensPerSecond } : {}),
|
|
329
|
-
});
|
|
330
|
-
} else {
|
|
331
204
|
const memo = await this.runMemoAgent(agent, input, memory, this.getRetrievedContext(agent, input, memory));
|
|
332
205
|
memory[agent] = memo.object;
|
|
333
|
-
this.
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
206
|
+
this.emitAgentCompletion(agent, input, memo.telemetry);
|
|
207
|
+
} catch (error) {
|
|
208
|
+
const normalizedError = normalizeAiError(error);
|
|
209
|
+
memory[agent] =
|
|
210
|
+
agent === "synthesizer"
|
|
211
|
+
? createFallbackChunkAnalysis({ artifactUrl: input.artifact.url, memory, error: normalizedError })
|
|
212
|
+
: createFallbackAgentMemo(agent, normalizedError);
|
|
213
|
+
|
|
214
|
+
this.emitAgentEvent(
|
|
215
|
+
"completed",
|
|
342
216
|
agent,
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
});
|
|
217
|
+
input,
|
|
218
|
+
`${agent} agent fell back ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${normalizedError.message}`,
|
|
219
|
+
);
|
|
347
220
|
}
|
|
348
221
|
}
|
|
349
222
|
|
|
350
223
|
return chunkAnalysisSchema.parse(memory.synthesizer);
|
|
351
224
|
}
|
|
352
225
|
|
|
226
|
+
private emitAgentCompletion(agent: SwarmAgentName, input: ChunkTaskInput, telemetry: StreamedObjectTelemetry): void {
|
|
227
|
+
this.emitAgentEvent(
|
|
228
|
+
"completed",
|
|
229
|
+
agent,
|
|
230
|
+
input,
|
|
231
|
+
`${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
232
|
+
telemetry,
|
|
233
|
+
);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
private emitAgentEvent(
|
|
237
|
+
state: AnalysisProgressState,
|
|
238
|
+
agent: SwarmAgentName,
|
|
239
|
+
input: ChunkTaskInput,
|
|
240
|
+
message: string,
|
|
241
|
+
telemetry?: StreamedObjectTelemetry,
|
|
242
|
+
): void {
|
|
243
|
+
this.emitProgress({
|
|
244
|
+
stage: "agent",
|
|
245
|
+
state,
|
|
246
|
+
message,
|
|
247
|
+
artifactIndex: input.artifactIndex,
|
|
248
|
+
artifactCount: input.artifactCount,
|
|
249
|
+
artifactUrl: input.artifact.url,
|
|
250
|
+
chunkIndex: input.chunkIndex + 1,
|
|
251
|
+
chunkCount: input.totalChunks,
|
|
252
|
+
agent,
|
|
253
|
+
...(telemetry !== undefined ? { estimatedOutputTokens: telemetry.estimatedOutputTokens } : {}),
|
|
254
|
+
...(telemetry?.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
|
|
255
|
+
...(telemetry?.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
|
|
256
|
+
});
|
|
257
|
+
}
|
|
258
|
+
|
|
353
259
|
private async runMemoAgent(
|
|
354
260
|
agent: Exclude<SwarmAgentName, "synthesizer">,
|
|
355
|
-
input:
|
|
356
|
-
pageUrl: string;
|
|
357
|
-
artifact: FormattedArtifact;
|
|
358
|
-
chunk: string;
|
|
359
|
-
chunkIndex: number;
|
|
360
|
-
totalChunks: number;
|
|
361
|
-
artifactIndex: number;
|
|
362
|
-
artifactCount: number;
|
|
363
|
-
},
|
|
261
|
+
input: ChunkTaskInput,
|
|
364
262
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
365
263
|
retrievedContext: string[],
|
|
366
264
|
): Promise<{ object: AgentMemo; telemetry: StreamedObjectTelemetry }> {
|
|
367
265
|
return generateObjectFromStream({
|
|
368
266
|
model: this.providerClient.getModel(),
|
|
369
267
|
system: getSwarmAgentPrompt(agent),
|
|
370
|
-
prompt: createPromptEnvelope({
|
|
371
|
-
pageUrl: input.pageUrl,
|
|
372
|
-
artifact: input.artifact,
|
|
373
|
-
chunk: input.chunk,
|
|
374
|
-
chunkIndex: input.chunkIndex,
|
|
375
|
-
totalChunks: input.totalChunks,
|
|
376
|
-
memory,
|
|
377
|
-
retrievedContext,
|
|
378
|
-
}),
|
|
268
|
+
prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
|
|
379
269
|
schema: agentMemoSchema,
|
|
380
270
|
contract: [
|
|
381
271
|
"JSON contract:",
|
|
382
272
|
'{"role":"string","summary":"string","observations":["string"],"evidence":["string"],"nextQuestions":["string"]}',
|
|
383
273
|
].join("\n"),
|
|
274
|
+
attempts: 4,
|
|
384
275
|
maxRetries: 2,
|
|
385
|
-
providerOptions:
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
},
|
|
390
|
-
onProgress: (telemetry) => {
|
|
391
|
-
this.emitProgress({
|
|
392
|
-
stage: "agent",
|
|
393
|
-
state: "streaming",
|
|
394
|
-
message: `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
395
|
-
artifactIndex: input.artifactIndex,
|
|
396
|
-
artifactCount: input.artifactCount,
|
|
397
|
-
artifactUrl: input.artifact.url,
|
|
398
|
-
chunkIndex: input.chunkIndex + 1,
|
|
399
|
-
chunkCount: input.totalChunks,
|
|
276
|
+
providerOptions: this.providerOptions,
|
|
277
|
+
onRetry: (attempt, error) =>
|
|
278
|
+
this.emitAgentEvent(
|
|
279
|
+
"streaming",
|
|
400
280
|
agent,
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
281
|
+
input,
|
|
282
|
+
`${agent} agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
|
|
283
|
+
),
|
|
284
|
+
onProgress: (telemetry) =>
|
|
285
|
+
this.emitAgentEvent(
|
|
286
|
+
"streaming",
|
|
287
|
+
agent,
|
|
288
|
+
input,
|
|
289
|
+
`${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
290
|
+
telemetry,
|
|
291
|
+
),
|
|
406
292
|
});
|
|
407
293
|
}
|
|
408
294
|
|
|
409
295
|
private async runSynthesisAgent(
|
|
410
|
-
input:
|
|
411
|
-
pageUrl: string;
|
|
412
|
-
artifact: FormattedArtifact;
|
|
413
|
-
chunk: string;
|
|
414
|
-
chunkIndex: number;
|
|
415
|
-
totalChunks: number;
|
|
416
|
-
artifactIndex: number;
|
|
417
|
-
artifactCount: number;
|
|
418
|
-
},
|
|
296
|
+
input: ChunkTaskInput,
|
|
419
297
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
420
298
|
retrievedContext: string[],
|
|
421
299
|
): Promise<{ object: ChunkAnalysis; telemetry: StreamedObjectTelemetry }> {
|
|
422
300
|
return generateObjectFromStream({
|
|
423
301
|
model: this.providerClient.getModel(),
|
|
424
302
|
system: getSwarmAgentPrompt("synthesizer"),
|
|
425
|
-
prompt: createPromptEnvelope({
|
|
426
|
-
pageUrl: input.pageUrl,
|
|
427
|
-
artifact: input.artifact,
|
|
428
|
-
chunk: input.chunk,
|
|
429
|
-
chunkIndex: input.chunkIndex,
|
|
430
|
-
totalChunks: input.totalChunks,
|
|
431
|
-
memory,
|
|
432
|
-
retrievedContext,
|
|
433
|
-
}),
|
|
303
|
+
prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
|
|
434
304
|
schema: chunkAnalysisSchema,
|
|
435
305
|
contract: [
|
|
436
306
|
"JSON contract:",
|
|
437
307
|
'{"entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"summary":"string","notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
|
|
438
308
|
].join("\n"),
|
|
309
|
+
attempts: 4,
|
|
439
310
|
maxRetries: 2,
|
|
440
|
-
providerOptions:
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
estimatedOutputTokens: telemetry.estimatedOutputTokens,
|
|
457
|
-
...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
|
|
458
|
-
...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
|
|
459
|
-
});
|
|
460
|
-
},
|
|
311
|
+
providerOptions: this.providerOptions,
|
|
312
|
+
onRetry: (attempt, error) =>
|
|
313
|
+
this.emitAgentEvent(
|
|
314
|
+
"streaming",
|
|
315
|
+
"synthesizer",
|
|
316
|
+
input,
|
|
317
|
+
`synthesizer agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
|
|
318
|
+
),
|
|
319
|
+
onProgress: (telemetry) =>
|
|
320
|
+
this.emitAgentEvent(
|
|
321
|
+
"streaming",
|
|
322
|
+
"synthesizer",
|
|
323
|
+
input,
|
|
324
|
+
`synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
325
|
+
telemetry,
|
|
326
|
+
),
|
|
461
327
|
});
|
|
462
328
|
}
|
|
463
329
|
|
|
@@ -474,27 +340,15 @@ export class AiBundleAnalyzer {
|
|
|
474
340
|
"You are the lead synthesis agent for the final report.",
|
|
475
341
|
"Merge artifact summaries and chunk analyses into a coherent site-level reverse-engineering map with the strongest evidence available.",
|
|
476
342
|
].join(" "),
|
|
477
|
-
prompt: [
|
|
478
|
-
|
|
479
|
-
"Artifact summaries:",
|
|
480
|
-
JSON.stringify(artifactSummaries, null, 2),
|
|
481
|
-
"Chunk analyses:",
|
|
482
|
-
JSON.stringify(chunkAnalyses, null, 2),
|
|
483
|
-
].join("\n\n"),
|
|
484
|
-
schema: finalAnalysisSchema.omit({
|
|
485
|
-
artifactSummaries: true,
|
|
486
|
-
analyzedChunkCount: true,
|
|
487
|
-
}),
|
|
343
|
+
prompt: [`Target page: ${pageUrl}`, "Artifact summaries:", JSON.stringify(artifactSummaries, null, 2), "Chunk analyses:", JSON.stringify(chunkAnalyses, null, 2)].join("\n\n"),
|
|
344
|
+
schema: finalAnalysisSchema.omit({ artifactSummaries: true, analyzedChunkCount: true }),
|
|
488
345
|
contract: [
|
|
489
346
|
"JSON contract:",
|
|
490
347
|
'{"overview":"string","entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
|
|
491
348
|
].join("\n"),
|
|
349
|
+
attempts: 4,
|
|
492
350
|
maxRetries: 2,
|
|
493
|
-
providerOptions:
|
|
494
|
-
openai: {
|
|
495
|
-
store: false,
|
|
496
|
-
},
|
|
497
|
-
},
|
|
351
|
+
providerOptions: this.providerOptions,
|
|
498
352
|
});
|
|
499
353
|
|
|
500
354
|
return finalAnalysisSchema.parse({
|
|
@@ -517,11 +371,7 @@ export class AiBundleAnalyzer {
|
|
|
517
371
|
|
|
518
372
|
private getRetrievedContext(
|
|
519
373
|
agent: SwarmAgentName,
|
|
520
|
-
input:
|
|
521
|
-
artifact: FormattedArtifact;
|
|
522
|
-
chunk: string;
|
|
523
|
-
localRag: LocalArtifactRag | null;
|
|
524
|
-
},
|
|
374
|
+
input: Pick<ChunkTaskInput, "artifact" | "chunk" | "localRag">,
|
|
525
375
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
526
376
|
): string[] {
|
|
527
377
|
if (!input.localRag) {
|
|
@@ -536,10 +386,7 @@ export class AiBundleAnalyzer {
|
|
|
536
386
|
synthesizer: "entry points call graph restored names investigation tips risks runtime relationships architecture summary",
|
|
537
387
|
};
|
|
538
388
|
|
|
539
|
-
const memoryText = Object.values(memory)
|
|
540
|
-
.map((entry) => JSON.stringify(entry))
|
|
541
|
-
.join(" ");
|
|
542
|
-
|
|
389
|
+
const memoryText = Object.values(memory).map((entry) => JSON.stringify(entry)).join(" ");
|
|
543
390
|
return input.localRag.query({
|
|
544
391
|
artifactUrl: input.artifact.url,
|
|
545
392
|
query: `${agentKeywords[agent]} ${input.chunk} ${memoryText}`.slice(0, 6000),
|