@redstone-md/mapr 0.0.4-alpha → 0.0.5-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +32 -2
- package/lib/ai-analyzer.ts +175 -330
- package/lib/ai-json.ts +40 -1
- package/lib/analysis-fallback.ts +39 -0
- package/lib/analysis-helpers.ts +108 -0
- package/lib/cli-args.ts +4 -1
- package/lib/promise-pool.ts +25 -0
- package/package.json +1 -1
package/index.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
|
|
3
|
-
import { cancel, confirm, intro, isCancel, log, outro, spinner, text } from "@clack/prompts";
|
|
3
|
+
import { cancel, confirm, intro, isCancel, log, outro, select, spinner, text } from "@clack/prompts";
|
|
4
4
|
import pc from "picocolors";
|
|
5
5
|
import packageJson from "./package.json";
|
|
6
6
|
|
|
@@ -14,6 +14,9 @@ import { ReportWriter } from "./lib/reporter";
|
|
|
14
14
|
import { BundleScraper } from "./lib/scraper";
|
|
15
15
|
import { SWARM_AGENT_ORDER } from "./lib/swarm-prompts";
|
|
16
16
|
|
|
17
|
+
process.env.AI_SDK_LOG_WARNINGS = "false";
|
|
18
|
+
(globalThis as typeof globalThis & { AI_SDK_LOG_WARNINGS?: boolean }).AI_SDK_LOG_WARNINGS = false;
|
|
19
|
+
|
|
17
20
|
function exitIfCancelled<T>(value: T): T {
|
|
18
21
|
if (isCancel(value)) {
|
|
19
22
|
cancel("Operation cancelled.");
|
|
@@ -62,6 +65,30 @@ async function resolveTargetUrl(headless: boolean, prefilledUrl?: string): Promi
|
|
|
62
65
|
);
|
|
63
66
|
}
|
|
64
67
|
|
|
68
|
+
async function resolveAnalysisConcurrency(headless: boolean, prefilledValue: number | undefined, totalChunks: number): Promise<number> {
|
|
69
|
+
if (prefilledValue !== undefined) {
|
|
70
|
+
return prefilledValue;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (headless || totalChunks <= 1) {
|
|
74
|
+
return 1;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return Number(
|
|
78
|
+
exitIfCancelled(
|
|
79
|
+
await select({
|
|
80
|
+
message: "Analysis concurrency",
|
|
81
|
+
initialValue: 2,
|
|
82
|
+
options: [
|
|
83
|
+
{ value: 1, label: "1 lane", hint: "Most stable" },
|
|
84
|
+
{ value: 2, label: "2 lanes", hint: "Recommended" },
|
|
85
|
+
{ value: 4, label: "4 lanes", hint: "Aggressive" },
|
|
86
|
+
],
|
|
87
|
+
}),
|
|
88
|
+
),
|
|
89
|
+
);
|
|
90
|
+
}
|
|
91
|
+
|
|
65
92
|
async function run(): Promise<void> {
|
|
66
93
|
const args = parseCliArgs(process.argv.slice(2));
|
|
67
94
|
|
|
@@ -147,15 +174,17 @@ async function run(): Promise<void> {
|
|
|
147
174
|
sum + chunkTextByBytes(artifact.formattedContent || artifact.content, deriveChunkSizeBytes(config.modelContextSize)).length,
|
|
148
175
|
0,
|
|
149
176
|
);
|
|
177
|
+
const analysisConcurrency = await resolveAnalysisConcurrency(headless, args.analysisConcurrency, totalChunks);
|
|
150
178
|
const totalAgentTasks = Math.max(1, totalChunks * SWARM_AGENT_ORDER.length);
|
|
151
179
|
let completedAgentTasks = 0;
|
|
152
180
|
|
|
153
181
|
const analysisStep = spinner({ indicator: "timer" });
|
|
154
|
-
analysisStep.start(formatAnalysisProgress(0, totalAgentTasks,
|
|
182
|
+
analysisStep.start(formatAnalysisProgress(0, totalAgentTasks, `Starting swarm analysis (${analysisConcurrency} lane${analysisConcurrency === 1 ? "" : "s"})`));
|
|
155
183
|
|
|
156
184
|
const analyzer = new AiBundleAnalyzer({
|
|
157
185
|
providerConfig: config,
|
|
158
186
|
localRag: args.localRag,
|
|
187
|
+
analysisConcurrency,
|
|
159
188
|
onProgress(event) {
|
|
160
189
|
if (event.stage === "agent" && event.state === "completed") {
|
|
161
190
|
completedAgentTasks += 1;
|
|
@@ -224,6 +253,7 @@ async function run(): Promise<void> {
|
|
|
224
253
|
`${pc.bold("Target:")} ${scrapeResult.pageUrl}`,
|
|
225
254
|
`${pc.bold("Provider:")} ${config.providerName} (${config.model})`,
|
|
226
255
|
`${pc.bold("Context size:")} ${config.modelContextSize.toLocaleString()} tokens`,
|
|
256
|
+
`${pc.bold("Concurrency:")} ${analysisConcurrency}`,
|
|
227
257
|
`${pc.bold("Local RAG:")} ${args.localRag ? "enabled" : "disabled"}`,
|
|
228
258
|
`${pc.bold("Pages:")} ${scrapeResult.htmlPages.length}`,
|
|
229
259
|
`${pc.bold("Artifacts:")} ${formattedArtifacts.length}`,
|
package/lib/ai-analyzer.ts
CHANGED
|
@@ -1,26 +1,28 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
import { Buffer } from "buffer";
|
|
3
2
|
|
|
3
|
+
import type { AgentMemo, ArtifactSummary, BundleAnalysis, ChunkAnalysis } from "./analysis-schema";
|
|
4
4
|
import {
|
|
5
5
|
agentMemoSchema,
|
|
6
|
-
artifactSummarySchema,
|
|
7
6
|
buildAnalysisSnapshot,
|
|
8
7
|
chunkAnalysisSchema,
|
|
9
8
|
finalAnalysisSchema,
|
|
10
|
-
type AgentMemo,
|
|
11
|
-
type ArtifactSummary,
|
|
12
|
-
type BundleAnalysis,
|
|
13
|
-
type ChunkAnalysis,
|
|
14
9
|
PartialAnalysisError,
|
|
15
10
|
} from "./analysis-schema";
|
|
11
|
+
import { createFallbackAgentMemo, createFallbackChunkAnalysis } from "./analysis-fallback";
|
|
12
|
+
import {
|
|
13
|
+
chunkTextByBytes,
|
|
14
|
+
createPromptEnvelope,
|
|
15
|
+
deriveChunkSizeBytes,
|
|
16
|
+
formatAgentTelemetrySuffix,
|
|
17
|
+
normalizeAiError,
|
|
18
|
+
} from "./analysis-helpers";
|
|
16
19
|
import { generateObjectFromStream, type StreamedObjectTelemetry } from "./ai-json";
|
|
17
20
|
import { artifactTypeSchema } from "./artifacts";
|
|
18
21
|
import type { FormattedArtifact } from "./formatter";
|
|
19
22
|
import { LocalArtifactRag } from "./local-rag";
|
|
23
|
+
import { mapWithConcurrency } from "./promise-pool";
|
|
20
24
|
import { AiProviderClient, type AiProviderConfig } from "./provider";
|
|
21
|
-
import {
|
|
22
|
-
|
|
23
|
-
export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
|
|
25
|
+
import { getGlobalMissionPrompt, getSwarmAgentPrompt, SWARM_AGENT_ORDER, type SwarmAgentName } from "./swarm-prompts";
|
|
24
26
|
|
|
25
27
|
const analyzeInputSchema = z.object({
|
|
26
28
|
pageUrl: z.string().url(),
|
|
@@ -37,6 +39,7 @@ const analyzeInputSchema = z.object({
|
|
|
37
39
|
}),
|
|
38
40
|
),
|
|
39
41
|
});
|
|
42
|
+
|
|
40
43
|
export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
|
|
41
44
|
export type AnalysisProgressState = "started" | "streaming" | "completed";
|
|
42
45
|
|
|
@@ -59,120 +62,35 @@ interface AnalyzerOptions {
|
|
|
59
62
|
providerConfig: AiProviderConfig;
|
|
60
63
|
chunkSizeBytes?: number;
|
|
61
64
|
localRag?: boolean;
|
|
65
|
+
analysisConcurrency?: number;
|
|
62
66
|
onProgress?: (event: AnalysisProgressEvent) => void;
|
|
63
67
|
}
|
|
64
68
|
|
|
65
|
-
|
|
69
|
+
interface ChunkTaskInput {
|
|
66
70
|
pageUrl: string;
|
|
67
71
|
artifact: FormattedArtifact;
|
|
68
72
|
chunk: string;
|
|
69
73
|
chunkIndex: number;
|
|
70
74
|
totalChunks: number;
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
return [
|
|
75
|
-
`Target page: ${input.pageUrl}`,
|
|
76
|
-
`Artifact URL: ${input.artifact.url}`,
|
|
77
|
-
`Artifact type: ${input.artifact.type}`,
|
|
78
|
-
`Discovered from: ${input.artifact.discoveredFrom}`,
|
|
79
|
-
`Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
|
|
80
|
-
input.artifact.formattingNote ? `Formatting note: ${input.artifact.formattingNote}` : "Formatting note: none",
|
|
81
|
-
input.memory ? `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}` : "Swarm memory: none yet",
|
|
82
|
-
input.retrievedContext && input.retrievedContext.length > 0
|
|
83
|
-
? `Local RAG evidence:\n${input.retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`).join("\n\n")}`
|
|
84
|
-
: "Local RAG evidence: none",
|
|
85
|
-
"Artifact content:",
|
|
86
|
-
"```text",
|
|
87
|
-
input.chunk,
|
|
88
|
-
"```",
|
|
89
|
-
].join("\n\n");
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
function findSplitBoundary(source: string, start: number, end: number): number {
|
|
93
|
-
const minimumPreferredIndex = start + Math.max(1, Math.floor((end - start) * 0.6));
|
|
94
|
-
const preferredDelimiters = new Set(["\n", ";", "}", " ", ","]);
|
|
95
|
-
|
|
96
|
-
for (let cursor = end - 1; cursor >= minimumPreferredIndex; cursor -= 1) {
|
|
97
|
-
const character = source[cursor];
|
|
98
|
-
if (character && preferredDelimiters.has(character)) {
|
|
99
|
-
return cursor + 1;
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
return end;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
export function deriveChunkSizeBytes(modelContextSize: number): number {
|
|
107
|
-
const validatedContextSize = z.number().int().positive().parse(modelContextSize);
|
|
108
|
-
const derived = Math.floor(validatedContextSize * 0.9);
|
|
109
|
-
return Math.max(DEFAULT_CHUNK_SIZE_BYTES, derived);
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
|
|
113
|
-
const validatedSource = z.string().parse(source);
|
|
114
|
-
const validatedMaxBytes = z.number().int().positive().parse(maxBytes);
|
|
115
|
-
|
|
116
|
-
if (validatedSource.length === 0) {
|
|
117
|
-
return [];
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
const chunks: string[] = [];
|
|
121
|
-
let start = 0;
|
|
122
|
-
|
|
123
|
-
while (start < validatedSource.length) {
|
|
124
|
-
let end = Math.min(validatedSource.length, start + validatedMaxBytes);
|
|
125
|
-
|
|
126
|
-
while (end > start && Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
|
|
127
|
-
end -= 1;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
if (end <= start) {
|
|
131
|
-
end = start + 1;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
const splitAt = end === validatedSource.length ? end : findSplitBoundary(validatedSource, start, end);
|
|
135
|
-
chunks.push(validatedSource.slice(start, splitAt));
|
|
136
|
-
start = splitAt;
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
return chunks;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
function normalizeAiError(error: unknown): Error {
|
|
143
|
-
if (!(error instanceof Error)) {
|
|
144
|
-
return new Error("AI analysis failed with an unknown error.");
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
const message = error.message.toLowerCase();
|
|
148
|
-
if (message.includes("rate limit")) {
|
|
149
|
-
return new Error("Provider rate limit hit during analysis. Please retry in a moment.");
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
if (message.includes("api key")) {
|
|
153
|
-
return new Error("The configured API key was rejected by the provider.");
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
return error;
|
|
75
|
+
artifactIndex: number;
|
|
76
|
+
artifactCount: number;
|
|
77
|
+
localRag: LocalArtifactRag | null;
|
|
157
78
|
}
|
|
158
79
|
|
|
159
|
-
|
|
160
|
-
const tokenCount = telemetry.outputTokens ?? telemetry.estimatedOutputTokens;
|
|
161
|
-
const tokenLabel = telemetry.outputTokens !== undefined ? `${tokenCount} tok` : `~${tokenCount} tok`;
|
|
162
|
-
const tpsLabel = telemetry.tokensPerSecond !== undefined ? ` ${telemetry.tokensPerSecond} tps` : "";
|
|
163
|
-
return ` [${tokenLabel}${tpsLabel}]`;
|
|
164
|
-
}
|
|
80
|
+
export { chunkTextByBytes, deriveChunkSizeBytes } from "./analysis-helpers";
|
|
165
81
|
|
|
166
82
|
export class AiBundleAnalyzer {
|
|
167
83
|
private readonly providerClient: AiProviderClient;
|
|
168
84
|
private readonly chunkSizeBytes: number;
|
|
169
85
|
private readonly localRagEnabled: boolean;
|
|
86
|
+
private readonly analysisConcurrency: number;
|
|
170
87
|
private readonly onProgress: ((event: AnalysisProgressEvent) => void) | undefined;
|
|
171
88
|
|
|
172
89
|
public constructor(options: AnalyzerOptions) {
|
|
173
90
|
this.providerClient = new AiProviderClient(options.providerConfig);
|
|
174
91
|
this.chunkSizeBytes = options.chunkSizeBytes ?? deriveChunkSizeBytes(options.providerConfig.modelContextSize);
|
|
175
92
|
this.localRagEnabled = options.localRag ?? false;
|
|
93
|
+
this.analysisConcurrency = Math.max(1, Math.floor(options.analysisConcurrency ?? 1));
|
|
176
94
|
this.onProgress = options.onProgress;
|
|
177
95
|
}
|
|
178
96
|
|
|
@@ -196,268 +114,214 @@ export class AiBundleAnalyzer {
|
|
|
196
114
|
|
|
197
115
|
const chunkAnalyses: ChunkAnalysis[] = [];
|
|
198
116
|
const artifactSummaries: ArtifactSummary[] = [];
|
|
117
|
+
const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
|
|
199
118
|
|
|
200
|
-
|
|
201
|
-
const
|
|
202
|
-
|
|
203
|
-
for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
|
|
204
|
-
const artifact = validatedInput.artifacts[artifactIndex]!;
|
|
205
|
-
const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
|
|
206
|
-
const perArtifactChunkAnalyses: ChunkAnalysis[] = [];
|
|
207
|
-
|
|
208
|
-
this.emitProgress({
|
|
209
|
-
stage: "artifact",
|
|
210
|
-
state: "started",
|
|
211
|
-
message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
212
|
-
artifactIndex: artifactIndex + 1,
|
|
213
|
-
artifactCount: validatedInput.artifacts.length,
|
|
214
|
-
artifactUrl: artifact.url,
|
|
215
|
-
});
|
|
216
|
-
|
|
217
|
-
for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
|
|
218
|
-
this.emitProgress({
|
|
219
|
-
stage: "chunk",
|
|
220
|
-
state: "started",
|
|
221
|
-
message: `Starting chunk ${chunkIndex + 1}/${chunks.length} for ${artifact.url}`,
|
|
222
|
-
artifactIndex: artifactIndex + 1,
|
|
223
|
-
artifactCount: validatedInput.artifacts.length,
|
|
224
|
-
artifactUrl: artifact.url,
|
|
225
|
-
chunkIndex: chunkIndex + 1,
|
|
226
|
-
chunkCount: chunks.length,
|
|
227
|
-
});
|
|
119
|
+
for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
|
|
120
|
+
const artifact = validatedInput.artifacts[artifactIndex]!;
|
|
121
|
+
const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
|
|
228
122
|
|
|
229
|
-
|
|
123
|
+
this.emitProgress({
|
|
124
|
+
stage: "artifact",
|
|
125
|
+
state: "started",
|
|
126
|
+
message: `Starting swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
127
|
+
artifactIndex: artifactIndex + 1,
|
|
128
|
+
artifactCount: validatedInput.artifacts.length,
|
|
129
|
+
artifactUrl: artifact.url,
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
const perArtifactChunkAnalyses = await mapWithConcurrency(
|
|
133
|
+
chunks,
|
|
134
|
+
this.analysisConcurrency,
|
|
135
|
+
async (chunk, chunkIndex): Promise<ChunkAnalysis> => {
|
|
136
|
+
const chunkInput: ChunkTaskInput = {
|
|
230
137
|
pageUrl: validatedInput.pageUrl,
|
|
231
138
|
artifact,
|
|
232
|
-
chunk
|
|
139
|
+
chunk,
|
|
233
140
|
chunkIndex,
|
|
234
141
|
totalChunks: chunks.length,
|
|
235
142
|
artifactIndex: artifactIndex + 1,
|
|
236
143
|
artifactCount: validatedInput.artifacts.length,
|
|
237
144
|
localRag,
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
chunkAnalyses.push(analysis);
|
|
241
|
-
perArtifactChunkAnalyses.push(analysis);
|
|
145
|
+
};
|
|
242
146
|
|
|
243
|
-
this.
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
url: artifact.url,
|
|
257
|
-
type: artifact.type,
|
|
258
|
-
chunkCount: chunks.length,
|
|
259
|
-
summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
|
|
260
|
-
});
|
|
261
|
-
|
|
262
|
-
this.emitProgress({
|
|
263
|
-
stage: "artifact",
|
|
264
|
-
state: "completed",
|
|
265
|
-
message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
266
|
-
artifactIndex: artifactIndex + 1,
|
|
267
|
-
artifactCount: validatedInput.artifacts.length,
|
|
268
|
-
artifactUrl: artifact.url,
|
|
269
|
-
});
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
|
|
273
|
-
} catch (error) {
|
|
274
|
-
const normalizedError = normalizeAiError(error);
|
|
275
|
-
const partialAnalysis = buildAnalysisSnapshot({
|
|
276
|
-
overview:
|
|
277
|
-
chunkAnalyses.length > 0 || artifactSummaries.length > 0
|
|
278
|
-
? `Partial analysis only. Processing stopped because: ${normalizedError.message}`
|
|
279
|
-
: `Analysis aborted before any chunk completed. Cause: ${normalizedError.message}`,
|
|
280
|
-
artifactSummaries,
|
|
281
|
-
chunkAnalyses,
|
|
147
|
+
this.emitChunkEvent("started", chunkInput);
|
|
148
|
+
const analysis = await this.analyzeChunkWithSwarm(chunkInput);
|
|
149
|
+
this.emitChunkEvent("completed", chunkInput);
|
|
150
|
+
return analysis;
|
|
151
|
+
},
|
|
152
|
+
);
|
|
153
|
+
|
|
154
|
+
chunkAnalyses.push(...perArtifactChunkAnalyses);
|
|
155
|
+
artifactSummaries.push({
|
|
156
|
+
url: artifact.url,
|
|
157
|
+
type: artifact.type,
|
|
158
|
+
chunkCount: chunks.length,
|
|
159
|
+
summary: perArtifactChunkAnalyses.map((analysis) => analysis.summary).join(" "),
|
|
282
160
|
});
|
|
283
161
|
|
|
284
|
-
|
|
162
|
+
this.emitProgress({
|
|
163
|
+
stage: "artifact",
|
|
164
|
+
state: "completed",
|
|
165
|
+
message: `Completed swarm analysis for artifact ${artifactIndex + 1}/${validatedInput.artifacts.length}: ${artifact.url}`,
|
|
166
|
+
artifactIndex: artifactIndex + 1,
|
|
167
|
+
artifactCount: validatedInput.artifacts.length,
|
|
168
|
+
artifactUrl: artifact.url,
|
|
169
|
+
});
|
|
285
170
|
}
|
|
171
|
+
|
|
172
|
+
return await this.summarizeFindings(validatedInput.pageUrl, artifactSummaries, chunkAnalyses);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
private emitChunkEvent(state: Extract<AnalysisProgressState, "started" | "completed">, input: ChunkTaskInput): void {
|
|
176
|
+
this.emitProgress({
|
|
177
|
+
stage: "chunk",
|
|
178
|
+
state,
|
|
179
|
+
message: `${state === "started" ? "Starting" : "Completed"} chunk ${input.chunkIndex + 1}/${input.totalChunks} for ${input.artifact.url}`,
|
|
180
|
+
artifactIndex: input.artifactIndex,
|
|
181
|
+
artifactCount: input.artifactCount,
|
|
182
|
+
artifactUrl: input.artifact.url,
|
|
183
|
+
chunkIndex: input.chunkIndex + 1,
|
|
184
|
+
chunkCount: input.totalChunks,
|
|
185
|
+
});
|
|
286
186
|
}
|
|
287
187
|
|
|
288
|
-
private async analyzeChunkWithSwarm(input: {
|
|
289
|
-
pageUrl: string;
|
|
290
|
-
artifact: FormattedArtifact;
|
|
291
|
-
chunk: string;
|
|
292
|
-
chunkIndex: number;
|
|
293
|
-
totalChunks: number;
|
|
294
|
-
artifactIndex: number;
|
|
295
|
-
artifactCount: number;
|
|
296
|
-
localRag: LocalArtifactRag | null;
|
|
297
|
-
}): Promise<ChunkAnalysis> {
|
|
188
|
+
private async analyzeChunkWithSwarm(input: ChunkTaskInput): Promise<ChunkAnalysis> {
|
|
298
189
|
const memory: Partial<Record<SwarmAgentName, AgentMemo | ChunkAnalysis>> = {};
|
|
299
190
|
|
|
300
191
|
for (const agent of SWARM_AGENT_ORDER) {
|
|
301
|
-
this.
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
agent,
|
|
311
|
-
});
|
|
192
|
+
this.emitAgentEvent("started", agent, input, `${agent} agent running on ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}`);
|
|
193
|
+
|
|
194
|
+
try {
|
|
195
|
+
if (agent === "synthesizer") {
|
|
196
|
+
const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
|
|
197
|
+
memory[agent] = synthesized.object;
|
|
198
|
+
this.emitAgentCompletion(agent, input, synthesized.telemetry);
|
|
199
|
+
continue;
|
|
200
|
+
}
|
|
312
201
|
|
|
313
|
-
if (agent === "synthesizer") {
|
|
314
|
-
const synthesized = await this.runSynthesisAgent(input, memory, this.getRetrievedContext(agent, input, memory));
|
|
315
|
-
memory[agent] = synthesized.object;
|
|
316
|
-
this.emitProgress({
|
|
317
|
-
stage: "agent",
|
|
318
|
-
state: "completed",
|
|
319
|
-
message: `${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(synthesized.telemetry)}`,
|
|
320
|
-
artifactIndex: input.artifactIndex,
|
|
321
|
-
artifactCount: input.artifactCount,
|
|
322
|
-
artifactUrl: input.artifact.url,
|
|
323
|
-
chunkIndex: input.chunkIndex + 1,
|
|
324
|
-
chunkCount: input.totalChunks,
|
|
325
|
-
agent,
|
|
326
|
-
estimatedOutputTokens: synthesized.telemetry.estimatedOutputTokens,
|
|
327
|
-
...(synthesized.telemetry.outputTokens !== undefined ? { outputTokens: synthesized.telemetry.outputTokens } : {}),
|
|
328
|
-
...(synthesized.telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: synthesized.telemetry.tokensPerSecond } : {}),
|
|
329
|
-
});
|
|
330
|
-
} else {
|
|
331
202
|
const memo = await this.runMemoAgent(agent, input, memory, this.getRetrievedContext(agent, input, memory));
|
|
332
203
|
memory[agent] = memo.object;
|
|
333
|
-
this.
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
204
|
+
this.emitAgentCompletion(agent, input, memo.telemetry);
|
|
205
|
+
} catch (error) {
|
|
206
|
+
const normalizedError = normalizeAiError(error);
|
|
207
|
+
memory[agent] =
|
|
208
|
+
agent === "synthesizer"
|
|
209
|
+
? createFallbackChunkAnalysis({ artifactUrl: input.artifact.url, memory, error: normalizedError })
|
|
210
|
+
: createFallbackAgentMemo(agent, normalizedError);
|
|
211
|
+
|
|
212
|
+
this.emitAgentEvent(
|
|
213
|
+
"completed",
|
|
342
214
|
agent,
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
});
|
|
215
|
+
input,
|
|
216
|
+
`${agent} agent fell back ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${normalizedError.message}`,
|
|
217
|
+
);
|
|
347
218
|
}
|
|
348
219
|
}
|
|
349
220
|
|
|
350
221
|
return chunkAnalysisSchema.parse(memory.synthesizer);
|
|
351
222
|
}
|
|
352
223
|
|
|
224
|
+
private emitAgentCompletion(agent: SwarmAgentName, input: ChunkTaskInput, telemetry: StreamedObjectTelemetry): void {
|
|
225
|
+
this.emitAgentEvent(
|
|
226
|
+
"completed",
|
|
227
|
+
agent,
|
|
228
|
+
input,
|
|
229
|
+
`${agent} agent completed ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
230
|
+
telemetry,
|
|
231
|
+
);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
private emitAgentEvent(
|
|
235
|
+
state: AnalysisProgressState,
|
|
236
|
+
agent: SwarmAgentName,
|
|
237
|
+
input: ChunkTaskInput,
|
|
238
|
+
message: string,
|
|
239
|
+
telemetry?: StreamedObjectTelemetry,
|
|
240
|
+
): void {
|
|
241
|
+
this.emitProgress({
|
|
242
|
+
stage: "agent",
|
|
243
|
+
state,
|
|
244
|
+
message,
|
|
245
|
+
artifactIndex: input.artifactIndex,
|
|
246
|
+
artifactCount: input.artifactCount,
|
|
247
|
+
artifactUrl: input.artifact.url,
|
|
248
|
+
chunkIndex: input.chunkIndex + 1,
|
|
249
|
+
chunkCount: input.totalChunks,
|
|
250
|
+
agent,
|
|
251
|
+
...(telemetry !== undefined ? { estimatedOutputTokens: telemetry.estimatedOutputTokens } : {}),
|
|
252
|
+
...(telemetry?.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
|
|
253
|
+
...(telemetry?.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
|
|
254
|
+
});
|
|
255
|
+
}
|
|
256
|
+
|
|
353
257
|
private async runMemoAgent(
|
|
354
258
|
agent: Exclude<SwarmAgentName, "synthesizer">,
|
|
355
|
-
input:
|
|
356
|
-
pageUrl: string;
|
|
357
|
-
artifact: FormattedArtifact;
|
|
358
|
-
chunk: string;
|
|
359
|
-
chunkIndex: number;
|
|
360
|
-
totalChunks: number;
|
|
361
|
-
artifactIndex: number;
|
|
362
|
-
artifactCount: number;
|
|
363
|
-
},
|
|
259
|
+
input: ChunkTaskInput,
|
|
364
260
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
365
261
|
retrievedContext: string[],
|
|
366
262
|
): Promise<{ object: AgentMemo; telemetry: StreamedObjectTelemetry }> {
|
|
367
263
|
return generateObjectFromStream({
|
|
368
264
|
model: this.providerClient.getModel(),
|
|
369
265
|
system: getSwarmAgentPrompt(agent),
|
|
370
|
-
prompt: createPromptEnvelope({
|
|
371
|
-
pageUrl: input.pageUrl,
|
|
372
|
-
artifact: input.artifact,
|
|
373
|
-
chunk: input.chunk,
|
|
374
|
-
chunkIndex: input.chunkIndex,
|
|
375
|
-
totalChunks: input.totalChunks,
|
|
376
|
-
memory,
|
|
377
|
-
retrievedContext,
|
|
378
|
-
}),
|
|
266
|
+
prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
|
|
379
267
|
schema: agentMemoSchema,
|
|
380
268
|
contract: [
|
|
381
269
|
"JSON contract:",
|
|
382
270
|
'{"role":"string","summary":"string","observations":["string"],"evidence":["string"],"nextQuestions":["string"]}',
|
|
383
271
|
].join("\n"),
|
|
272
|
+
attempts: 4,
|
|
384
273
|
maxRetries: 2,
|
|
385
|
-
providerOptions: {
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
},
|
|
390
|
-
onProgress: (telemetry) => {
|
|
391
|
-
this.emitProgress({
|
|
392
|
-
stage: "agent",
|
|
393
|
-
state: "streaming",
|
|
394
|
-
message: `${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
395
|
-
artifactIndex: input.artifactIndex,
|
|
396
|
-
artifactCount: input.artifactCount,
|
|
397
|
-
artifactUrl: input.artifact.url,
|
|
398
|
-
chunkIndex: input.chunkIndex + 1,
|
|
399
|
-
chunkCount: input.totalChunks,
|
|
274
|
+
providerOptions: { openai: { store: false } },
|
|
275
|
+
onRetry: (attempt, error) =>
|
|
276
|
+
this.emitAgentEvent(
|
|
277
|
+
"streaming",
|
|
400
278
|
agent,
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
279
|
+
input,
|
|
280
|
+
`${agent} agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
|
|
281
|
+
),
|
|
282
|
+
onProgress: (telemetry) =>
|
|
283
|
+
this.emitAgentEvent(
|
|
284
|
+
"streaming",
|
|
285
|
+
agent,
|
|
286
|
+
input,
|
|
287
|
+
`${agent} agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
288
|
+
telemetry,
|
|
289
|
+
),
|
|
406
290
|
});
|
|
407
291
|
}
|
|
408
292
|
|
|
409
293
|
private async runSynthesisAgent(
|
|
410
|
-
input:
|
|
411
|
-
pageUrl: string;
|
|
412
|
-
artifact: FormattedArtifact;
|
|
413
|
-
chunk: string;
|
|
414
|
-
chunkIndex: number;
|
|
415
|
-
totalChunks: number;
|
|
416
|
-
artifactIndex: number;
|
|
417
|
-
artifactCount: number;
|
|
418
|
-
},
|
|
294
|
+
input: ChunkTaskInput,
|
|
419
295
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
420
296
|
retrievedContext: string[],
|
|
421
297
|
): Promise<{ object: ChunkAnalysis; telemetry: StreamedObjectTelemetry }> {
|
|
422
298
|
return generateObjectFromStream({
|
|
423
299
|
model: this.providerClient.getModel(),
|
|
424
300
|
system: getSwarmAgentPrompt("synthesizer"),
|
|
425
|
-
prompt: createPromptEnvelope({
|
|
426
|
-
pageUrl: input.pageUrl,
|
|
427
|
-
artifact: input.artifact,
|
|
428
|
-
chunk: input.chunk,
|
|
429
|
-
chunkIndex: input.chunkIndex,
|
|
430
|
-
totalChunks: input.totalChunks,
|
|
431
|
-
memory,
|
|
432
|
-
retrievedContext,
|
|
433
|
-
}),
|
|
301
|
+
prompt: createPromptEnvelope({ ...input, memory, retrievedContext }),
|
|
434
302
|
schema: chunkAnalysisSchema,
|
|
435
303
|
contract: [
|
|
436
304
|
"JSON contract:",
|
|
437
305
|
'{"entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"summary":"string","notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
|
|
438
306
|
].join("\n"),
|
|
307
|
+
attempts: 4,
|
|
439
308
|
maxRetries: 2,
|
|
440
|
-
providerOptions: {
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
estimatedOutputTokens: telemetry.estimatedOutputTokens,
|
|
457
|
-
...(telemetry.outputTokens !== undefined ? { outputTokens: telemetry.outputTokens } : {}),
|
|
458
|
-
...(telemetry.tokensPerSecond !== undefined ? { tokensPerSecond: telemetry.tokensPerSecond } : {}),
|
|
459
|
-
});
|
|
460
|
-
},
|
|
309
|
+
providerOptions: { openai: { store: false } },
|
|
310
|
+
onRetry: (attempt, error) =>
|
|
311
|
+
this.emitAgentEvent(
|
|
312
|
+
"streaming",
|
|
313
|
+
"synthesizer",
|
|
314
|
+
input,
|
|
315
|
+
`synthesizer agent retry ${attempt}/4 ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}: ${error.message}`,
|
|
316
|
+
),
|
|
317
|
+
onProgress: (telemetry) =>
|
|
318
|
+
this.emitAgentEvent(
|
|
319
|
+
"streaming",
|
|
320
|
+
"synthesizer",
|
|
321
|
+
input,
|
|
322
|
+
`synthesizer agent streaming ${input.artifact.url} chunk ${input.chunkIndex + 1}/${input.totalChunks}${formatAgentTelemetrySuffix(telemetry)}`,
|
|
323
|
+
telemetry,
|
|
324
|
+
),
|
|
461
325
|
});
|
|
462
326
|
}
|
|
463
327
|
|
|
@@ -474,27 +338,15 @@ export class AiBundleAnalyzer {
|
|
|
474
338
|
"You are the lead synthesis agent for the final report.",
|
|
475
339
|
"Merge artifact summaries and chunk analyses into a coherent site-level reverse-engineering map with the strongest evidence available.",
|
|
476
340
|
].join(" "),
|
|
477
|
-
prompt: [
|
|
478
|
-
|
|
479
|
-
"Artifact summaries:",
|
|
480
|
-
JSON.stringify(artifactSummaries, null, 2),
|
|
481
|
-
"Chunk analyses:",
|
|
482
|
-
JSON.stringify(chunkAnalyses, null, 2),
|
|
483
|
-
].join("\n\n"),
|
|
484
|
-
schema: finalAnalysisSchema.omit({
|
|
485
|
-
artifactSummaries: true,
|
|
486
|
-
analyzedChunkCount: true,
|
|
487
|
-
}),
|
|
341
|
+
prompt: [`Target page: ${pageUrl}`, "Artifact summaries:", JSON.stringify(artifactSummaries, null, 2), "Chunk analyses:", JSON.stringify(chunkAnalyses, null, 2)].join("\n\n"),
|
|
342
|
+
schema: finalAnalysisSchema.omit({ artifactSummaries: true, analyzedChunkCount: true }),
|
|
488
343
|
contract: [
|
|
489
344
|
"JSON contract:",
|
|
490
345
|
'{"overview":"string","entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
|
|
491
346
|
].join("\n"),
|
|
347
|
+
attempts: 4,
|
|
492
348
|
maxRetries: 2,
|
|
493
|
-
providerOptions: {
|
|
494
|
-
openai: {
|
|
495
|
-
store: false,
|
|
496
|
-
},
|
|
497
|
-
},
|
|
349
|
+
providerOptions: { openai: { store: false } },
|
|
498
350
|
});
|
|
499
351
|
|
|
500
352
|
return finalAnalysisSchema.parse({
|
|
@@ -517,11 +369,7 @@ export class AiBundleAnalyzer {
|
|
|
517
369
|
|
|
518
370
|
private getRetrievedContext(
|
|
519
371
|
agent: SwarmAgentName,
|
|
520
|
-
input:
|
|
521
|
-
artifact: FormattedArtifact;
|
|
522
|
-
chunk: string;
|
|
523
|
-
localRag: LocalArtifactRag | null;
|
|
524
|
-
},
|
|
372
|
+
input: Pick<ChunkTaskInput, "artifact" | "chunk" | "localRag">,
|
|
525
373
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
526
374
|
): string[] {
|
|
527
375
|
if (!input.localRag) {
|
|
@@ -536,10 +384,7 @@ export class AiBundleAnalyzer {
|
|
|
536
384
|
synthesizer: "entry points call graph restored names investigation tips risks runtime relationships architecture summary",
|
|
537
385
|
};
|
|
538
386
|
|
|
539
|
-
const memoryText = Object.values(memory)
|
|
540
|
-
.map((entry) => JSON.stringify(entry))
|
|
541
|
-
.join(" ");
|
|
542
|
-
|
|
387
|
+
const memoryText = Object.values(memory).map((entry) => JSON.stringify(entry)).join(" ");
|
|
543
388
|
return input.localRag.query({
|
|
544
389
|
artifactUrl: input.artifact.url,
|
|
545
390
|
query: `${agentKeywords[agent]} ${input.chunk} ${memoryText}`.slice(0, 6000),
|
package/lib/ai-json.ts
CHANGED
|
@@ -120,17 +120,55 @@ export async function generateObjectFromStream<TOutput>(input: {
|
|
|
120
120
|
prompt: string;
|
|
121
121
|
schema: z.ZodType<TOutput>;
|
|
122
122
|
contract: string;
|
|
123
|
+
attempts?: number;
|
|
123
124
|
maxRetries?: number;
|
|
124
125
|
providerOptions?: Record<string, unknown>;
|
|
125
126
|
onProgress?: (telemetry: StreamedObjectTelemetry) => void;
|
|
127
|
+
onRetry?: (attempt: number, error: Error) => void;
|
|
126
128
|
}): Promise<StreamedObjectResult<TOutput>> {
|
|
129
|
+
const attempts = Math.max(1, Math.floor(input.attempts ?? 3));
|
|
130
|
+
let lastError: Error | undefined;
|
|
131
|
+
|
|
132
|
+
for (let attempt = 1; attempt <= attempts; attempt += 1) {
|
|
133
|
+
try {
|
|
134
|
+
return await streamSingleObjectAttempt(input, attempt);
|
|
135
|
+
} catch (error) {
|
|
136
|
+
lastError = error instanceof Error ? error : new Error("Streaming object generation failed.");
|
|
137
|
+
if (attempt >= attempts) {
|
|
138
|
+
throw lastError;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
input.onRetry?.(attempt + 1, lastError);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
throw lastError ?? new Error("Streaming object generation failed.");
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
async function streamSingleObjectAttempt<TOutput>(
|
|
149
|
+
input: {
|
|
150
|
+
model: unknown;
|
|
151
|
+
system: string;
|
|
152
|
+
prompt: string;
|
|
153
|
+
schema: z.ZodType<TOutput>;
|
|
154
|
+
contract: string;
|
|
155
|
+
maxRetries?: number;
|
|
156
|
+
providerOptions?: Record<string, unknown>;
|
|
157
|
+
onProgress?: (telemetry: StreamedObjectTelemetry) => void;
|
|
158
|
+
},
|
|
159
|
+
attempt: number,
|
|
160
|
+
): Promise<StreamedObjectResult<TOutput>> {
|
|
127
161
|
let streamedText = "";
|
|
128
162
|
const startedAt = Date.now();
|
|
129
163
|
let lastProgressAt = 0;
|
|
164
|
+
const repairHint =
|
|
165
|
+
attempt > 1
|
|
166
|
+
? "\nPrevious attempt failed because the JSON was malformed or incomplete. Return a syntactically valid JSON object this time."
|
|
167
|
+
: "";
|
|
130
168
|
|
|
131
169
|
const result = streamText({
|
|
132
170
|
model: input.model as never,
|
|
133
|
-
system: formatJsonSystemPrompt(input.system, input.contract)
|
|
171
|
+
system: `${formatJsonSystemPrompt(input.system, input.contract)}${repairHint}`,
|
|
134
172
|
prompt: input.prompt,
|
|
135
173
|
maxRetries: input.maxRetries ?? 2,
|
|
136
174
|
...(input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {}),
|
|
@@ -158,6 +196,7 @@ export async function generateObjectFromStream<TOutput>(input: {
|
|
|
158
196
|
} catch {
|
|
159
197
|
usage = undefined;
|
|
160
198
|
}
|
|
199
|
+
|
|
161
200
|
const elapsedMs = Date.now() - startedAt;
|
|
162
201
|
const estimatedOutputTokens = estimateTokenCountFromText(streamedText);
|
|
163
202
|
const outputTokens = usage?.outputTokens ?? undefined;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { AgentMemo, ChunkAnalysis } from "./analysis-schema";
|
|
2
|
+
import type { SwarmAgentName } from "./swarm-prompts";
|
|
3
|
+
|
|
4
|
+
export function createFallbackAgentMemo(agent: Exclude<SwarmAgentName, "synthesizer">, error: Error): AgentMemo {
|
|
5
|
+
return {
|
|
6
|
+
role: agent,
|
|
7
|
+
summary: `${agent} agent failed after retries: ${error.message}`,
|
|
8
|
+
observations: [],
|
|
9
|
+
evidence: [],
|
|
10
|
+
nextQuestions: [`Retry ${agent} analysis for this chunk manually if the finding is important.`],
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export function createFallbackChunkAnalysis(input: {
|
|
15
|
+
artifactUrl: string;
|
|
16
|
+
memory: Partial<Record<SwarmAgentName, unknown>>;
|
|
17
|
+
error: Error;
|
|
18
|
+
}): ChunkAnalysis {
|
|
19
|
+
const memoSummaries = Object.values(input.memory)
|
|
20
|
+
.filter((entry): entry is AgentMemo => typeof entry === "object" && entry !== null && "summary" in entry)
|
|
21
|
+
.map((entry) => entry.summary.trim())
|
|
22
|
+
.filter(Boolean);
|
|
23
|
+
|
|
24
|
+
return {
|
|
25
|
+
entryPoints: [],
|
|
26
|
+
initializationFlow: [],
|
|
27
|
+
callGraph: [],
|
|
28
|
+
restoredNames: [],
|
|
29
|
+
summary:
|
|
30
|
+
memoSummaries.join(" ").trim() ||
|
|
31
|
+
`Chunk analysis for ${input.artifactUrl} fell back after retries: ${input.error.message}`,
|
|
32
|
+
notableLibraries: [],
|
|
33
|
+
investigationTips: [
|
|
34
|
+
`Chunk synthesis fell back after retries: ${input.error.message}`,
|
|
35
|
+
"Re-run with lower concurrency or inspect this chunk manually if it is critical.",
|
|
36
|
+
],
|
|
37
|
+
risks: [],
|
|
38
|
+
};
|
|
39
|
+
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import { Buffer } from "buffer";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
import type { StreamedObjectTelemetry } from "./ai-json";
|
|
5
|
+
import type { FormattedArtifact } from "./formatter";
|
|
6
|
+
|
|
7
|
+
// Default per-chunk byte budget (80 KiB); also the lower bound applied by deriveChunkSizeBytes.
export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
|
|
8
|
+
|
|
9
|
+
export function createPromptEnvelope(input: {
|
|
10
|
+
pageUrl: string;
|
|
11
|
+
artifact: FormattedArtifact;
|
|
12
|
+
chunk: string;
|
|
13
|
+
chunkIndex: number;
|
|
14
|
+
totalChunks: number;
|
|
15
|
+
memory?: unknown;
|
|
16
|
+
retrievedContext?: string[];
|
|
17
|
+
}): string {
|
|
18
|
+
return [
|
|
19
|
+
`Target page: ${input.pageUrl}`,
|
|
20
|
+
`Artifact URL: ${input.artifact.url}`,
|
|
21
|
+
`Artifact type: ${input.artifact.type}`,
|
|
22
|
+
`Discovered from: ${input.artifact.discoveredFrom}`,
|
|
23
|
+
`Chunk ${input.chunkIndex + 1} of ${input.totalChunks}`,
|
|
24
|
+
input.artifact.formattingNote ? `Formatting note: ${input.artifact.formattingNote}` : "Formatting note: none",
|
|
25
|
+
input.memory ? `Swarm memory:\n${JSON.stringify(input.memory, null, 2)}` : "Swarm memory: none yet",
|
|
26
|
+
input.retrievedContext && input.retrievedContext.length > 0
|
|
27
|
+
? `Local RAG evidence:\n${input.retrievedContext.map((segment, index) => `Segment ${index + 1}:\n${segment}`).join("\n\n")}`
|
|
28
|
+
: "Local RAG evidence: none",
|
|
29
|
+
"Artifact content:",
|
|
30
|
+
"```text",
|
|
31
|
+
input.chunk,
|
|
32
|
+
"```",
|
|
33
|
+
].join("\n\n");
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
function findSplitBoundary(source: string, start: number, end: number): number {
|
|
37
|
+
const minimumPreferredIndex = start + Math.max(1, Math.floor((end - start) * 0.6));
|
|
38
|
+
const preferredDelimiters = new Set(["\n", ";", "}", " ", ","]);
|
|
39
|
+
|
|
40
|
+
for (let cursor = end - 1; cursor >= minimumPreferredIndex; cursor -= 1) {
|
|
41
|
+
const character = source[cursor];
|
|
42
|
+
if (character && preferredDelimiters.has(character)) {
|
|
43
|
+
return cursor + 1;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
return end;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
export function deriveChunkSizeBytes(modelContextSize: number): number {
|
|
51
|
+
const validatedContextSize = z.number().int().positive().parse(modelContextSize);
|
|
52
|
+
const derived = Math.floor(validatedContextSize * 0.9);
|
|
53
|
+
return Math.max(DEFAULT_CHUNK_SIZE_BYTES, derived);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_BYTES): string[] {
|
|
57
|
+
const validatedSource = z.string().parse(source);
|
|
58
|
+
const validatedMaxBytes = z.number().int().positive().parse(maxBytes);
|
|
59
|
+
|
|
60
|
+
if (validatedSource.length === 0) {
|
|
61
|
+
return [];
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const chunks: string[] = [];
|
|
65
|
+
let start = 0;
|
|
66
|
+
|
|
67
|
+
while (start < validatedSource.length) {
|
|
68
|
+
let end = Math.min(validatedSource.length, start + validatedMaxBytes);
|
|
69
|
+
|
|
70
|
+
while (end > start && Buffer.byteLength(validatedSource.slice(start, end), "utf8") > validatedMaxBytes) {
|
|
71
|
+
end -= 1;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if (end <= start) {
|
|
75
|
+
end = start + 1;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const splitAt = end === validatedSource.length ? end : findSplitBoundary(validatedSource, start, end);
|
|
79
|
+
chunks.push(validatedSource.slice(start, splitAt));
|
|
80
|
+
start = splitAt;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
return chunks;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
export function normalizeAiError(error: unknown): Error {
|
|
87
|
+
if (!(error instanceof Error)) {
|
|
88
|
+
return new Error("AI analysis failed with an unknown error.");
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const message = error.message.toLowerCase();
|
|
92
|
+
if (message.includes("rate limit")) {
|
|
93
|
+
return new Error("Provider rate limit hit during analysis. Please retry in a moment.");
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
if (message.includes("api key")) {
|
|
97
|
+
return new Error("The configured API key was rejected by the provider.");
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
return error;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
export function formatAgentTelemetrySuffix(telemetry: StreamedObjectTelemetry): string {
|
|
104
|
+
const tokenCount = telemetry.outputTokens ?? telemetry.estimatedOutputTokens;
|
|
105
|
+
const tokenLabel = telemetry.outputTokens !== undefined ? `${tokenCount} tok` : `~${tokenCount} tok`;
|
|
106
|
+
const tpsLabel = telemetry.tokensPerSecond !== undefined ? ` ${telemetry.tokensPerSecond} tps` : "";
|
|
107
|
+
return ` [${tokenLabel}${tpsLabel}]`;
|
|
108
|
+
}
|
package/lib/cli-args.ts
CHANGED
|
@@ -19,6 +19,7 @@ const rawCliArgsSchema = z.object({
|
|
|
19
19
|
baseURL: z.string().url().optional(),
|
|
20
20
|
model: z.string().min(1).optional(),
|
|
21
21
|
contextSize: z.number().int().positive().optional(),
|
|
22
|
+
analysisConcurrency: z.number().int().positive().optional(),
|
|
22
23
|
maxPages: z.number().int().positive().optional(),
|
|
23
24
|
maxArtifacts: z.number().int().positive().optional(),
|
|
24
25
|
maxDepth: z.number().int().nonnegative().optional(),
|
|
@@ -58,13 +59,14 @@ const optionMap = new Map<string, keyof CliArgs>([
|
|
|
58
59
|
["--base-url", "baseURL"],
|
|
59
60
|
["--model", "model"],
|
|
60
61
|
["--context-size", "contextSize"],
|
|
62
|
+
["--analysis-concurrency", "analysisConcurrency"],
|
|
61
63
|
["--max-pages", "maxPages"],
|
|
62
64
|
["--max-artifacts", "maxArtifacts"],
|
|
63
65
|
["--max-depth", "maxDepth"],
|
|
64
66
|
]);
|
|
65
67
|
|
|
66
68
|
const booleanKeys = new Set<keyof CliArgs>(["help", "version", "headless", "reconfigure", "listModels", "localRag", "verboseAgents"]);
|
|
67
|
-
const numberKeys = new Set<keyof CliArgs>(["contextSize", "maxPages", "maxArtifacts", "maxDepth"]);
|
|
69
|
+
const numberKeys = new Set<keyof CliArgs>(["contextSize", "analysisConcurrency", "maxPages", "maxArtifacts", "maxDepth"]);
|
|
68
70
|
|
|
69
71
|
function normalizeValue(key: keyof CliArgs, value: string): unknown {
|
|
70
72
|
if (numberKeys.has(key)) {
|
|
@@ -154,6 +156,7 @@ export function renderHelpText(): string {
|
|
|
154
156
|
" --base-url <url> Base URL for the provider",
|
|
155
157
|
" --model <id> Model identifier",
|
|
156
158
|
" --context-size <tokens> Model context window, for example 128000 or 512000",
|
|
159
|
+
" --analysis-concurrency <n> Parallel chunk analyses per artifact",
|
|
157
160
|
" --list-models Fetch and print models using the resolved provider config",
|
|
158
161
|
" --local-rag Enable local lexical RAG for oversized artifacts",
|
|
159
162
|
" --reconfigure Force interactive provider reconfiguration",
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
export async function mapWithConcurrency<TInput, TOutput>(
|
|
2
|
+
items: readonly TInput[],
|
|
3
|
+
concurrency: number,
|
|
4
|
+
mapper: (item: TInput, index: number) => Promise<TOutput>,
|
|
5
|
+
): Promise<TOutput[]> {
|
|
6
|
+
const normalizedConcurrency = Math.max(1, Math.floor(concurrency));
|
|
7
|
+
const results = new Array<TOutput>(items.length);
|
|
8
|
+
let cursor = 0;
|
|
9
|
+
|
|
10
|
+
const workers = Array.from({ length: Math.min(normalizedConcurrency, items.length) }, async () => {
|
|
11
|
+
while (true) {
|
|
12
|
+
const currentIndex = cursor;
|
|
13
|
+
cursor += 1;
|
|
14
|
+
|
|
15
|
+
if (currentIndex >= items.length) {
|
|
16
|
+
return;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
results[currentIndex] = await mapper(items[currentIndex]!, currentIndex);
|
|
20
|
+
}
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
await Promise.all(workers);
|
|
24
|
+
return results;
|
|
25
|
+
}
|
package/package.json
CHANGED