@redstone-md/mapr 0.0.1-alpha → 0.0.2-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -28
- package/index.ts +13 -4
- package/lib/ai-analyzer.ts +44 -155
- package/lib/ai-json.ts +134 -0
- package/lib/analysis-schema.ts +135 -0
- package/lib/artifacts.ts +57 -73
- package/lib/cli-args.ts +16 -2
- package/lib/config.ts +95 -37
- package/lib/formatter.ts +1 -4
- package/lib/provider.ts +307 -14
- package/lib/reporter.ts +1 -1
- package/lib/scraper.ts +232 -18
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,69 +1,116 @@
|
|
|
1
1
|
# Mapr
|
|
2
2
|
|
|
3
|
-
Mapr is a Bun-native CLI/TUI for reverse-engineering frontend websites and build outputs. It crawls a target site,
|
|
3
|
+
Mapr is a Bun-native CLI/TUI for reverse-engineering frontend websites and build outputs. It crawls a target site, collects analyzable frontend artifacts, runs a multi-agent AI analysis pipeline over chunked code, and writes a Markdown report with entry points, initialization flow, inferred call graph edges, restored names, artifact summaries, and investigation tips.
|
|
4
|
+
|
|
5
|
+
This repository is public for source visibility and collaboration. The license remains source-available and restricted. Read the contribution and license sections before reusing or contributing to the codebase.
|
|
6
|
+
|
|
7
|
+
## Highlights
|
|
8
|
+
|
|
9
|
+
- Bun-only CLI/TUI with interactive setup through `@clack/prompts`
|
|
10
|
+
- OpenAI and OpenAI-compatible provider support
|
|
11
|
+
- Built-in provider presets for BlackBox AI, Nvidia NIM, and OnlySQ
|
|
12
|
+
- Model discovery with searchable selection
|
|
13
|
+
- Automatic context-window detection from provider model metadata when available
|
|
14
|
+
- Same-origin crawler with bounded page count and crawl depth
|
|
15
|
+
- JS bundle, worker, service worker, WASM, and source-map discovery
|
|
16
|
+
- Iframe-aware crawling for same-origin embedded pages
|
|
17
|
+
- Local RAG mode for multi-megabyte bundles
|
|
18
|
+
- Partial-report persistence when analysis fails mid-run
|
|
19
|
+
- Headless automation mode for CI or batch workflows
|
|
4
20
|
|
|
5
21
|
## What It Analyzes
|
|
6
22
|
|
|
7
|
-
- HTML entry pages and linked same-origin pages
|
|
8
|
-
- JavaScript bundles and
|
|
23
|
+
- HTML entry pages and linked same-origin pages for discovery
|
|
24
|
+
- JavaScript bundles, imported chunks, and inline bootstraps
|
|
9
25
|
- Service workers and worker scripts
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
26
|
+
- WASM modules through binary summaries
|
|
27
|
+
- Source maps and extracted original sources when available
|
|
28
|
+
- Same-origin iframe pages and the JS/WASM artifacts discovered inside them
|
|
13
29
|
- Optional local lexical RAG for oversized artifacts such as multi-megabyte bundles
|
|
14
30
|
|
|
31
|
+
Mapr does not analyze images, fonts, audio, video, PDFs, archives, or other presentation/binary assets.
|
|
32
|
+
|
|
15
33
|
## Runtime
|
|
16
34
|
|
|
17
35
|
- Bun only
|
|
18
36
|
- TypeScript in strict mode
|
|
19
37
|
- Interactive terminal UX with `@clack/prompts`
|
|
20
38
|
- AI analysis through Vercel AI SDK using OpenAI or OpenAI-compatible providers
|
|
39
|
+
- Built-in OpenAI-compatible presets for BlackBox AI, Nvidia NIM, and OnlySQ
|
|
40
|
+
- Automatic model context-size detection from provider model metadata when available
|
|
21
41
|
- Headless CLI mode for automation
|
|
22
|
-
- Live swarm progress with agent-level tracking and progress bars
|
|
42
|
+
- Live crawler and swarm progress with agent-level tracking and progress bars
|
|
43
|
+
|
|
44
|
+
## Install
|
|
45
|
+
|
|
46
|
+
Local development:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
bun install
|
|
50
|
+
bun run index.ts
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Published package usage:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
npx @redstone-md/mapr --help
|
|
57
|
+
```
|
|
23
58
|
|
|
24
59
|
## Workflow
|
|
25
60
|
|
|
26
61
|
1. Load or configure AI provider settings from `~/.mapr/config.json`
|
|
27
|
-
2. Discover models from the provider
|
|
28
|
-
3. Let the user search and select a model,
|
|
29
|
-
4. Crawl the target website and
|
|
62
|
+
2. Discover models from the provider catalog endpoint
|
|
63
|
+
3. Let the user search and select a model, auto-detect the model context size when possible, and fall back to a manual prompt when needed
|
|
64
|
+
4. Crawl the target website, same-origin iframe pages, and discovered code artifacts with bounded page count and crawl depth
|
|
30
65
|
5. Format analyzable content where possible
|
|
31
66
|
6. Optionally build a local lexical RAG index for oversized artifacts
|
|
32
|
-
7. Run a communicating swarm of analysis agents over chunked artifact content
|
|
67
|
+
7. Run a communicating swarm of analysis agents over chunked artifact content with structured-output fallback for providers that only support plain text
|
|
33
68
|
8. Generate a Markdown report in the current working directory
|
|
34
69
|
|
|
35
|
-
##
|
|
70
|
+
## Provider Presets
|
|
36
71
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
72
|
+
- `blackbox` -> `https://api.blackbox.ai`
|
|
73
|
+
- `nvidia-nim` -> `https://integrate.api.nvidia.com/v1`
|
|
74
|
+
- `onlysq` -> `https://api.onlysq.ru/ai/openai`
|
|
75
|
+
- `custom` -> any other OpenAI-compatible endpoint
|
|
76
|
+
|
|
77
|
+
## Usage
|
|
41
78
|
|
|
42
|
-
|
|
79
|
+
Interactive:
|
|
43
80
|
|
|
44
81
|
```bash
|
|
45
|
-
|
|
82
|
+
bun start
|
|
46
83
|
```
|
|
47
84
|
|
|
48
|
-
|
|
85
|
+
Headless:
|
|
49
86
|
|
|
50
87
|
```bash
|
|
51
88
|
npx @redstone-md/mapr \
|
|
52
89
|
--headless \
|
|
53
90
|
--url http://localhost:5178 \
|
|
54
|
-
--provider-
|
|
55
|
-
--provider-name "Local vLLM" \
|
|
91
|
+
--provider-preset onlysq \
|
|
56
92
|
--api-key secret \
|
|
57
|
-
--
|
|
58
|
-
--model qwen2.5-coder \
|
|
93
|
+
--model mistralai/devstral-small-2507 \
|
|
59
94
|
--context-size 512000 \
|
|
60
|
-
--local-rag
|
|
95
|
+
--local-rag \
|
|
96
|
+
--max-depth 3
|
|
61
97
|
```
|
|
62
98
|
|
|
99
|
+
List models with detected context sizes when available:
|
|
100
|
+
|
|
63
101
|
```bash
|
|
64
|
-
npx @redstone-md/mapr --list-models --headless --provider-
|
|
102
|
+
npx @redstone-md/mapr --list-models --headless --provider-preset nvidia-nim --api-key secret
|
|
65
103
|
```
|
|
66
104
|
|
|
105
|
+
Useful flags:
|
|
106
|
+
|
|
107
|
+
- `--max-pages <n>` limits same-origin HTML pages
|
|
108
|
+
- `--max-artifacts <n>` limits total fetched analyzable artifacts
|
|
109
|
+
- `--max-depth <n>` limits crawler hop depth from the entry page
|
|
110
|
+
- `--local-rag` enables local lexical retrieval for oversized bundles
|
|
111
|
+
- `--verbose-agents` prints swarm completion events as they finish
|
|
112
|
+
- `--reconfigure` forces provider setup even if config already exists
|
|
113
|
+
|
|
67
114
|
## Swarm Design
|
|
68
115
|
|
|
69
116
|
Mapr uses a communicating agent swarm per chunk:
|
|
@@ -74,22 +121,32 @@ Mapr uses a communicating agent swarm per chunk:
|
|
|
74
121
|
- `security`: identifies risks, persistence, caching, and operator tips
|
|
75
122
|
- `synthesizer`: merges the upstream notes into the final chunk analysis
|
|
76
123
|
|
|
77
|
-
Progress is shown
|
|
124
|
+
Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts, and swarm agent/chunk execution.
|
|
78
125
|
|
|
79
126
|
## Large Bundle Handling
|
|
80
127
|
|
|
81
128
|
- Mapr stores the selected model context size and derives a larger chunk budget from it.
|
|
129
|
+
- When a provider exposes context metadata in its model catalog, Mapr saves that value automatically.
|
|
82
130
|
- Optional `--local-rag` mode builds a local lexical retrieval index so very large artifacts such as 5 MB bundles can feed more relevant sibling segments into the swarm without forcing the whole file into one prompt.
|
|
83
131
|
- Formatting no longer has a hard artifact-size cutoff. If formatting fails, Mapr falls back to raw content instead of skipping by size.
|
|
84
132
|
|
|
85
133
|
## Output
|
|
86
134
|
|
|
87
|
-
Each run writes a file named like:
|
|
135
|
+
Each run writes a Markdown file named like:
|
|
88
136
|
|
|
89
137
|
```text
|
|
90
138
|
report-example.com-2026-03-15T12-34-56-789Z.md
|
|
91
139
|
```
|
|
92
140
|
|
|
141
|
+
If analysis fails after artifact discovery or formatting has already completed, Mapr still writes a partial report and includes the analysis error in the document.
|
|
142
|
+
|
|
143
|
+
## Limitations
|
|
144
|
+
|
|
145
|
+
- AI-generated call graphs and symbol renames are inferred, not authoritative.
|
|
146
|
+
- WASM analysis is summary-based unless deeper lifting/disassembly is added.
|
|
147
|
+
- Crawl scope is intentionally bounded by same-origin policy, page limits, artifact limits, and depth limits.
|
|
148
|
+
- Very large or heavily obfuscated bundles still depend on model quality and provider behavior.
|
|
149
|
+
|
|
93
150
|
## Disclaimer
|
|
94
151
|
|
|
95
152
|
- Mapr produces assisted reverse-engineering output, not a formal proof of program behavior.
|
|
@@ -99,7 +156,7 @@ report-example.com-2026-03-15T12-34-56-789Z.md
|
|
|
99
156
|
|
|
100
157
|
## Contribution Terms
|
|
101
158
|
|
|
102
|
-
- This project is
|
|
159
|
+
- This project is public and source-available, but it is not open source.
|
|
103
160
|
- Contributions are accepted only under the repository owner’s terms.
|
|
104
161
|
- By submitting a contribution, you agree that the maintainer may use, modify, relicense, and redistribute your contribution as part of Mapr without compensation.
|
|
105
162
|
- Do not submit code unless you have the rights to contribute it.
|
package/index.ts
CHANGED
|
@@ -4,8 +4,9 @@ import { cancel, confirm, intro, isCancel, log, outro, spinner, text } from "@cl
|
|
|
4
4
|
import pc from "picocolors";
|
|
5
5
|
import packageJson from "./package.json";
|
|
6
6
|
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
7
|
+
import { buildAnalysisSnapshot, PartialAnalysisError } from "./lib/analysis-schema";
|
|
8
|
+
import { AiBundleAnalyzer, chunkTextByBytes, deriveChunkSizeBytes } from "./lib/ai-analyzer";
|
|
9
|
+
import { getConfigOverrides, parseCliArgs, renderHelpText } from "./lib/cli-args";
|
|
9
10
|
import { ConfigManager } from "./lib/config";
|
|
10
11
|
import { BundleFormatter } from "./lib/formatter";
|
|
11
12
|
import { renderProgressBar } from "./lib/progress";
|
|
@@ -98,8 +99,12 @@ async function run(): Promise<void> {
|
|
|
98
99
|
}
|
|
99
100
|
|
|
100
101
|
if (args.listModels) {
|
|
101
|
-
const models = await configManager.
|
|
102
|
-
console.log(
|
|
102
|
+
const models = await configManager.listModelCatalog(await configManager.resolveConfigDraft(configOverrides));
|
|
103
|
+
console.log(
|
|
104
|
+
models
|
|
105
|
+
.map((model) => (model.contextSize ? `${model.id}\t${model.contextSize}` : model.id))
|
|
106
|
+
.join("\n"),
|
|
107
|
+
);
|
|
103
108
|
return;
|
|
104
109
|
}
|
|
105
110
|
|
|
@@ -116,6 +121,10 @@ async function run(): Promise<void> {
|
|
|
116
121
|
const scraper = new BundleScraper(fetch, {
|
|
117
122
|
maxPages: args.maxPages,
|
|
118
123
|
maxArtifacts: args.maxArtifacts,
|
|
124
|
+
maxDepth: args.maxDepth,
|
|
125
|
+
onProgress(event) {
|
|
126
|
+
scrapeStep.message(event.message);
|
|
127
|
+
},
|
|
119
128
|
});
|
|
120
129
|
const scrapeResult = await scraper.scrape(targetUrl);
|
|
121
130
|
scrapeStep.stop(
|
package/lib/ai-analyzer.ts
CHANGED
|
@@ -1,7 +1,19 @@
|
|
|
1
|
-
import { generateText, Output } from "ai";
|
|
2
|
-
import { Buffer } from "buffer";
|
|
3
1
|
import { z } from "zod";
|
|
2
|
+
import { Buffer } from "buffer";
|
|
4
3
|
|
|
4
|
+
import {
|
|
5
|
+
agentMemoSchema,
|
|
6
|
+
artifactSummarySchema,
|
|
7
|
+
buildAnalysisSnapshot,
|
|
8
|
+
chunkAnalysisSchema,
|
|
9
|
+
finalAnalysisSchema,
|
|
10
|
+
type AgentMemo,
|
|
11
|
+
type ArtifactSummary,
|
|
12
|
+
type BundleAnalysis,
|
|
13
|
+
type ChunkAnalysis,
|
|
14
|
+
PartialAnalysisError,
|
|
15
|
+
} from "./analysis-schema";
|
|
16
|
+
import { generateObjectWithTextFallback } from "./ai-json";
|
|
5
17
|
import { artifactTypeSchema } from "./artifacts";
|
|
6
18
|
import type { FormattedArtifact } from "./formatter";
|
|
7
19
|
import { LocalArtifactRag } from "./local-rag";
|
|
@@ -10,63 +22,6 @@ import { SWARM_AGENT_ORDER, getGlobalMissionPrompt, getSwarmAgentPrompt, type Sw
|
|
|
10
22
|
|
|
11
23
|
export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
|
|
12
24
|
|
|
13
|
-
const entryPointSchema = z.object({
|
|
14
|
-
symbol: z.string().min(1),
|
|
15
|
-
description: z.string().min(1),
|
|
16
|
-
evidence: z.string().min(1),
|
|
17
|
-
});
|
|
18
|
-
|
|
19
|
-
const callGraphEdgeSchema = z.object({
|
|
20
|
-
caller: z.string().min(1),
|
|
21
|
-
callee: z.string().min(1),
|
|
22
|
-
rationale: z.string().min(1),
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
const renamedSymbolSchema = z.object({
|
|
26
|
-
originalName: z.string().min(1),
|
|
27
|
-
suggestedName: z.string().min(1),
|
|
28
|
-
justification: z.string().min(1),
|
|
29
|
-
});
|
|
30
|
-
|
|
31
|
-
const agentMemoSchema = z.object({
|
|
32
|
-
role: z.string().min(1),
|
|
33
|
-
summary: z.string().min(1),
|
|
34
|
-
observations: z.array(z.string().min(1)).default([]),
|
|
35
|
-
evidence: z.array(z.string().min(1)).default([]),
|
|
36
|
-
nextQuestions: z.array(z.string().min(1)).default([]),
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
const chunkAnalysisSchema = z.object({
|
|
40
|
-
entryPoints: z.array(entryPointSchema).default([]),
|
|
41
|
-
initializationFlow: z.array(z.string().min(1)).default([]),
|
|
42
|
-
callGraph: z.array(callGraphEdgeSchema).default([]),
|
|
43
|
-
restoredNames: z.array(renamedSymbolSchema).default([]),
|
|
44
|
-
summary: z.string().min(1),
|
|
45
|
-
notableLibraries: z.array(z.string().min(1)).default([]),
|
|
46
|
-
investigationTips: z.array(z.string().min(1)).default([]),
|
|
47
|
-
risks: z.array(z.string().min(1)).default([]),
|
|
48
|
-
});
|
|
49
|
-
|
|
50
|
-
const artifactSummarySchema = z.object({
|
|
51
|
-
url: z.string().url(),
|
|
52
|
-
type: artifactTypeSchema,
|
|
53
|
-
chunkCount: z.number().int().nonnegative(),
|
|
54
|
-
summary: z.string().min(1),
|
|
55
|
-
});
|
|
56
|
-
|
|
57
|
-
const finalAnalysisSchema = z.object({
|
|
58
|
-
overview: z.string().min(1),
|
|
59
|
-
entryPoints: z.array(entryPointSchema).default([]),
|
|
60
|
-
initializationFlow: z.array(z.string().min(1)).default([]),
|
|
61
|
-
callGraph: z.array(callGraphEdgeSchema).default([]),
|
|
62
|
-
restoredNames: z.array(renamedSymbolSchema).default([]),
|
|
63
|
-
notableLibraries: z.array(z.string().min(1)).default([]),
|
|
64
|
-
investigationTips: z.array(z.string().min(1)).default([]),
|
|
65
|
-
risks: z.array(z.string().min(1)).default([]),
|
|
66
|
-
artifactSummaries: z.array(artifactSummarySchema),
|
|
67
|
-
analyzedChunkCount: z.number().int().nonnegative(),
|
|
68
|
-
});
|
|
69
|
-
|
|
70
25
|
const analyzeInputSchema = z.object({
|
|
71
26
|
pageUrl: z.string().url(),
|
|
72
27
|
artifacts: z.array(
|
|
@@ -82,8 +37,6 @@ const analyzeInputSchema = z.object({
|
|
|
82
37
|
}),
|
|
83
38
|
),
|
|
84
39
|
});
|
|
85
|
-
|
|
86
|
-
export type BundleAnalysis = z.infer<typeof finalAnalysisSchema>;
|
|
87
40
|
export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
|
|
88
41
|
export type AnalysisProgressState = "started" | "completed";
|
|
89
42
|
|
|
@@ -106,16 +59,6 @@ interface AnalyzerOptions {
|
|
|
106
59
|
onProgress?: (event: AnalysisProgressEvent) => void;
|
|
107
60
|
}
|
|
108
61
|
|
|
109
|
-
export class PartialAnalysisError extends Error {
|
|
110
|
-
public readonly partialAnalysis: BundleAnalysis;
|
|
111
|
-
|
|
112
|
-
public constructor(message: string, partialAnalysis: BundleAnalysis) {
|
|
113
|
-
super(message);
|
|
114
|
-
this.name = "PartialAnalysisError";
|
|
115
|
-
this.partialAnalysis = partialAnalysis;
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
|
|
119
62
|
function createPromptEnvelope(input: {
|
|
120
63
|
pageUrl: string;
|
|
121
64
|
artifact: FormattedArtifact;
|
|
@@ -193,23 +136,6 @@ export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_B
|
|
|
193
136
|
return chunks;
|
|
194
137
|
}
|
|
195
138
|
|
|
196
|
-
function deduplicate<T>(items: T[], keySelector: (item: T) => string): T[] {
|
|
197
|
-
const seen = new Set<string>();
|
|
198
|
-
const deduplicated: T[] = [];
|
|
199
|
-
|
|
200
|
-
for (const item of items) {
|
|
201
|
-
const key = keySelector(item);
|
|
202
|
-
if (seen.has(key)) {
|
|
203
|
-
continue;
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
seen.add(key);
|
|
207
|
-
deduplicated.push(item);
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
return deduplicated;
|
|
211
|
-
}
|
|
212
|
-
|
|
213
139
|
function normalizeAiError(error: unknown): Error {
|
|
214
140
|
if (!(error instanceof Error)) {
|
|
215
141
|
return new Error("AI analysis failed with an unknown error.");
|
|
@@ -227,49 +153,6 @@ function normalizeAiError(error: unknown): Error {
|
|
|
227
153
|
return error;
|
|
228
154
|
}
|
|
229
155
|
|
|
230
|
-
export function buildAnalysisSnapshot(input: {
|
|
231
|
-
overview: string;
|
|
232
|
-
artifactSummaries?: Array<z.infer<typeof artifactSummarySchema>>;
|
|
233
|
-
chunkAnalyses?: Array<z.infer<typeof chunkAnalysisSchema>>;
|
|
234
|
-
}): BundleAnalysis {
|
|
235
|
-
const artifactSummaries = input.artifactSummaries ?? [];
|
|
236
|
-
const chunkAnalyses = input.chunkAnalyses ?? [];
|
|
237
|
-
|
|
238
|
-
return finalAnalysisSchema.parse({
|
|
239
|
-
overview: input.overview,
|
|
240
|
-
entryPoints: deduplicate(
|
|
241
|
-
chunkAnalyses.flatMap((analysis) => analysis.entryPoints),
|
|
242
|
-
(entryPoint) => `${entryPoint.symbol}:${entryPoint.description}`,
|
|
243
|
-
),
|
|
244
|
-
initializationFlow: deduplicate(
|
|
245
|
-
chunkAnalyses.flatMap((analysis) => analysis.initializationFlow),
|
|
246
|
-
(step) => step,
|
|
247
|
-
),
|
|
248
|
-
callGraph: deduplicate(
|
|
249
|
-
chunkAnalyses.flatMap((analysis) => analysis.callGraph),
|
|
250
|
-
(edge) => `${edge.caller}->${edge.callee}`,
|
|
251
|
-
),
|
|
252
|
-
restoredNames: deduplicate(
|
|
253
|
-
chunkAnalyses.flatMap((analysis) => analysis.restoredNames),
|
|
254
|
-
(entry) => `${entry.originalName}:${entry.suggestedName}`,
|
|
255
|
-
),
|
|
256
|
-
notableLibraries: deduplicate(
|
|
257
|
-
chunkAnalyses.flatMap((analysis) => analysis.notableLibraries),
|
|
258
|
-
(library) => library,
|
|
259
|
-
),
|
|
260
|
-
investigationTips: deduplicate(
|
|
261
|
-
chunkAnalyses.flatMap((analysis) => analysis.investigationTips),
|
|
262
|
-
(tip) => tip,
|
|
263
|
-
),
|
|
264
|
-
risks: deduplicate(
|
|
265
|
-
chunkAnalyses.flatMap((analysis) => analysis.risks),
|
|
266
|
-
(risk) => risk,
|
|
267
|
-
),
|
|
268
|
-
artifactSummaries,
|
|
269
|
-
analyzedChunkCount: chunkAnalyses.length,
|
|
270
|
-
});
|
|
271
|
-
}
|
|
272
|
-
|
|
273
156
|
export class AiBundleAnalyzer {
|
|
274
157
|
private readonly providerClient: AiProviderClient;
|
|
275
158
|
private readonly chunkSizeBytes: number;
|
|
@@ -301,8 +184,8 @@ export class AiBundleAnalyzer {
|
|
|
301
184
|
});
|
|
302
185
|
}
|
|
303
186
|
|
|
304
|
-
const chunkAnalyses:
|
|
305
|
-
const artifactSummaries:
|
|
187
|
+
const chunkAnalyses: ChunkAnalysis[] = [];
|
|
188
|
+
const artifactSummaries: ArtifactSummary[] = [];
|
|
306
189
|
|
|
307
190
|
try {
|
|
308
191
|
const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
|
|
@@ -310,7 +193,7 @@ export class AiBundleAnalyzer {
|
|
|
310
193
|
for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
|
|
311
194
|
const artifact = validatedInput.artifacts[artifactIndex]!;
|
|
312
195
|
const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
|
|
313
|
-
const perArtifactChunkAnalyses:
|
|
196
|
+
const perArtifactChunkAnalyses: ChunkAnalysis[] = [];
|
|
314
197
|
|
|
315
198
|
this.emitProgress({
|
|
316
199
|
stage: "artifact",
|
|
@@ -401,8 +284,8 @@ export class AiBundleAnalyzer {
|
|
|
401
284
|
artifactIndex: number;
|
|
402
285
|
artifactCount: number;
|
|
403
286
|
localRag: LocalArtifactRag | null;
|
|
404
|
-
}): Promise<
|
|
405
|
-
const memory: Partial<Record<SwarmAgentName,
|
|
287
|
+
}): Promise<ChunkAnalysis> {
|
|
288
|
+
const memory: Partial<Record<SwarmAgentName, AgentMemo | ChunkAnalysis>> = {};
|
|
406
289
|
|
|
407
290
|
for (const agent of SWARM_AGENT_ORDER) {
|
|
408
291
|
this.emitProgress({
|
|
@@ -452,8 +335,8 @@ export class AiBundleAnalyzer {
|
|
|
452
335
|
},
|
|
453
336
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
454
337
|
retrievedContext: string[],
|
|
455
|
-
): Promise<
|
|
456
|
-
|
|
338
|
+
): Promise<AgentMemo> {
|
|
339
|
+
return generateObjectWithTextFallback({
|
|
457
340
|
model: this.providerClient.getModel(),
|
|
458
341
|
system: getSwarmAgentPrompt(agent),
|
|
459
342
|
prompt: createPromptEnvelope({
|
|
@@ -465,7 +348,11 @@ export class AiBundleAnalyzer {
|
|
|
465
348
|
memory,
|
|
466
349
|
retrievedContext,
|
|
467
350
|
}),
|
|
468
|
-
|
|
351
|
+
schema: agentMemoSchema,
|
|
352
|
+
contract: [
|
|
353
|
+
"JSON contract:",
|
|
354
|
+
'{"role":"string","summary":"string","observations":["string"],"evidence":["string"],"nextQuestions":["string"]}',
|
|
355
|
+
].join("\n"),
|
|
469
356
|
maxRetries: 2,
|
|
470
357
|
providerOptions: {
|
|
471
358
|
openai: {
|
|
@@ -473,8 +360,6 @@ export class AiBundleAnalyzer {
|
|
|
473
360
|
},
|
|
474
361
|
},
|
|
475
362
|
});
|
|
476
|
-
|
|
477
|
-
return agentMemoSchema.parse(result.output);
|
|
478
363
|
}
|
|
479
364
|
|
|
480
365
|
private async runSynthesisAgent(
|
|
@@ -487,8 +372,8 @@ export class AiBundleAnalyzer {
|
|
|
487
372
|
},
|
|
488
373
|
memory: Partial<Record<SwarmAgentName, unknown>>,
|
|
489
374
|
retrievedContext: string[],
|
|
490
|
-
): Promise<
|
|
491
|
-
|
|
375
|
+
): Promise<ChunkAnalysis> {
|
|
376
|
+
return generateObjectWithTextFallback({
|
|
492
377
|
model: this.providerClient.getModel(),
|
|
493
378
|
system: getSwarmAgentPrompt("synthesizer"),
|
|
494
379
|
prompt: createPromptEnvelope({
|
|
@@ -500,7 +385,11 @@ export class AiBundleAnalyzer {
|
|
|
500
385
|
memory,
|
|
501
386
|
retrievedContext,
|
|
502
387
|
}),
|
|
503
|
-
|
|
388
|
+
schema: chunkAnalysisSchema,
|
|
389
|
+
contract: [
|
|
390
|
+
"JSON contract:",
|
|
391
|
+
'{"entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"summary":"string","notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
|
|
392
|
+
].join("\n"),
|
|
504
393
|
maxRetries: 2,
|
|
505
394
|
providerOptions: {
|
|
506
395
|
openai: {
|
|
@@ -508,17 +397,15 @@ export class AiBundleAnalyzer {
|
|
|
508
397
|
},
|
|
509
398
|
},
|
|
510
399
|
});
|
|
511
|
-
|
|
512
|
-
return chunkAnalysisSchema.parse(result.output);
|
|
513
400
|
}
|
|
514
401
|
|
|
515
402
|
private async summarizeFindings(
|
|
516
403
|
pageUrl: string,
|
|
517
|
-
artifactSummaries:
|
|
518
|
-
chunkAnalyses:
|
|
404
|
+
artifactSummaries: ArtifactSummary[],
|
|
405
|
+
chunkAnalyses: ChunkAnalysis[],
|
|
519
406
|
): Promise<BundleAnalysis> {
|
|
520
407
|
try {
|
|
521
|
-
const result = await
|
|
408
|
+
const result = await generateObjectWithTextFallback({
|
|
522
409
|
model: this.providerClient.getModel(),
|
|
523
410
|
system: [
|
|
524
411
|
getGlobalMissionPrompt(),
|
|
@@ -532,12 +419,14 @@ export class AiBundleAnalyzer {
|
|
|
532
419
|
"Chunk analyses:",
|
|
533
420
|
JSON.stringify(chunkAnalyses, null, 2),
|
|
534
421
|
].join("\n\n"),
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
analyzedChunkCount: true,
|
|
539
|
-
}),
|
|
422
|
+
schema: finalAnalysisSchema.omit({
|
|
423
|
+
artifactSummaries: true,
|
|
424
|
+
analyzedChunkCount: true,
|
|
540
425
|
}),
|
|
426
|
+
contract: [
|
|
427
|
+
"JSON contract:",
|
|
428
|
+
'{"overview":"string","entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
|
|
429
|
+
].join("\n"),
|
|
541
430
|
maxRetries: 2,
|
|
542
431
|
providerOptions: {
|
|
543
432
|
openai: {
|
|
@@ -547,7 +436,7 @@ export class AiBundleAnalyzer {
|
|
|
547
436
|
});
|
|
548
437
|
|
|
549
438
|
return finalAnalysisSchema.parse({
|
|
550
|
-
...result
|
|
439
|
+
...result,
|
|
551
440
|
artifactSummaries,
|
|
552
441
|
analyzedChunkCount: chunkAnalyses.length,
|
|
553
442
|
});
|
package/lib/ai-json.ts
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { generateText, Output } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
const jsonFencePattern = /^```(?:json)?\s*([\s\S]*?)\s*```$/i;
|
|
5
|
+
|
|
6
|
+
/**
 * Scans forward from an opening bracket and returns the slice that ends where
 * the bracket depth returns to zero.
 *
 * Brackets inside double-quoted strings (with backslash escapes honoured) are
 * ignored. Bracket kinds are not matched against each other; only depth is
 * tracked, so the result is a candidate that still has to be validated.
 *
 * @param source - Full text being searched.
 * @param startIndex - Index of a `{` or `[` character in `source`.
 * @returns The balanced slice, or `null` when the text ends before depth reaches zero.
 */
function scanBalancedSliceFrom(source: string, startIndex: number): string | null {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let index = startIndex; index < source.length; index += 1) {
    const character = source[index];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (character === "\\") {
        escaped = true;
      } else if (character === "\"") {
        inString = false;
      }

      continue;
    }

    if (character === "\"") {
      inString = true;
      continue;
    }

    if (character === "{" || character === "[") {
      depth += 1;
      continue;
    }

    if (character === "}" || character === "]") {
      depth -= 1;
      if (depth === 0) {
        return source.slice(startIndex, index + 1);
      }
    }
  }

  return null;
}

/**
 * Finds a JSON object or array embedded in free-form text.
 *
 * Candidates are examined left to right. The first balanced slice that
 * `JSON.parse` accepts is returned, so bracketed prose ahead of the payload
 * (for example `Result [draft]: {"a":1}`) no longer hides the real JSON.
 *
 * @param source - Text that may contain a JSON value surrounded by other content.
 * @returns The first balanced slice that parses as JSON. If none parses, the
 *   first balanced slice found (the caller's own parse will then report the
 *   syntax error). `null` when no balanced slice exists.
 */
function extractBalancedJsonSlice(source: string): string | null {
  const openerPattern = /[\[{]/g;
  let firstBalanced: string | null = null;
  let cursor = 0;

  while (cursor < source.length) {
    openerPattern.lastIndex = cursor;
    const opener = openerPattern.exec(source);
    if (opener === null) {
      break;
    }

    const candidate = scanBalancedSliceFrom(source, opener.index);
    if (candidate === null) {
      // An unbalanced opener means everything after it is nested inside it
      // (typically truncated output). Stop rather than return an inner fragment.
      break;
    }

    try {
      JSON.parse(candidate);
      return candidate;
    } catch {
      if (firstBalanced === null) {
        firstBalanced = candidate;
      }
    }

    // Resume after the rejected slice so the scan stays linear in the input size.
    cursor = opener.index + candidate.length;
  }

  return firstBalanced;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Parses a JSON value out of raw model text.
 *
 * The text is trimmed, and if the whole response is a single markdown code
 * fence the fence is unwrapped first. The result is parsed directly; when that
 * fails, a balanced `{...}` / `[...]` slice is extracted and parsed instead.
 *
 * @param source - Raw text returned by the model.
 * @returns The parsed JSON value, not validated against any schema.
 * @throws Error when the text is empty or contains no balanced JSON slice.
 *   The final `JSON.parse` may also throw a `SyntaxError` for a malformed slice.
 */
export function extractJsonFromText(source: string): unknown {
  const text = source.trim();
  if (!text) {
    throw new Error("Model returned empty text instead of JSON.");
  }

  const fenceBody = text.match(jsonFencePattern)?.[1]?.trim();
  const candidate = fenceBody ?? text;

  try {
    return JSON.parse(candidate) as unknown;
  } catch {
    // Direct parsing failed; fall through to the embedded-slice search below.
  }

  const slice = extractBalancedJsonSlice(candidate);
  if (!slice) {
    throw new Error("No JSON object found in model output.");
  }

  return JSON.parse(slice) as unknown;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Lower-case message fragments that indicate a provider or SDK rejected (or
 * failed to honour) structured output. Both the camelCase SDK spelling and the
 * snake_case OpenAI wire names are listed, because OpenAI-compatible backends
 * may echo the request parameter name in their error text.
 */
const TEXT_JSON_FALLBACK_MARKERS: readonly string[] = [
  "responseformat",
  "response_format",
  "json_schema",
  "structured output",
  "structuredoutputs",
  "response did not match schema",
  "no object generated",
];

/**
 * Decides whether a failed structured-output request should be retried as a
 * plain-text request that asks for JSON.
 *
 * The error message is compared case-insensitively against known markers. The
 * `cause` chain is walked as well, so a wrapper error does not mask a matching
 * underlying failure.
 *
 * @param error - Whatever was thrown by the structured-output attempt.
 * @returns `true` when any `Error` in the chain carries a known marker;
 *   `false` for non-`Error` values and unrelated failures.
 */
export function shouldFallbackToTextJson(error: unknown): boolean {
  const visited = new Set<unknown>();
  let current: unknown = error;

  // `visited` guards against a cyclic `cause` chain.
  while (current instanceof Error && !visited.has(current)) {
    visited.add(current);

    const message = current.message.toLowerCase();
    if (TEXT_JSON_FALLBACK_MARKERS.some((marker) => message.includes(marker))) {
      return true;
    }

    const { cause } = current as Error & { cause?: unknown };
    current = cause;
  }

  return false;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Requests a schema-validated object from the model, falling back to a
 * plain-text JSON request when structured output is not usable.
 *
 * The first attempt calls `generateText` with `Output.object`. If that throws
 * an error accepted by `shouldFallbackToTextJson`, a second request is made
 * with `Output.text()` and a system prompt extended by JSON-only instructions
 * plus the caller's `contract`. The returned text is parsed with
 * `extractJsonFromText`. Both paths validate the result with `schema.parse`.
 *
 * @param input.model - Model handle forwarded to `generateText`.
 * @param input.system - Base system prompt.
 * @param input.prompt - User prompt, sent unchanged on both attempts.
 * @param input.schema - Zod schema the result must satisfy.
 * @param input.contract - Textual JSON shape description, used only on the fallback path.
 * @param input.maxRetries - Retry count forwarded to `generateText`; defaults to 2.
 * @param input.providerOptions - Forwarded to `generateText` only when defined.
 * @returns The parsed and validated object.
 * @throws The original error when it does not qualify for fallback, and any
 *   error raised by the fallback request, JSON extraction, or schema validation.
 */
export async function generateObjectWithTextFallback<TOutput>(input: {
  model: unknown;
  system: string;
  prompt: string;
  schema: z.ZodType<TOutput>;
  contract: string;
  maxRetries?: number;
  providerOptions?: Record<string, unknown>;
}): Promise<TOutput> {
  const { system, prompt, schema, contract, providerOptions } = input;
  const maxRetries = input.maxRetries ?? 2;
  // Leave the key out entirely when no provider options were supplied.
  const providerOptionsPart =
    providerOptions !== undefined ? { providerOptions: providerOptions as never } : {};

  try {
    const structuredResult = await generateText({
      model: input.model as never,
      system,
      prompt,
      output: Output.object({ schema }),
      maxRetries,
      ...providerOptionsPart,
    });

    return schema.parse(structuredResult.output);
  } catch (error) {
    const canFallback = shouldFallbackToTextJson(error);
    if (!canFallback) {
      throw error;
    }
  }

  const fallbackSystemLines = [
    system,
    "Return only one valid JSON object.",
    "Do not wrap the JSON in markdown fences.",
    "Do not add explanations before or after the JSON.",
    contract,
  ];

  const textResult = await generateText({
    model: input.model as never,
    system: fallbackSystemLines.join("\n"),
    prompt,
    output: Output.text(),
    maxRetries,
    ...providerOptionsPart,
  });

  const parsed = extractJsonFromText(textResult.output);
  return schema.parse(parsed);
}
|