@redstone-md/mapr 0.0.1-alpha → 0.0.2-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,69 +1,116 @@
1
1
  # Mapr
2
2
 
3
- Mapr is a Bun-native CLI/TUI for reverse-engineering frontend websites and build outputs. It crawls a target site, downloads related code artifacts, formats them for readability, runs a communicating AI swarm over chunked artifact content, and produces a Markdown analysis report with entry points, initialization flow, inferred call graph edges, restored names, investigation tips, and artifact summaries.
3
+ Mapr is a Bun-native CLI/TUI for reverse-engineering frontend websites and build outputs. It crawls a target site, collects analyzable frontend artifacts, runs a multi-agent AI analysis pipeline over chunked code, and writes a Markdown report with entry points, initialization flow, inferred call graph edges, restored names, artifact summaries, and investigation tips.
4
+
5
+ This repository is public for source visibility and collaboration. The license remains source-available and restricted. Read the contribution and license sections before reusing or contributing to the codebase.
6
+
7
+ ## Highlights
8
+
9
+ - Bun-only CLI/TUI with interactive setup through `@clack/prompts`
10
+ - OpenAI and OpenAI-compatible provider support
11
+ - Built-in provider presets for BlackBox AI, Nvidia NIM, and OnlySQ
12
+ - Model discovery with searchable selection
13
+ - Automatic context-window detection from provider model metadata when available
14
+ - Same-origin crawler with bounded page count and crawl depth
15
+ - JS bundle, worker, service worker, WASM, and source-map discovery
16
+ - Iframe-aware crawling for same-origin embedded pages
17
+ - Local RAG mode for multi-megabyte bundles
18
+ - Partial-report persistence when analysis fails mid-run
19
+ - Headless automation mode for CI or batch workflows
4
20
 
5
21
  ## What It Analyzes
6
22
 
7
- - HTML entry pages and linked same-origin pages
8
- - JavaScript bundles and imported chunks
23
+ - HTML entry pages and linked same-origin pages for discovery
24
+ - JavaScript bundles, imported chunks, and inline bootstraps
9
25
  - Service workers and worker scripts
10
- - Stylesheets and manifests
11
- - Referenced WASM modules through binary summaries
12
- - Cross-linked website artifacts discovered from page code
26
+ - WASM modules through binary summaries
27
+ - Source maps and extracted original sources when available
28
+ - Same-origin iframe pages and the JS/WASM artifacts discovered inside them
13
29
  - Optional local lexical RAG for oversized artifacts such as multi-megabyte bundles
14
30
 
31
+ Mapr does not analyze images, fonts, audio, video, PDFs, archives, or other presentation/binary assets.
32
+
15
33
  ## Runtime
16
34
 
17
35
  - Bun only
18
36
  - TypeScript in strict mode
19
37
  - Interactive terminal UX with `@clack/prompts`
20
38
  - AI analysis through Vercel AI SDK using OpenAI or OpenAI-compatible providers
39
+ - Built-in OpenAI-compatible presets for BlackBox AI, Nvidia NIM, and OnlySQ
40
+ - Automatic model context-size detection from provider model metadata when available
21
41
  - Headless CLI mode for automation
22
- - Live swarm progress with agent-level tracking and progress bars
42
+ - Live crawler and swarm progress with agent-level tracking and progress bars
43
+
44
+ ## Install
45
+
46
+ Local development:
47
+
48
+ ```bash
49
+ bun install
50
+ bun run index.ts
51
+ ```
52
+
53
+ Published package usage (Mapr is Bun-only, so Bun must be installed locally):
54
+
55
+ ```bash
56
+ npx @redstone-md/mapr --help
57
+ ```
23
58
 
24
59
  ## Workflow
25
60
 
26
61
  1. Load or configure AI provider settings from `~/.mapr/config.json`
27
- 2. Discover models from the provider `/models` endpoint
28
- 3. Let the user search and select a model, then save the model context size
29
- 4. Crawl the target website and fetch related artifacts
62
+ 2. Discover models from the provider catalog endpoint
63
+ 3. Let the user search and select a model, auto-detect the model context size when possible, and fall back to a manual prompt when needed
64
+ 4. Crawl the target website, same-origin iframe pages, and discovered code artifacts with bounded page count and crawl depth
30
65
  5. Format analyzable content where possible
31
66
  6. Optionally build a local lexical RAG index for oversized artifacts
32
- 7. Run a communicating swarm of analysis agents over chunked artifact content
67
+ 7. Run a communicating swarm of analysis agents over chunked artifact content with structured-output fallback for providers that only support plain text
33
68
  8. Generate a Markdown report in the current working directory
34
69
 
35
- ## Quick Start
70
+ ## Provider Presets
36
71
 
37
- ```bash
38
- bun install
39
- bun run index.ts
40
- ```
72
+ - `blackbox` -> `https://api.blackbox.ai`
73
+ - `nvidia-nim` -> `https://integrate.api.nvidia.com/v1`
74
+ - `onlysq` -> `https://api.onlysq.ru/ai/openai`
75
+ - `custom` -> any other OpenAI-compatible endpoint
76
+
77
+ ## Usage
41
78
 
42
- If the package is published and Bun is installed locally:
79
+ Interactive:
43
80
 
44
81
  ```bash
45
- npx @redstone-md/mapr --help
82
+ bun start
46
83
  ```
47
84
 
48
- ## Headless Examples
85
+ Headless:
49
86
 
50
87
  ```bash
51
88
  npx @redstone-md/mapr \
52
89
  --headless \
53
90
  --url http://localhost:5178 \
54
- --provider-type openai-compatible \
55
- --provider-name "Local vLLM" \
91
+ --provider-preset onlysq \
56
92
  --api-key secret \
57
- --base-url http://localhost:8000/v1 \
58
- --model qwen2.5-coder \
93
+ --model mistralai/devstral-small-2507 \
59
94
  --context-size 512000 \
60
- --local-rag
95
+ --local-rag \
96
+ --max-depth 3
61
97
  ```
62
98
 
99
+ List models with detected context sizes when available:
100
+
63
101
  ```bash
64
- npx @redstone-md/mapr --list-models --headless --provider-type openai-compatible --api-key secret --base-url http://localhost:8000/v1
102
+ npx @redstone-md/mapr --list-models --headless --provider-preset nvidia-nim --api-key secret
65
103
  ```
66
104
 
105
+ Useful flags:
106
+
107
+ - `--max-pages <n>` limits same-origin HTML pages
108
+ - `--max-artifacts <n>` limits total fetched analyzable artifacts
109
+ - `--max-depth <n>` limits crawler hop depth from the entry page
110
+ - `--local-rag` enables local lexical retrieval for oversized bundles
111
+ - `--verbose-agents` prints swarm completion events as they finish
112
+ - `--reconfigure` forces provider setup even if config already exists
113
+
67
114
  ## Swarm Design
68
115
 
69
116
  Mapr uses a communicating agent swarm per chunk:
@@ -74,22 +121,32 @@ Mapr uses a communicating agent swarm per chunk:
74
121
  - `security`: identifies risks, persistence, caching, and operator tips
75
122
  - `synthesizer`: merges the upstream notes into the final chunk analysis
76
123
 
77
- Progress is shown as a task progress bar plus agent/chunk status updates.
124
+ Progress is shown directly in the TUI for crawler fetches, depth skips, discovered nested artifacts, and swarm agent/chunk execution.
78
125
 
79
126
  ## Large Bundle Handling
80
127
 
81
128
  - Mapr stores the selected model context size and derives a larger chunk budget from it.
129
+ - When a provider exposes context metadata in its model catalog, Mapr saves that value automatically.
82
130
  - Optional `--local-rag` mode builds a local lexical retrieval index so very large artifacts such as 5 MB bundles can feed more relevant sibling segments into the swarm without forcing the whole file into one prompt.
83
131
  - Formatting no longer has a hard artifact-size cutoff. If formatting fails, Mapr falls back to raw content instead of skipping by size.
84
132
 
85
133
  ## Output
86
134
 
87
- Each run writes a file named like:
135
+ Each run writes a Markdown file named like:
88
136
 
89
137
  ```text
90
138
  report-example.com-2026-03-15T12-34-56-789Z.md
91
139
  ```
92
140
 
141
+ If analysis fails after artifact discovery or formatting has already completed, Mapr still writes a partial report and includes the analysis error in the document.
142
+
143
+ ## Limitations
144
+
145
+ - AI-generated call graphs and symbol renames are inferred, not authoritative.
146
+ - WASM analysis is summary-based unless deeper lifting/disassembly is added.
147
+ - Crawl scope is intentionally bounded by same-origin policy, page limits, artifact limits, and depth limits.
148
+ - Very large or heavily obfuscated bundles still depend on model quality and provider behavior.
149
+
93
150
  ## Disclaimer
94
151
 
95
152
  - Mapr produces assisted reverse-engineering output, not a formal proof of program behavior.
@@ -99,7 +156,7 @@ report-example.com-2026-03-15T12-34-56-789Z.md
99
156
 
100
157
  ## Contribution Terms
101
158
 
102
- - This project is source-available and closed-license, not open source.
159
+ - This project is public and source-available, but it is not open source.
103
160
  - Contributions are accepted only under the repository owner’s terms.
104
161
  - By submitting a contribution, you agree that the maintainer may use, modify, relicense, and redistribute your contribution as part of Mapr without compensation.
105
162
  - Do not submit code unless you have the rights to contribute it.
package/index.ts CHANGED
@@ -4,8 +4,9 @@ import { cancel, confirm, intro, isCancel, log, outro, spinner, text } from "@cl
4
4
  import pc from "picocolors";
5
5
  import packageJson from "./package.json";
6
6
 
7
- import { AiBundleAnalyzer, PartialAnalysisError, buildAnalysisSnapshot, chunkTextByBytes, deriveChunkSizeBytes } from "./lib/ai-analyzer";
8
- import { parseCliArgs, getConfigOverrides, renderHelpText } from "./lib/cli-args";
7
+ import { buildAnalysisSnapshot, PartialAnalysisError } from "./lib/analysis-schema";
8
+ import { AiBundleAnalyzer, chunkTextByBytes, deriveChunkSizeBytes } from "./lib/ai-analyzer";
9
+ import { getConfigOverrides, parseCliArgs, renderHelpText } from "./lib/cli-args";
9
10
  import { ConfigManager } from "./lib/config";
10
11
  import { BundleFormatter } from "./lib/formatter";
11
12
  import { renderProgressBar } from "./lib/progress";
@@ -98,8 +99,12 @@ async function run(): Promise<void> {
98
99
  }
99
100
 
100
101
  if (args.listModels) {
101
- const models = await configManager.listModels(await configManager.resolveConfigDraft(configOverrides));
102
- console.log(models.join("\n"));
102
+ const models = await configManager.listModelCatalog(await configManager.resolveConfigDraft(configOverrides));
103
+ console.log(
104
+ models
105
+ .map((model) => (model.contextSize ? `${model.id}\t${model.contextSize}` : model.id))
106
+ .join("\n"),
107
+ );
103
108
  return;
104
109
  }
105
110
 
@@ -116,6 +121,10 @@ async function run(): Promise<void> {
116
121
  const scraper = new BundleScraper(fetch, {
117
122
  maxPages: args.maxPages,
118
123
  maxArtifacts: args.maxArtifacts,
124
+ maxDepth: args.maxDepth,
125
+ onProgress(event) {
126
+ scrapeStep.message(event.message);
127
+ },
119
128
  });
120
129
  const scrapeResult = await scraper.scrape(targetUrl);
121
130
  scrapeStep.stop(
@@ -1,7 +1,19 @@
1
- import { generateText, Output } from "ai";
2
- import { Buffer } from "buffer";
3
1
  import { z } from "zod";
2
+ import { Buffer } from "buffer";
4
3
 
4
+ import {
5
+ agentMemoSchema,
6
+ artifactSummarySchema,
7
+ buildAnalysisSnapshot,
8
+ chunkAnalysisSchema,
9
+ finalAnalysisSchema,
10
+ type AgentMemo,
11
+ type ArtifactSummary,
12
+ type BundleAnalysis,
13
+ type ChunkAnalysis,
14
+ PartialAnalysisError,
15
+ } from "./analysis-schema";
16
+ import { generateObjectWithTextFallback } from "./ai-json";
5
17
  import { artifactTypeSchema } from "./artifacts";
6
18
  import type { FormattedArtifact } from "./formatter";
7
19
  import { LocalArtifactRag } from "./local-rag";
@@ -10,63 +22,6 @@ import { SWARM_AGENT_ORDER, getGlobalMissionPrompt, getSwarmAgentPrompt, type Sw
10
22
 
11
23
  export const DEFAULT_CHUNK_SIZE_BYTES = 80 * 1024;
12
24
 
13
- const entryPointSchema = z.object({
14
- symbol: z.string().min(1),
15
- description: z.string().min(1),
16
- evidence: z.string().min(1),
17
- });
18
-
19
- const callGraphEdgeSchema = z.object({
20
- caller: z.string().min(1),
21
- callee: z.string().min(1),
22
- rationale: z.string().min(1),
23
- });
24
-
25
- const renamedSymbolSchema = z.object({
26
- originalName: z.string().min(1),
27
- suggestedName: z.string().min(1),
28
- justification: z.string().min(1),
29
- });
30
-
31
- const agentMemoSchema = z.object({
32
- role: z.string().min(1),
33
- summary: z.string().min(1),
34
- observations: z.array(z.string().min(1)).default([]),
35
- evidence: z.array(z.string().min(1)).default([]),
36
- nextQuestions: z.array(z.string().min(1)).default([]),
37
- });
38
-
39
- const chunkAnalysisSchema = z.object({
40
- entryPoints: z.array(entryPointSchema).default([]),
41
- initializationFlow: z.array(z.string().min(1)).default([]),
42
- callGraph: z.array(callGraphEdgeSchema).default([]),
43
- restoredNames: z.array(renamedSymbolSchema).default([]),
44
- summary: z.string().min(1),
45
- notableLibraries: z.array(z.string().min(1)).default([]),
46
- investigationTips: z.array(z.string().min(1)).default([]),
47
- risks: z.array(z.string().min(1)).default([]),
48
- });
49
-
50
- const artifactSummarySchema = z.object({
51
- url: z.string().url(),
52
- type: artifactTypeSchema,
53
- chunkCount: z.number().int().nonnegative(),
54
- summary: z.string().min(1),
55
- });
56
-
57
- const finalAnalysisSchema = z.object({
58
- overview: z.string().min(1),
59
- entryPoints: z.array(entryPointSchema).default([]),
60
- initializationFlow: z.array(z.string().min(1)).default([]),
61
- callGraph: z.array(callGraphEdgeSchema).default([]),
62
- restoredNames: z.array(renamedSymbolSchema).default([]),
63
- notableLibraries: z.array(z.string().min(1)).default([]),
64
- investigationTips: z.array(z.string().min(1)).default([]),
65
- risks: z.array(z.string().min(1)).default([]),
66
- artifactSummaries: z.array(artifactSummarySchema),
67
- analyzedChunkCount: z.number().int().nonnegative(),
68
- });
69
-
70
25
  const analyzeInputSchema = z.object({
71
26
  pageUrl: z.string().url(),
72
27
  artifacts: z.array(
@@ -82,8 +37,6 @@ const analyzeInputSchema = z.object({
82
37
  }),
83
38
  ),
84
39
  });
85
-
86
- export type BundleAnalysis = z.infer<typeof finalAnalysisSchema>;
87
40
  export type AnalysisProgressStage = "artifact" | "chunk" | "agent";
88
41
  export type AnalysisProgressState = "started" | "completed";
89
42
 
@@ -106,16 +59,6 @@ interface AnalyzerOptions {
106
59
  onProgress?: (event: AnalysisProgressEvent) => void;
107
60
  }
108
61
 
109
- export class PartialAnalysisError extends Error {
110
- public readonly partialAnalysis: BundleAnalysis;
111
-
112
- public constructor(message: string, partialAnalysis: BundleAnalysis) {
113
- super(message);
114
- this.name = "PartialAnalysisError";
115
- this.partialAnalysis = partialAnalysis;
116
- }
117
- }
118
-
119
62
  function createPromptEnvelope(input: {
120
63
  pageUrl: string;
121
64
  artifact: FormattedArtifact;
@@ -193,23 +136,6 @@ export function chunkTextByBytes(source: string, maxBytes = DEFAULT_CHUNK_SIZE_B
193
136
  return chunks;
194
137
  }
195
138
 
196
- function deduplicate<T>(items: T[], keySelector: (item: T) => string): T[] {
197
- const seen = new Set<string>();
198
- const deduplicated: T[] = [];
199
-
200
- for (const item of items) {
201
- const key = keySelector(item);
202
- if (seen.has(key)) {
203
- continue;
204
- }
205
-
206
- seen.add(key);
207
- deduplicated.push(item);
208
- }
209
-
210
- return deduplicated;
211
- }
212
-
213
139
  function normalizeAiError(error: unknown): Error {
214
140
  if (!(error instanceof Error)) {
215
141
  return new Error("AI analysis failed with an unknown error.");
@@ -227,49 +153,6 @@ function normalizeAiError(error: unknown): Error {
227
153
  return error;
228
154
  }
229
155
 
230
- export function buildAnalysisSnapshot(input: {
231
- overview: string;
232
- artifactSummaries?: Array<z.infer<typeof artifactSummarySchema>>;
233
- chunkAnalyses?: Array<z.infer<typeof chunkAnalysisSchema>>;
234
- }): BundleAnalysis {
235
- const artifactSummaries = input.artifactSummaries ?? [];
236
- const chunkAnalyses = input.chunkAnalyses ?? [];
237
-
238
- return finalAnalysisSchema.parse({
239
- overview: input.overview,
240
- entryPoints: deduplicate(
241
- chunkAnalyses.flatMap((analysis) => analysis.entryPoints),
242
- (entryPoint) => `${entryPoint.symbol}:${entryPoint.description}`,
243
- ),
244
- initializationFlow: deduplicate(
245
- chunkAnalyses.flatMap((analysis) => analysis.initializationFlow),
246
- (step) => step,
247
- ),
248
- callGraph: deduplicate(
249
- chunkAnalyses.flatMap((analysis) => analysis.callGraph),
250
- (edge) => `${edge.caller}->${edge.callee}`,
251
- ),
252
- restoredNames: deduplicate(
253
- chunkAnalyses.flatMap((analysis) => analysis.restoredNames),
254
- (entry) => `${entry.originalName}:${entry.suggestedName}`,
255
- ),
256
- notableLibraries: deduplicate(
257
- chunkAnalyses.flatMap((analysis) => analysis.notableLibraries),
258
- (library) => library,
259
- ),
260
- investigationTips: deduplicate(
261
- chunkAnalyses.flatMap((analysis) => analysis.investigationTips),
262
- (tip) => tip,
263
- ),
264
- risks: deduplicate(
265
- chunkAnalyses.flatMap((analysis) => analysis.risks),
266
- (risk) => risk,
267
- ),
268
- artifactSummaries,
269
- analyzedChunkCount: chunkAnalyses.length,
270
- });
271
- }
272
-
273
156
  export class AiBundleAnalyzer {
274
157
  private readonly providerClient: AiProviderClient;
275
158
  private readonly chunkSizeBytes: number;
@@ -301,8 +184,8 @@ export class AiBundleAnalyzer {
301
184
  });
302
185
  }
303
186
 
304
- const chunkAnalyses: Array<z.infer<typeof chunkAnalysisSchema>> = [];
305
- const artifactSummaries: Array<z.infer<typeof artifactSummarySchema>> = [];
187
+ const chunkAnalyses: ChunkAnalysis[] = [];
188
+ const artifactSummaries: ArtifactSummary[] = [];
306
189
 
307
190
  try {
308
191
  const localRag = this.localRagEnabled ? new LocalArtifactRag(validatedInput.artifacts) : null;
@@ -310,7 +193,7 @@ export class AiBundleAnalyzer {
310
193
  for (let artifactIndex = 0; artifactIndex < validatedInput.artifacts.length; artifactIndex += 1) {
311
194
  const artifact = validatedInput.artifacts[artifactIndex]!;
312
195
  const chunks = chunkTextByBytes(artifact.formattedContent || artifact.content, this.chunkSizeBytes);
313
- const perArtifactChunkAnalyses: Array<z.infer<typeof chunkAnalysisSchema>> = [];
196
+ const perArtifactChunkAnalyses: ChunkAnalysis[] = [];
314
197
 
315
198
  this.emitProgress({
316
199
  stage: "artifact",
@@ -401,8 +284,8 @@ export class AiBundleAnalyzer {
401
284
  artifactIndex: number;
402
285
  artifactCount: number;
403
286
  localRag: LocalArtifactRag | null;
404
- }): Promise<z.infer<typeof chunkAnalysisSchema>> {
405
- const memory: Partial<Record<SwarmAgentName, z.infer<typeof agentMemoSchema> | z.infer<typeof chunkAnalysisSchema>>> = {};
287
+ }): Promise<ChunkAnalysis> {
288
+ const memory: Partial<Record<SwarmAgentName, AgentMemo | ChunkAnalysis>> = {};
406
289
 
407
290
  for (const agent of SWARM_AGENT_ORDER) {
408
291
  this.emitProgress({
@@ -452,8 +335,8 @@ export class AiBundleAnalyzer {
452
335
  },
453
336
  memory: Partial<Record<SwarmAgentName, unknown>>,
454
337
  retrievedContext: string[],
455
- ): Promise<z.infer<typeof agentMemoSchema>> {
456
- const result = await generateText({
338
+ ): Promise<AgentMemo> {
339
+ return generateObjectWithTextFallback({
457
340
  model: this.providerClient.getModel(),
458
341
  system: getSwarmAgentPrompt(agent),
459
342
  prompt: createPromptEnvelope({
@@ -465,7 +348,11 @@ export class AiBundleAnalyzer {
465
348
  memory,
466
349
  retrievedContext,
467
350
  }),
468
- output: Output.object({ schema: agentMemoSchema }),
351
+ schema: agentMemoSchema,
352
+ contract: [
353
+ "JSON contract:",
354
+ '{"role":"string","summary":"string","observations":["string"],"evidence":["string"],"nextQuestions":["string"]}',
355
+ ].join("\n"),
469
356
  maxRetries: 2,
470
357
  providerOptions: {
471
358
  openai: {
@@ -473,8 +360,6 @@ export class AiBundleAnalyzer {
473
360
  },
474
361
  },
475
362
  });
476
-
477
- return agentMemoSchema.parse(result.output);
478
363
  }
479
364
 
480
365
  private async runSynthesisAgent(
@@ -487,8 +372,8 @@ export class AiBundleAnalyzer {
487
372
  },
488
373
  memory: Partial<Record<SwarmAgentName, unknown>>,
489
374
  retrievedContext: string[],
490
- ): Promise<z.infer<typeof chunkAnalysisSchema>> {
491
- const result = await generateText({
375
+ ): Promise<ChunkAnalysis> {
376
+ return generateObjectWithTextFallback({
492
377
  model: this.providerClient.getModel(),
493
378
  system: getSwarmAgentPrompt("synthesizer"),
494
379
  prompt: createPromptEnvelope({
@@ -500,7 +385,11 @@ export class AiBundleAnalyzer {
500
385
  memory,
501
386
  retrievedContext,
502
387
  }),
503
- output: Output.object({ schema: chunkAnalysisSchema }),
388
+ schema: chunkAnalysisSchema,
389
+ contract: [
390
+ "JSON contract:",
391
+ '{"entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"summary":"string","notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
392
+ ].join("\n"),
504
393
  maxRetries: 2,
505
394
  providerOptions: {
506
395
  openai: {
@@ -508,17 +397,15 @@ export class AiBundleAnalyzer {
508
397
  },
509
398
  },
510
399
  });
511
-
512
- return chunkAnalysisSchema.parse(result.output);
513
400
  }
514
401
 
515
402
  private async summarizeFindings(
516
403
  pageUrl: string,
517
- artifactSummaries: Array<z.infer<typeof artifactSummarySchema>>,
518
- chunkAnalyses: Array<z.infer<typeof chunkAnalysisSchema>>,
404
+ artifactSummaries: ArtifactSummary[],
405
+ chunkAnalyses: ChunkAnalysis[],
519
406
  ): Promise<BundleAnalysis> {
520
407
  try {
521
- const result = await generateText({
408
+ const result = await generateObjectWithTextFallback({
522
409
  model: this.providerClient.getModel(),
523
410
  system: [
524
411
  getGlobalMissionPrompt(),
@@ -532,12 +419,14 @@ export class AiBundleAnalyzer {
532
419
  "Chunk analyses:",
533
420
  JSON.stringify(chunkAnalyses, null, 2),
534
421
  ].join("\n\n"),
535
- output: Output.object({
536
- schema: finalAnalysisSchema.omit({
537
- artifactSummaries: true,
538
- analyzedChunkCount: true,
539
- }),
422
+ schema: finalAnalysisSchema.omit({
423
+ artifactSummaries: true,
424
+ analyzedChunkCount: true,
540
425
  }),
426
+ contract: [
427
+ "JSON contract:",
428
+ '{"overview":"string","entryPoints":[{"symbol":"string","description":"string","evidence":"string"}],"initializationFlow":["string"],"callGraph":[{"caller":"string","callee":"string","rationale":"string"}],"restoredNames":[{"originalName":"string","suggestedName":"string","justification":"string"}],"notableLibraries":["string"],"investigationTips":["string"],"risks":["string"]}',
429
+ ].join("\n"),
541
430
  maxRetries: 2,
542
431
  providerOptions: {
543
432
  openai: {
@@ -547,7 +436,7 @@ export class AiBundleAnalyzer {
547
436
  });
548
437
 
549
438
  return finalAnalysisSchema.parse({
550
- ...result.output,
439
+ ...result,
551
440
  artifactSummaries,
552
441
  analyzedChunkCount: chunkAnalyses.length,
553
442
  });
package/lib/ai-json.ts ADDED
@@ -0,0 +1,134 @@
1
+ import { generateText, Output } from "ai";
2
+ import { z } from "zod";
3
+
4
/** Matches text that consists solely of one fenced code block (optionally tagged `json`). */
const jsonFencePattern = /^```(?:json)?\s*([\s\S]*?)\s*```$/i;

/**
 * Finds the first bracket-balanced `{...}` / `[...]` span at or after `fromIndex`.
 *
 * Brackets that appear inside double-quoted strings (with backslash escapes
 * honoured) are ignored. Bracket kinds are only counted, not matched against
 * each other; `JSON.parse` in the caller is the real validator.
 *
 * @param source Text to scan.
 * @param fromIndex Offset at which to start looking for an opening bracket.
 * @returns The span text and the offset just past it, or `null` when no
 *          opening bracket exists or the span never closes.
 */
function extractBalancedJsonSlice(
  source: string,
  fromIndex = 0,
): { slice: string; endIndex: number } | null {
  let startIndex = fromIndex;
  while (startIndex < source.length && source[startIndex] !== "{" && source[startIndex] !== "[") {
    startIndex += 1;
  }

  if (startIndex >= source.length) {
    return null;
  }

  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let index = startIndex; index < source.length; index += 1) {
    const character = source[index];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (character === "\\") {
        escaped = true;
      } else if (character === "\"") {
        inString = false;
      }

      continue;
    }

    if (character === "\"") {
      inString = true;
      continue;
    }

    if (character === "{" || character === "[") {
      depth += 1;
      continue;
    }

    if (character === "}" || character === "]") {
      depth -= 1;
      if (depth === 0) {
        return { slice: source.slice(startIndex, index + 1), endIndex: index + 1 };
      }
    }
  }

  return null;
}

/**
 * Extracts a JSON value from free-form model output.
 *
 * Strategy, in order:
 * 1. Trim the text and, if it is entirely one fenced block, unwrap the fence.
 * 2. Try to parse the whole candidate as JSON.
 * 3. Otherwise scan for bracket-balanced spans and return the first one that
 *    parses. Spans that are balanced but not valid JSON (for example a
 *    bracketed word in surrounding prose such as `[draft]`) are skipped and
 *    the scan resumes after them, so JSON that follows is still found.
 *
 * The scan never descends into a span that failed to close, so a truncated
 * JSON document is reported as an error rather than silently replaced by one
 * of its nested fragments.
 *
 * @param source Raw text returned by the model.
 * @returns The parsed JSON value (unvalidated; callers should apply a schema).
 * @throws Error when the text is empty or contains no balanced bracket span.
 * @throws SyntaxError (the first parse failure) when balanced spans exist but
 *         none of them is valid JSON.
 */
export function extractJsonFromText(source: string): unknown {
  const trimmed = source.trim();
  if (!trimmed) {
    throw new Error("Model returned empty text instead of JSON.");
  }

  const fenced = trimmed.match(jsonFencePattern)?.[1]?.trim();
  const directCandidate = fenced ?? trimmed;

  try {
    return JSON.parse(directCandidate) as unknown;
  } catch {
    // Not pure JSON; fall through to the balanced-span scan below.
  }

  let firstParseError: Error | undefined;
  let searchFrom = 0;

  for (;;) {
    const candidate = extractBalancedJsonSlice(directCandidate, searchFrom);
    if (!candidate) {
      break;
    }

    try {
      return JSON.parse(candidate.slice) as unknown;
    } catch (error) {
      if (firstParseError === undefined) {
        firstParseError = error instanceof Error ? error : new Error(String(error));
      }

      // Resume after the rejected span so its nested brackets are not retried.
      searchFrom = candidate.endIndex;
    }
  }

  if (firstParseError !== undefined) {
    throw firstParseError;
  }

  throw new Error("No JSON object found in model output.");
}
76
+
77
/**
 * Decides whether a failed structured-output request should be retried as a
 * plain-text request whose response is parsed as JSON.
 *
 * Only `Error` instances are considered; the decision is a case-insensitive
 * substring match of the error message against a fixed list of markers.
 *
 * @param error Value caught from the structured generation attempt.
 * @returns `true` when the message contains one of the known markers.
 */
export function shouldFallbackToTextJson(error: unknown): boolean {
  if (error instanceof Error) {
    const normalizedMessage = error.message.toLowerCase();
    const fallbackMarkers = [
      "responseformat",
      "structured output",
      "structuredoutputs",
      "response did not match schema",
      "no object generated",
    ];

    return fallbackMarkers.some((marker) => normalizedMessage.includes(marker));
  }

  return false;
}
91
+
92
/**
 * Generates a schema-validated object, first through the SDK's structured
 * output mode and, if that fails in a way `shouldFallbackToTextJson`
 * recognises, through a plain-text request whose response is parsed as JSON.
 *
 * @param input.model Model handle forwarded to `generateText`.
 * @param input.system System prompt; the text fallback appends JSON-only
 *        instructions and `input.contract` to it.
 * @param input.prompt User prompt, sent unchanged in both attempts.
 * @param input.schema Zod schema used to validate the result of either attempt.
 * @param input.contract Textual description of the expected JSON shape, used
 *        only by the text fallback.
 * @param input.maxRetries Retry count forwarded to `generateText` (default 2).
 * @param input.providerOptions Forwarded to `generateText` only when defined.
 * @returns The value produced by `input.schema.parse`.
 * @throws Rethrows structured-attempt errors that do not qualify for fallback,
 *         and any error raised by the text attempt, JSON extraction, or schema
 *         validation.
 */
export async function generateObjectWithTextFallback<TOutput>(input: {
  model: unknown;
  system: string;
  prompt: string;
  schema: z.ZodType<TOutput>;
  contract: string;
  maxRetries?: number;
  providerOptions?: Record<string, unknown>;
}): Promise<TOutput> {
  const { model, system, prompt, schema, contract, providerOptions } = input;
  const retryLimit = input.maxRetries ?? 2;

  // First attempt: let the provider enforce the schema natively.
  try {
    const { output: structuredOutput } = await generateText({
      model: model as never,
      system,
      prompt,
      output: Output.object({ schema }),
      maxRetries: retryLimit,
      ...(providerOptions !== undefined ? { providerOptions: providerOptions as never } : {}),
    });

    return schema.parse(structuredOutput);
  } catch (structuredError) {
    const canFallBack = shouldFallbackToTextJson(structuredError);
    if (!canFallBack) {
      throw structuredError;
    }
  }

  // Second attempt: ask for raw JSON text and validate it ourselves.
  const textModeSystem = [
    system,
    "Return only one valid JSON object.",
    "Do not wrap the JSON in markdown fences.",
    "Do not add explanations before or after the JSON.",
    contract,
  ].join("\n");

  const { output: rawText } = await generateText({
    model: model as never,
    system: textModeSystem,
    prompt,
    output: Output.text(),
    maxRetries: retryLimit,
    ...(providerOptions !== undefined ? { providerOptions: providerOptions as never } : {}),
  });

  const parsedJson = extractJsonFromText(rawText);
  return schema.parse(parsedJson);
}