@redstone-md/mapr 0.0.1-alpha → 0.0.3-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -28
- package/assets/banner.svg +50 -0
- package/index.ts +13 -4
- package/lib/ai-analyzer.ts +44 -155
- package/lib/ai-json.ts +134 -0
- package/lib/analysis-schema.ts +135 -0
- package/lib/artifacts.ts +57 -73
- package/lib/cli-args.ts +16 -2
- package/lib/config.ts +95 -37
- package/lib/formatter.ts +1 -4
- package/lib/provider.ts +307 -14
- package/lib/reporter.ts +1 -1
- package/lib/scraper.ts +318 -22
- package/package.json +2 -1
package/lib/ai-json.ts
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { generateText, Output } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
// Matches input that is, in its entirety, one Markdown code fence (optionally tagged
// `json`, case-insensitive) and captures the fenced content with surrounding whitespace dropped.
const jsonFencePattern = /^```(?:json)?\s*([\s\S]*?)\s*```$/i;
|
|
5
|
+
|
|
6
|
+
/**
 * Returns the substring that starts at the first `{` or `[` in `source` and ends at the
 * bracket that brings the nesting depth back to zero.
 *
 * Brackets inside double-quoted strings are ignored, and backslash escapes inside strings
 * are honoured. Bracket kinds are only counted, not matched against each other.
 *
 * @param source Arbitrary text that may embed a JSON value.
 * @returns The balanced slice, or `null` when there is no opening bracket or the
 *          nesting never closes.
 */
function extractBalancedJsonSlice(source: string): string | null {
  const openerAt = source.search(/[\[{]/);
  if (openerAt === -1) {
    return null;
  }

  let nesting = 0;
  let insideString = false;
  let skipNext = false;

  for (let cursor = openerAt; cursor < source.length; cursor += 1) {
    const ch = source.charAt(cursor);

    if (insideString) {
      // Within a string only escapes and the closing quote matter.
      if (skipNext) {
        skipNext = false;
      } else if (ch === "\\") {
        skipNext = true;
      } else if (ch === "\"") {
        insideString = false;
      }
      continue;
    }

    switch (ch) {
      case "\"":
        insideString = true;
        break;
      case "{":
      case "[":
        nesting += 1;
        break;
      case "}":
      case "]":
        nesting -= 1;
        if (nesting === 0) {
          return source.slice(openerAt, cursor + 1);
        }
        break;
      default:
        break;
    }
  }

  return null;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Parses a JSON value out of free-form model output.
 *
 * Strategy:
 * 1. Trim the text and, if the whole text is a single Markdown fence, unwrap it.
 * 2. Try to parse that candidate directly.
 * 3. Otherwise scan for bracket-balanced slices from left to right and return the first
 *    one that parses. A slice that is balanced but not valid JSON (for example a
 *    `[1]`-style footnote preceding the payload) is skipped instead of aborting.
 *
 * The scan never descends into a slice that failed, and it stops at the first opening
 * bracket whose nesting never closes, so truncated payloads are still reported as errors.
 *
 * @param source Raw text returned by the model.
 * @returns The parsed JSON value (unvalidated).
 * @throws Error when the text is empty or no parseable JSON value is found.
 */
export function extractJsonFromText(source: string): unknown {
  const trimmed = source.trim();
  if (!trimmed) {
    throw new Error("Model returned empty text instead of JSON.");
  }

  const fenced = trimmed.match(jsonFencePattern)?.[1]?.trim();
  const directCandidate = fenced ?? trimmed;

  try {
    return JSON.parse(directCandidate) as unknown;
  } catch {
    // Not pure JSON; fall back to scanning for an embedded value below.
  }

  let remaining = directCandidate;
  for (;;) {
    const balancedSlice = extractBalancedJsonSlice(remaining);
    if (!balancedSlice) {
      break;
    }

    try {
      return JSON.parse(balancedSlice) as unknown;
    } catch {
      // Balanced but not JSON: continue after this slice. The slice begins at the first
      // opening bracket of `remaining`, so skip up to and including its end.
      const sliceStart = remaining.search(/[\[{]/);
      remaining = remaining.slice(sliceStart + balancedSlice.length);
    }
  }

  throw new Error("No JSON object found in model output.");
}
|
|
76
|
+
|
|
77
|
+
/**
 * Decides whether a failed structured-output request should be retried as a plain-text
 * request.
 *
 * @param error The value caught from the structured-output attempt.
 * @returns `true` only when `error` is an `Error` whose lower-cased message contains one
 *          of the known structured-output failure markers.
 */
export function shouldFallbackToTextJson(error: unknown): boolean {
  if (error instanceof Error) {
    const haystack = error.message.toLowerCase();
    const markers = [
      "responseformat",
      "structured output",
      "structuredoutputs",
      "response did not match schema",
      "no object generated",
    ];

    return markers.some((marker) => haystack.includes(marker));
  }

  return false;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Requests a schema-validated object from the model, retrying as a plain-text JSON
 * request when the structured attempt fails in a way `shouldFallbackToTextJson` accepts.
 *
 * @param input.model Model handle forwarded to `generateText`.
 * @param input.system System prompt for both attempts.
 * @param input.prompt User prompt for both attempts.
 * @param input.schema Zod schema used to validate the result of either attempt.
 * @param input.contract Extra text appended to the system prompt of the text attempt.
 * @param input.maxRetries Retry count passed to `generateText` (defaults to 2).
 * @param input.providerOptions Forwarded to `generateText` only when defined.
 * @returns The value produced by `input.schema.parse`.
 * @throws Rethrows any structured-attempt error that does not qualify for the fallback;
 *         errors from the text attempt, JSON extraction, or schema parsing propagate.
 */
export async function generateObjectWithTextFallback<TOutput>(input: {
  model: unknown;
  system: string;
  prompt: string;
  schema: z.ZodType<TOutput>;
  contract: string;
  maxRetries?: number;
  providerOptions?: Record<string, unknown>;
}): Promise<TOutput> {
  const retryBudget = input.maxRetries ?? 2;
  // Spread into both calls so the key is omitted entirely when no options were given.
  const providerOptionsPart =
    input.providerOptions !== undefined ? { providerOptions: input.providerOptions as never } : {};

  try {
    const { output } = await generateText({
      model: input.model as never,
      system: input.system,
      prompt: input.prompt,
      output: Output.object({ schema: input.schema }),
      maxRetries: retryBudget,
      ...providerOptionsPart,
    });

    return input.schema.parse(output);
  } catch (error) {
    const canRetryAsText = shouldFallbackToTextJson(error);
    if (!canRetryAsText) {
      throw error;
    }
  }

  const textOnlySystemPrompt = [
    input.system,
    "Return only one valid JSON object.",
    "Do not wrap the JSON in markdown fences.",
    "Do not add explanations before or after the JSON.",
    input.contract,
  ].join("\n");

  const { output: rawText } = await generateText({
    model: input.model as never,
    system: textOnlySystemPrompt,
    prompt: input.prompt,
    output: Output.text(),
    maxRetries: retryBudget,
    ...providerOptionsPart,
  });

  return input.schema.parse(extractJsonFromText(rawText));
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
import { artifactTypeSchema } from "./artifacts";
|
|
4
|
+
|
|
5
|
+
/** Builds a new schema for a required, non-empty string. */
const requiredText = () => z.string().min(1);

/** Builds a new schema for a list of non-empty strings that defaults to `[]` when undefined. */
const textList = () => z.array(requiredText()).default([]);

/** A symbol identified as an entry point, with a description and supporting evidence. */
export const entryPointSchema = z.object({
  symbol: requiredText(),
  description: requiredText(),
  evidence: requiredText(),
});

/** One caller → callee edge with the rationale for it. */
export const callGraphEdgeSchema = z.object({
  caller: requiredText(),
  callee: requiredText(),
  rationale: requiredText(),
});

/** A rename suggestion for a symbol, with its justification. */
export const renamedSymbolSchema = z.object({
  originalName: requiredText(),
  suggestedName: requiredText(),
  justification: requiredText(),
});

/** A memo with a role, a summary, and optional list fields. */
export const agentMemoSchema = z.object({
  role: requiredText(),
  summary: requiredText(),
  observations: textList(),
  evidence: textList(),
  nextQuestions: textList(),
});

/** Analysis result for a single chunk; every list field defaults to empty. */
export const chunkAnalysisSchema = z.object({
  entryPoints: z.array(entryPointSchema).default([]),
  initializationFlow: textList(),
  callGraph: z.array(callGraphEdgeSchema).default([]),
  restoredNames: z.array(renamedSymbolSchema).default([]),
  summary: requiredText(),
  notableLibraries: textList(),
  investigationTips: textList(),
  risks: textList(),
});

/** Per-artifact summary: URL, artifact type, chunk count and summary text. */
export const artifactSummarySchema = z.object({
  url: z.string().url(),
  type: artifactTypeSchema,
  chunkCount: z.number().int().nonnegative(),
  summary: requiredText(),
});

/** The complete analysis: overview, merged findings, artifact summaries and chunk count. */
export const finalAnalysisSchema = z.object({
  overview: requiredText(),
  entryPoints: z.array(entryPointSchema).default([]),
  initializationFlow: textList(),
  callGraph: z.array(callGraphEdgeSchema).default([]),
  restoredNames: z.array(renamedSymbolSchema).default([]),
  notableLibraries: textList(),
  investigationTips: textList(),
  risks: textList(),
  artifactSummaries: z.array(artifactSummarySchema),
  analyzedChunkCount: z.number().int().nonnegative(),
});

export type BundleAnalysis = z.infer<typeof finalAnalysisSchema>;
export type AgentMemo = z.infer<typeof agentMemoSchema>;
export type ChunkAnalysis = z.infer<typeof chunkAnalysisSchema>;
export type ArtifactSummary = z.infer<typeof artifactSummarySchema>;
|
|
66
|
+
|
|
67
|
+
/**
 * Removes items whose key has already been seen, keeping the first occurrence of each
 * key and preserving the original order.
 *
 * @param items Items to filter; the array is not modified.
 * @param keySelector Produces the identity key for an item; called once per item.
 * @returns A new array holding the first item for each distinct key.
 */
function deduplicate<T>(items: T[], keySelector: (item: T) => string): T[] {
  // Map iteration follows insertion order, so first occurrences come out in input order.
  const firstByKey = new Map<string, T>();

  for (const candidate of items) {
    const identity = keySelector(candidate);
    if (!firstByKey.has(identity)) {
      firstByKey.set(identity, candidate);
    }
  }

  return [...firstByKey.values()];
}
|
|
83
|
+
|
|
84
|
+
/**
 * Combines per-chunk analyses into one `BundleAnalysis`, validated by
 * `finalAnalysisSchema`.
 *
 * Each list field is the concatenation of that field across all chunks, de-duplicated
 * with first occurrence winning. Keys used for de-duplication:
 * - entry points: `symbol:description`
 * - call graph edges: `caller->callee`
 * - restored names: `originalName:suggestedName`
 * - plain string lists: the string itself
 *
 * @param input.overview Overview text for the snapshot.
 * @param input.artifactSummaries Artifact summaries to include (defaults to none).
 * @param input.chunkAnalyses Chunk analyses to merge (defaults to none).
 * @returns The parsed snapshot; `analyzedChunkCount` is the number of chunk analyses.
 */
export function buildAnalysisSnapshot(input: {
  overview: string;
  artifactSummaries?: ArtifactSummary[];
  chunkAnalyses?: ChunkAnalysis[];
}): BundleAnalysis {
  const summaries = input.artifactSummaries ?? [];
  const chunks = input.chunkAnalyses ?? [];

  // Flattens one field across all chunks and drops repeated keys.
  const mergeUnique = <TItem>(
    select: (chunk: ChunkAnalysis) => TItem[],
    keyOf: (item: TItem) => string,
  ): TItem[] => deduplicate(chunks.flatMap(select), keyOf);

  const asIs = (text: string): string => text;

  const snapshot = {
    overview: input.overview,
    entryPoints: mergeUnique(
      (chunk) => chunk.entryPoints,
      (entryPoint) => `${entryPoint.symbol}:${entryPoint.description}`,
    ),
    initializationFlow: mergeUnique((chunk) => chunk.initializationFlow, asIs),
    callGraph: mergeUnique(
      (chunk) => chunk.callGraph,
      (edge) => `${edge.caller}->${edge.callee}`,
    ),
    restoredNames: mergeUnique(
      (chunk) => chunk.restoredNames,
      (entry) => `${entry.originalName}:${entry.suggestedName}`,
    ),
    notableLibraries: mergeUnique((chunk) => chunk.notableLibraries, asIs),
    investigationTips: mergeUnique((chunk) => chunk.investigationTips, asIs),
    risks: mergeUnique((chunk) => chunk.risks, asIs),
    artifactSummaries: summaries,
    analyzedChunkCount: chunks.length,
  };

  return finalAnalysisSchema.parse(snapshot);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Error that carries a `BundleAnalysis` alongside its message, so the handler that
 * catches it can still read the attached analysis.
 */
export class PartialAnalysisError extends Error {
  /** The analysis supplied when the error was constructed. */
  public readonly partialAnalysis: BundleAnalysis;

  /**
   * @param message Error message passed to `Error`.
   * @param partialAnalysis Analysis to attach to the error.
   */
  public constructor(message: string, partialAnalysis: BundleAnalysis) {
    super(message);

    this.partialAnalysis = partialAnalysis;
    // Set explicitly so `name` reads "PartialAnalysisError" rather than the inherited "Error".
    this.name = "PartialAnalysisError";
  }
}
|
package/lib/artifacts.ts
CHANGED
|
@@ -6,9 +6,15 @@ export const artifactTypeSchema = z.enum([
|
|
|
6
6
|
"script",
|
|
7
7
|
"service-worker",
|
|
8
8
|
"worker",
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
|
|
9
|
+
"source-map",
|
|
10
|
+
"wasm",
|
|
11
|
+
]);
|
|
12
|
+
|
|
13
|
+
export const analyzableArtifactTypeSchema = z.enum([
|
|
14
|
+
"script",
|
|
15
|
+
"service-worker",
|
|
16
|
+
"worker",
|
|
17
|
+
"source-map",
|
|
12
18
|
"wasm",
|
|
13
19
|
]);
|
|
14
20
|
|
|
@@ -30,6 +36,11 @@ export type ArtifactType = z.infer<typeof artifactTypeSchema>;
|
|
|
30
36
|
export type DiscoveredArtifact = z.infer<typeof discoveredArtifactSchema>;
|
|
31
37
|
export type ArtifactCandidate = z.infer<typeof artifactCandidateSchema>;
|
|
32
38
|
|
|
39
|
+
// Case-insensitive match for a listed image, video, audio, document, archive or font file
// extension that is followed by the end of the string, `?` or `#`.
const binaryOrVisualAssetPattern =
|
|
40
|
+
/\.(?:png|jpe?g|gif|webp|avif|svg|ico|bmp|tiff?|mp4|webm|mov|avi|mp3|wav|ogg|flac|aac|m4a|pdf|zip|gz|tar|7z|rar|woff2?|ttf|otf|eot)(?:$|[?#])/i;
|
|
41
|
+
// Case-insensitive match for content types that begin with `image/`, `audio/`, `video/`,
// `font/`, or one of the listed `application/` prefixes (font, octet-stream, pdf, zip,
// gzip, x-font, vnd.ms-fontobject).
const ignoredContentTypePattern =
|
|
42
|
+
/^(?:image\/|audio\/|video\/|font\/|application\/(?:font|octet-stream|pdf|zip|gzip|x-font|vnd\.ms-fontobject))/i;
|
|
43
|
+
|
|
33
44
|
function makeCandidate(url: string, type: ArtifactType, discoveredFrom: string): ArtifactCandidate | null {
|
|
34
45
|
const parsed = artifactCandidateSchema.safeParse({ url, type, discoveredFrom });
|
|
35
46
|
return parsed.success ? parsed.data : null;
|
|
@@ -61,6 +72,10 @@ function resolveCandidateUrl(reference: string, baseUrl: string): string | null
|
|
|
61
72
|
|
|
62
73
|
try {
|
|
63
74
|
const absoluteUrl = new URL(reference, baseUrl).toString();
|
|
75
|
+
if (binaryOrVisualAssetPattern.test(new URL(absoluteUrl).pathname)) {
|
|
76
|
+
return null;
|
|
77
|
+
}
|
|
78
|
+
|
|
64
79
|
const parsed = z.string().url().safeParse(absoluteUrl);
|
|
65
80
|
return parsed.success ? parsed.data : null;
|
|
66
81
|
} catch {
|
|
@@ -75,16 +90,8 @@ function inferAssetTypeFromUrl(url: string, fallback: ArtifactType = "script"):
|
|
|
75
90
|
return "wasm";
|
|
76
91
|
}
|
|
77
92
|
|
|
78
|
-
if (pathname.endsWith(".
|
|
79
|
-
return "
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
if (pathname.endsWith(".json")) {
|
|
83
|
-
return "json";
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
if (pathname.endsWith(".webmanifest") || pathname.endsWith("manifest.json")) {
|
|
87
|
-
return "manifest";
|
|
93
|
+
if (pathname.endsWith(".map")) {
|
|
94
|
+
return "source-map";
|
|
88
95
|
}
|
|
89
96
|
|
|
90
97
|
if (pathname.endsWith(".html") || pathname.endsWith(".htm")) {
|
|
@@ -94,14 +101,34 @@ function inferAssetTypeFromUrl(url: string, fallback: ArtifactType = "script"):
|
|
|
94
101
|
return fallback;
|
|
95
102
|
}
|
|
96
103
|
|
|
104
|
+
/**
 * Resolves `reference` against `pageUrl` and returns an `"html"` candidate when the
 * resolved path looks like a page.
 *
 * A path looks like a page when it is empty, ends with `/`, `.html` or `.htm`, or does
 * not end in a `.ext`-style alphanumeric extension.
 *
 * @param reference Raw URL reference to resolve.
 * @param pageUrl Base URL used for resolution.
 * @param discoveredFrom Provenance label stored on the candidate.
 * @returns The candidate, or `null` when resolution fails, the path does not look like a
 *          page, or `makeCandidate` rejects it.
 */
function extractPageCandidate(reference: string, pageUrl: string, discoveredFrom: string): ArtifactCandidate | null {
  const absoluteUrl = resolveCandidateUrl(reference, pageUrl);
  if (!absoluteUrl) {
    return null;
  }

  const lowerPath = new URL(absoluteUrl).pathname.toLowerCase();
  const hasPageSuffix = ["/", ".html", ".htm"].some((suffix) => lowerPath.endsWith(suffix));
  const hasFileExtension = /\.[a-z0-9]+$/i.test(lowerPath);

  if (lowerPath === "" || hasPageSuffix || !hasFileExtension) {
    return makeCandidate(absoluteUrl, "html", discoveredFrom);
  }

  return null;
}
|
|
120
|
+
|
|
97
121
|
function extractFromJavaScript(source: string, baseUrl: string, discoveredFrom: string): ArtifactCandidate[] {
|
|
98
122
|
const candidates = new Map<string, ArtifactCandidate>();
|
|
99
123
|
const regexDefinitions: Array<{ regex: RegExp; type: ArtifactType }> = [
|
|
100
124
|
{ regex: /(?:import|export)\s+(?:[^"'`]+?\s+from\s+)?["'`]([^"'`]+)["'`]/g, type: "script" },
|
|
101
125
|
{ regex: /import\(\s*["'`]([^"'`]+)["'`]\s*\)/g, type: "script" },
|
|
126
|
+
{ regex: /importScripts\(\s*["'`]([^"'`]+)["'`]\s*\)/g, type: "script" },
|
|
102
127
|
{ regex: /navigator\.serviceWorker\.register\(\s*(?:new\s+URL\(\s*)?["'`]([^"'`]+)["'`]/g, type: "service-worker" },
|
|
103
128
|
{ regex: /new\s+(?:SharedWorker|Worker)\(\s*(?:new\s+URL\(\s*)?["'`]([^"'`]+)["'`]/g, type: "worker" },
|
|
104
|
-
{ regex: /["'`]([^"'`]
|
|
129
|
+
{ regex: /new\s+URL\(\s*["'`]([^"'`]+)["'`]\s*,\s*import\.meta\.url\s*\)/g, type: "script" },
|
|
130
|
+
{ regex: /["'`]([^"'`]+\.(?:m?js|cjs|wasm|map)(?:\?[^"'`]*)?)["'`]/g, type: "script" },
|
|
131
|
+
{ regex: /[@#]\s*sourceMappingURL=([^\s]+)/g, type: "source-map" },
|
|
105
132
|
];
|
|
106
133
|
|
|
107
134
|
for (const definition of regexDefinitions) {
|
|
@@ -124,31 +151,12 @@ function extractFromJavaScript(source: string, baseUrl: string, discoveredFrom:
|
|
|
124
151
|
return [...candidates.values()];
|
|
125
152
|
}
|
|
126
153
|
|
|
127
|
-
function
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
/@import\s+(?:url\()?["']?([^"'()]+)["']?\)?/g,
|
|
131
|
-
/url\(\s*["']?([^"'()]+)["']?\s*\)/g,
|
|
132
|
-
];
|
|
133
|
-
|
|
134
|
-
for (const regex of regexDefinitions) {
|
|
135
|
-
let match: RegExpExecArray | null;
|
|
136
|
-
while ((match = regex.exec(source)) !== null) {
|
|
137
|
-
const resolvedUrl = resolveCandidateUrl(match[1] ?? "", baseUrl);
|
|
138
|
-
if (!resolvedUrl) {
|
|
139
|
-
continue;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
addCandidate(
|
|
143
|
-
candidates,
|
|
144
|
-
makeCandidate(resolvedUrl, inferAssetTypeFromUrl(resolvedUrl, "stylesheet"), discoveredFrom),
|
|
145
|
-
false,
|
|
146
|
-
new URL(baseUrl).origin,
|
|
147
|
-
);
|
|
148
|
-
}
|
|
149
|
-
}
|
|
154
|
+
/**
 * Type guard reporting whether `type` is accepted by `analyzableArtifactTypeSchema`.
 *
 * @param type Artifact type to check.
 * @returns `true` when the schema's `safeParse` succeeds for `type`.
 */
export function isAnalyzableArtifactType(type: ArtifactType): type is z.infer<typeof analyzableArtifactTypeSchema> {
  const { success } = analyzableArtifactTypeSchema.safeParse(type);
  return success;
}
|
|
150
157
|
|
|
151
|
-
|
|
158
|
+
/**
 * Reports whether a content type matches `ignoredContentTypePattern`.
 *
 * @param contentType Content-type string; it is trimmed and lower-cased before matching.
 * @returns `true` when the normalized value matches the pattern.
 */
export function isIgnoredContentType(contentType: string): boolean {
  const normalized = contentType.trim().toLowerCase();
  return ignoredContentTypePattern.test(normalized);
}
|
|
153
161
|
|
|
154
162
|
export function extractArtifactCandidates(html: string, pageUrl: string): ArtifactCandidate[] {
|
|
@@ -170,40 +178,15 @@ export function extractArtifactCandidates(html: string, pageUrl: string): Artifa
|
|
|
170
178
|
const rel = ($(element).attr("rel") ?? "").toLowerCase();
|
|
171
179
|
const asValue = ($(element).attr("as") ?? "").toLowerCase();
|
|
172
180
|
|
|
173
|
-
if (rel.includes("manifest")) {
|
|
174
|
-
addCandidate(candidates, makeCandidate(href, "manifest", "html:manifest"), false, origin);
|
|
175
|
-
return;
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
if (rel.includes("stylesheet")) {
|
|
179
|
-
addCandidate(candidates, makeCandidate(href, "stylesheet", "html:stylesheet"), false, origin);
|
|
180
|
-
return;
|
|
181
|
-
}
|
|
182
|
-
|
|
183
181
|
if (rel.includes("modulepreload") || (rel.includes("preload") && asValue === "script")) {
|
|
184
|
-
addCandidate(candidates, makeCandidate(href, "script", "html:preload"), false, origin);
|
|
182
|
+
addCandidate(candidates, makeCandidate(href, inferAssetTypeFromUrl(href, "script"), "html:preload"), false, origin);
|
|
185
183
|
}
|
|
186
184
|
});
|
|
187
185
|
|
|
188
|
-
$("a[href]").each((_, element) => {
|
|
189
|
-
const
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
const url = new URL(href);
|
|
195
|
-
const isSameOrigin = url.origin === origin;
|
|
196
|
-
const pathname = url.pathname.toLowerCase();
|
|
197
|
-
const looksLikePage =
|
|
198
|
-
pathname === "" ||
|
|
199
|
-
pathname.endsWith("/") ||
|
|
200
|
-
pathname.endsWith(".html") ||
|
|
201
|
-
pathname.endsWith(".htm") ||
|
|
202
|
-
!/\.[a-z0-9]+$/i.test(pathname);
|
|
203
|
-
|
|
204
|
-
if (isSameOrigin && looksLikePage) {
|
|
205
|
-
addCandidate(candidates, makeCandidate(href, "html", "html:anchor"), true, origin);
|
|
206
|
-
}
|
|
186
|
+
$("a[href], iframe[src], form[action]").each((_, element) => {
|
|
187
|
+
const attributeName = element.tagName === "iframe" ? "src" : element.tagName === "form" ? "action" : "href";
|
|
188
|
+
const pageCandidate = extractPageCandidate($(element).attr(attributeName)?.trim() ?? "", pageUrl, `html:${element.tagName}`);
|
|
189
|
+
addCandidate(candidates, pageCandidate, true, origin);
|
|
207
190
|
});
|
|
208
191
|
|
|
209
192
|
$("script:not([src])").each((_, element) => {
|
|
@@ -221,13 +204,14 @@ export function extractNestedCandidates(artifact: DiscoveredArtifact): ArtifactC
|
|
|
221
204
|
return extractArtifactCandidates(artifact.content, artifact.url);
|
|
222
205
|
}
|
|
223
206
|
|
|
224
|
-
if (
|
|
207
|
+
if (
|
|
208
|
+
artifact.type === "script" ||
|
|
209
|
+
artifact.type === "service-worker" ||
|
|
210
|
+
artifact.type === "worker" ||
|
|
211
|
+
artifact.type === "source-map"
|
|
212
|
+
) {
|
|
225
213
|
return extractFromJavaScript(artifact.content, artifact.url, `${artifact.type}:code`);
|
|
226
214
|
}
|
|
227
215
|
|
|
228
|
-
if (artifact.type === "stylesheet") {
|
|
229
|
-
return extractFromCss(artifact.content, artifact.url, "stylesheet:code");
|
|
230
|
-
}
|
|
231
|
-
|
|
232
216
|
return [];
|
|
233
217
|
}
|
package/lib/cli-args.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
|
|
3
|
-
import { providerTypeSchema } from "./provider";
|
|
3
|
+
import { getProviderPreset, providerPresetSchema, providerTypeSchema } from "./provider";
|
|
4
4
|
|
|
5
5
|
const rawCliArgsSchema = z.object({
|
|
6
6
|
help: z.boolean().default(false),
|
|
@@ -13,6 +13,7 @@ const rawCliArgsSchema = z.object({
|
|
|
13
13
|
url: z.string().url().optional(),
|
|
14
14
|
output: z.string().min(1).optional(),
|
|
15
15
|
providerType: providerTypeSchema.optional(),
|
|
16
|
+
providerPreset: providerPresetSchema.optional(),
|
|
16
17
|
providerName: z.string().min(1).optional(),
|
|
17
18
|
apiKey: z.string().min(1).optional(),
|
|
18
19
|
baseURL: z.string().url().optional(),
|
|
@@ -20,11 +21,13 @@ const rawCliArgsSchema = z.object({
|
|
|
20
21
|
contextSize: z.number().int().positive().optional(),
|
|
21
22
|
maxPages: z.number().int().positive().optional(),
|
|
22
23
|
maxArtifacts: z.number().int().positive().optional(),
|
|
24
|
+
maxDepth: z.number().int().nonnegative().optional(),
|
|
23
25
|
});
|
|
24
26
|
|
|
25
27
|
const cliConfigOverrideSchema = z
|
|
26
28
|
.object({
|
|
27
29
|
providerType: providerTypeSchema.optional(),
|
|
30
|
+
providerPreset: providerPresetSchema.optional(),
|
|
28
31
|
providerName: z.string().min(1).optional(),
|
|
29
32
|
apiKey: z.string().min(1).optional(),
|
|
30
33
|
baseURL: z.string().url().optional(),
|
|
@@ -49,6 +52,7 @@ const optionMap = new Map<string, keyof CliArgs>([
|
|
|
49
52
|
["-u", "url"],
|
|
50
53
|
["--output", "output"],
|
|
51
54
|
["--provider-type", "providerType"],
|
|
55
|
+
["--provider-preset", "providerPreset"],
|
|
52
56
|
["--provider-name", "providerName"],
|
|
53
57
|
["--api-key", "apiKey"],
|
|
54
58
|
["--base-url", "baseURL"],
|
|
@@ -56,10 +60,11 @@ const optionMap = new Map<string, keyof CliArgs>([
|
|
|
56
60
|
["--context-size", "contextSize"],
|
|
57
61
|
["--max-pages", "maxPages"],
|
|
58
62
|
["--max-artifacts", "maxArtifacts"],
|
|
63
|
+
["--max-depth", "maxDepth"],
|
|
59
64
|
]);
|
|
60
65
|
|
|
61
66
|
const booleanKeys = new Set<keyof CliArgs>(["help", "version", "headless", "reconfigure", "listModels", "localRag", "verboseAgents"]);
|
|
62
|
-
const numberKeys = new Set<keyof CliArgs>(["contextSize", "maxPages", "maxArtifacts"]);
|
|
67
|
+
const numberKeys = new Set<keyof CliArgs>(["contextSize", "maxPages", "maxArtifacts", "maxDepth"]);
|
|
63
68
|
|
|
64
69
|
function normalizeValue(key: keyof CliArgs, value: string): unknown {
|
|
65
70
|
if (numberKeys.has(key)) {
|
|
@@ -110,6 +115,13 @@ export function getConfigOverrides(args: CliArgs) {
|
|
|
110
115
|
const overrides: Record<string, unknown> = {};
|
|
111
116
|
|
|
112
117
|
if (args.providerType !== undefined) overrides.providerType = args.providerType;
|
|
118
|
+
if (args.providerPreset !== undefined) {
|
|
119
|
+
const preset = getProviderPreset(args.providerPreset);
|
|
120
|
+
overrides.providerType = "openai-compatible";
|
|
121
|
+
overrides.providerPreset = args.providerPreset;
|
|
122
|
+
overrides.providerName = args.providerName ?? preset.providerName;
|
|
123
|
+
overrides.baseURL = args.baseURL ?? preset.baseURL;
|
|
124
|
+
}
|
|
113
125
|
if (args.providerName !== undefined) overrides.providerName = args.providerName;
|
|
114
126
|
if (args.apiKey !== undefined) overrides.apiKey = args.apiKey;
|
|
115
127
|
if (args.baseURL !== undefined) overrides.baseURL = args.baseURL;
|
|
@@ -132,9 +144,11 @@ export function renderHelpText(): string {
|
|
|
132
144
|
" --output <path> Write the report to a specific path",
|
|
133
145
|
" --max-pages <number> Limit same-origin HTML pages to crawl",
|
|
134
146
|
" --max-artifacts <number> Limit total downloaded artifacts",
|
|
147
|
+
" --max-depth <number> Limit crawl hop depth from the entry page",
|
|
135
148
|
"",
|
|
136
149
|
"Provider options:",
|
|
137
150
|
" --provider-type <type> openai | openai-compatible",
|
|
151
|
+
" --provider-preset <preset> custom | blackbox | nvidia-nim | onlysq",
|
|
138
152
|
" --provider-name <name> Display name for the provider",
|
|
139
153
|
" --api-key <key> Provider API key",
|
|
140
154
|
" --base-url <url> Base URL for the provider",
|