@redstone-md/mapr 0.0.1-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +45 -0
- package/README.md +109 -0
- package/bin/mapr +2 -0
- package/index.ts +247 -0
- package/lib/ai-analyzer.ts +598 -0
- package/lib/artifacts.ts +233 -0
- package/lib/cli-args.ts +152 -0
- package/lib/config.ts +385 -0
- package/lib/formatter.ts +109 -0
- package/lib/local-rag.ts +104 -0
- package/lib/progress.ts +10 -0
- package/lib/provider.ts +85 -0
- package/lib/reporter.ts +213 -0
- package/lib/scraper.ts +169 -0
- package/lib/swarm-prompts.ts +56 -0
- package/lib/wasm.ts +62 -0
- package/package.json +62 -0
package/lib/artifacts.ts
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
/** Closed set of artifact kinds a URL or downloaded resource can be classified as. */
export const artifactTypeSchema = z.enum([
  "html",
  "script",
  "service-worker",
  "worker",
  "stylesheet",
  "manifest",
  "json",
  "wasm",
]);

// Field schemas shared by the candidate and the downloaded-artifact shapes.
const absoluteUrlField = z.string().url();
const discoveredFromField = z.string().min(1);

/** A downloaded artifact: its URL, kind, byte size, text content and where it was found. */
export const discoveredArtifactSchema = z.object({
  url: absoluteUrlField,
  type: artifactTypeSchema,
  sizeBytes: z.number().int().nonnegative(),
  content: z.string(),
  discoveredFrom: discoveredFromField,
});

/** A not-yet-downloaded reference: its URL, kind and a non-empty label for where it was found. */
export const artifactCandidateSchema = z.object({
  url: absoluteUrlField,
  type: artifactTypeSchema,
  discoveredFrom: discoveredFromField,
});

export type ArtifactType = z.infer<typeof artifactTypeSchema>;
export type DiscoveredArtifact = z.infer<typeof discoveredArtifactSchema>;
export type ArtifactCandidate = z.infer<typeof artifactCandidateSchema>;
|
|
32
|
+
|
|
33
|
+
/**
 * Builds a candidate from its three fields, validating them against
 * `artifactCandidateSchema`.
 *
 * @returns the parsed candidate, or `null` when validation fails.
 */
function makeCandidate(url: string, type: ArtifactType, discoveredFrom: string): ArtifactCandidate | null {
  const result = artifactCandidateSchema.safeParse({ url, type, discoveredFrom });
  if (!result.success) {
    return null;
  }

  return result.data;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Inserts `candidate` into `candidates`, keyed by its URL.
 *
 * Nothing happens when the candidate is missing, when `restrictToSameOrigin`
 * is set and the candidate's origin differs from `origin`, or when an entry
 * with the same URL already exists (the first entry for a URL wins).
 *
 * @param candidates map that collects candidates by URL; mutated in place.
 * @param candidate candidate to insert, or `null` to do nothing.
 * @param restrictToSameOrigin when true, drop candidates from other origins.
 * @param origin origin to compare against; only read when restricting.
 */
function addCandidate(
  candidates: Map<string, ArtifactCandidate>,
  candidate: ArtifactCandidate | null,
  restrictToSameOrigin: boolean,
  origin: string,
): void {
  if (candidate == null) {
    return;
  }

  const isForeign = restrictToSameOrigin && new URL(candidate.url).origin !== origin;
  if (isForeign || candidates.has(candidate.url)) {
    return;
  }

  candidates.set(candidate.url, candidate);
}
|
|
56
|
+
|
|
57
|
+
/** URL schemes (as reported by `URL.protocol`) that never name a downloadable artifact. */
const NON_FETCHABLE_PROTOCOLS = new Set(["data:", "blob:", "javascript:", "mailto:", "tel:", "about:"]);

/**
 * Resolves a raw reference found in HTML, JS or CSS against `baseUrl`.
 *
 * @param reference the reference text as it appeared in the source.
 * @param baseUrl absolute URL of the document the reference was found in.
 * @returns the absolute URL string, or `null` when the reference is empty or
 *   whitespace-only, is a bare fragment (`#...`), uses a scheme listed in
 *   `NON_FETCHABLE_PROTOCOLS`, or cannot be parsed as a URL.
 */
function resolveCandidateUrl(reference: string, baseUrl: string): string | null {
  // Trim first: the URL parser ignores surrounding whitespace, so a blank
  // reference would otherwise resolve to the base URL itself.
  const trimmed = reference.trim();
  if (!trimmed || trimmed.startsWith("#")) {
    return null;
  }

  try {
    const resolved = new URL(trimmed, baseUrl);

    // `URL.protocol` is always lower-case, so this check is case-insensitive
    // (a plain prefix test would let "DATA:..." through).
    if (NON_FETCHABLE_PROTOCOLS.has(resolved.protocol)) {
      return null;
    }

    const parsed = z.string().url().safeParse(resolved.toString());
    return parsed.success ? parsed.data : null;
  } catch {
    // `new URL` throws on an unparseable reference or base.
    return null;
  }
}
|
|
70
|
+
|
|
71
|
+
/**
 * Guesses an artifact type from the extension of the URL's path.
 *
 * The comparison is case-insensitive and ignores the query string and
 * fragment, because only `URL.pathname` is inspected.
 *
 * @param url absolute URL; `new URL(url)` throws if it cannot be parsed.
 * @param fallback type returned when no extension rule matches.
 * @returns the inferred type, or `fallback`.
 */
function inferAssetTypeFromUrl(url: string, fallback: ArtifactType = "script"): ArtifactType {
  const pathname = new URL(url).pathname.toLowerCase();

  if (pathname.endsWith(".wasm")) {
    return "wasm";
  }

  if (pathname.endsWith(".css")) {
    return "stylesheet";
  }

  // Must run before the generic ".json" rule: a path ending in
  // "manifest.json" also ends in ".json", so testing ".json" first made the
  // manifest rule unreachable for that name.
  if (pathname.endsWith(".webmanifest") || pathname.endsWith("manifest.json")) {
    return "manifest";
  }

  if (pathname.endsWith(".json")) {
    return "json";
  }

  if (pathname.endsWith(".html") || pathname.endsWith(".htm")) {
    return "html";
  }

  return fallback;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Scans JavaScript source text with regular expressions for references to
 * other artifacts: static and dynamic imports, service-worker registrations,
 * `Worker`/`SharedWorker` constructors and quoted `.wasm` paths.
 *
 * Each hit is resolved against `baseUrl`; its type is inferred from the
 * resolved URL's extension, falling back to the type tied to the pattern that
 * matched. Results are de-duplicated by URL, keeping the first hit.
 *
 * @param source JavaScript text to scan.
 * @param baseUrl URL used to resolve relative references.
 * @param discoveredFrom label stored on every candidate produced.
 */
function extractFromJavaScript(source: string, baseUrl: string, discoveredFrom: string): ArtifactCandidate[] {
  const found = new Map<string, ArtifactCandidate>();

  // [pattern, type used when the URL's extension says nothing]
  const patterns: Array<[RegExp, ArtifactType]> = [
    [/(?:import|export)\s+(?:[^"'`]+?\s+from\s+)?["'`]([^"'`]+)["'`]/g, "script"],
    [/import\(\s*["'`]([^"'`]+)["'`]\s*\)/g, "script"],
    [/navigator\.serviceWorker\.register\(\s*(?:new\s+URL\(\s*)?["'`]([^"'`]+)["'`]/g, "service-worker"],
    [/new\s+(?:SharedWorker|Worker)\(\s*(?:new\s+URL\(\s*)?["'`]([^"'`]+)["'`]/g, "worker"],
    [/["'`]([^"'`]+\.wasm(?:\?[^"'`]*)?)["'`]/g, "wasm"],
  ];

  for (const [pattern, defaultType] of patterns) {
    for (const hit of source.matchAll(pattern)) {
      const absolute = resolveCandidateUrl(hit[1] ?? "", baseUrl);
      if (absolute) {
        const assetType = inferAssetTypeFromUrl(absolute, defaultType);
        // Same-origin restriction is off, so cross-origin references are kept.
        addCandidate(found, makeCandidate(absolute, assetType, discoveredFrom), false, new URL(baseUrl).origin);
      }
    }
  }

  return Array.from(found.values());
}
|
|
126
|
+
|
|
127
|
+
/**
 * Scans CSS text with regular expressions for `@import` targets and `url(...)`
 * references.
 *
 * Each hit is resolved against `baseUrl` and typed by its extension, falling
 * back to "stylesheet". Results are de-duplicated by URL, keeping the first hit.
 *
 * @param source CSS text to scan.
 * @param baseUrl URL used to resolve relative references.
 * @param discoveredFrom label stored on every candidate produced.
 */
function extractFromCss(source: string, baseUrl: string, discoveredFrom: string): ArtifactCandidate[] {
  const found = new Map<string, ArtifactCandidate>();
  const patterns = [
    /@import\s+(?:url\()?["']?([^"'()]+)["']?\)?/g,
    /url\(\s*["']?([^"'()]+)["']?\s*\)/g,
  ];

  for (const pattern of patterns) {
    for (const hit of source.matchAll(pattern)) {
      const absolute = resolveCandidateUrl(hit[1] ?? "", baseUrl);
      if (absolute) {
        const assetType = inferAssetTypeFromUrl(absolute, "stylesheet");
        // Same-origin restriction is off, so cross-origin references are kept.
        addCandidate(found, makeCandidate(absolute, assetType, discoveredFrom), false, new URL(baseUrl).origin);
      }
    }
  }

  return Array.from(found.values());
}
|
|
153
|
+
|
|
154
|
+
/**
 * Collects artifact candidates referenced by an HTML document.
 *
 * Sources, in order: `<script src>`, `<link href>` (manifest, stylesheet and
 * script preloads), same-origin `<a href>` targets that look like pages, and
 * references found inside inline `<script>` bodies. Candidates are
 * de-duplicated by URL; the first occurrence wins.
 *
 * @param html markup to parse.
 * @param pageUrl absolute URL of the document, used as the base for relative
 *   references and as the origin for the same-origin check on anchors.
 */
export function extractArtifactCandidates(html: string, pageUrl: string): ArtifactCandidate[] {
  const $ = cheerio.load(html);
  const found = new Map<string, ArtifactCandidate>();
  const origin = new URL(pageUrl).origin;

  // External scripts. An unresolvable src becomes "", which fails candidate validation.
  $("script[src]").each((_, element) => {
    const rawSrc = $(element).attr("src")?.trim() ?? "";
    const resolved = resolveCandidateUrl(rawSrc, pageUrl) ?? "";
    addCandidate(found, makeCandidate(resolved, "script", "html:script"), false, origin);
  });

  // <link> elements: the rel value decides the type; other rel values are ignored.
  $("link[href]").each((_, element) => {
    const link = $(element);
    const href = resolveCandidateUrl(link.attr("href")?.trim() ?? "", pageUrl);
    if (!href) {
      return;
    }

    const rel = (link.attr("rel") ?? "").toLowerCase();
    const asValue = (link.attr("as") ?? "").toLowerCase();

    let linkType: ArtifactType;
    let label: string;
    if (rel.includes("manifest")) {
      linkType = "manifest";
      label = "html:manifest";
    } else if (rel.includes("stylesheet")) {
      linkType = "stylesheet";
      label = "html:stylesheet";
    } else if (rel.includes("modulepreload") || (rel.includes("preload") && asValue === "script")) {
      linkType = "script";
      label = "html:preload";
    } else {
      return;
    }

    addCandidate(found, makeCandidate(href, linkType, label), false, origin);
  });

  // Anchors: only same-origin targets whose path looks like a page.
  $("a[href]").each((_, element) => {
    const href = resolveCandidateUrl($(element).attr("href")?.trim() ?? "", pageUrl);
    if (!href) {
      return;
    }

    const target = new URL(href);
    if (target.origin !== origin) {
      return;
    }

    const path = target.pathname.toLowerCase();
    const hasExtension = /\.[a-z0-9]+$/i.test(path);
    const looksLikePage =
      path === "" || path.endsWith("/") || path.endsWith(".html") || path.endsWith(".htm") || !hasExtension;

    if (looksLikePage) {
      addCandidate(found, makeCandidate(href, "html", "html:anchor"), true, origin);
    }
  });

  // Inline scripts: scan their text as JavaScript.
  $("script:not([src])").each((_, element) => {
    const inlineCode = $(element).html() ?? "";
    const nested = extractFromJavaScript(inlineCode, pageUrl, "html:inline-script");
    nested.forEach((candidate) => addCandidate(found, candidate, false, origin));
  });

  return Array.from(found.values());
}
|
|
218
|
+
|
|
219
|
+
/**
 * Extracts further candidates from an already-downloaded artifact, picking the
 * extractor by artifact type.
 *
 * HTML is parsed as a document; scripts, service workers and workers are
 * scanned as JavaScript; stylesheets are scanned as CSS. Every other type
 * yields an empty list.
 */
export function extractNestedCandidates(artifact: DiscoveredArtifact): ArtifactCandidate[] {
  const { type, content, url } = artifact;

  switch (type) {
    case "html":
      return extractArtifactCandidates(content, url);
    case "script":
    case "service-worker":
    case "worker":
      return extractFromJavaScript(content, url, `${type}:code`);
    case "stylesheet":
      return extractFromCss(content, url, "stylesheet:code");
    default:
      return [];
  }
}
|
package/lib/cli-args.ts
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
import { providerTypeSchema } from "./provider";
|
|
4
|
+
|
|
5
|
+
// Small schema factories so each option below reads as "what kind of value".
const flag = () => z.boolean().default(false);
const optionalText = () => z.string().min(1).optional();
const optionalUrl = () => z.string().url().optional();
const optionalPositiveInt = () => z.number().int().positive().optional();

/** Shape of the fully parsed command line; switches default to `false`. */
const rawCliArgsSchema = z.object({
  help: flag(),
  version: flag(),
  headless: flag(),
  reconfigure: flag(),
  listModels: flag(),
  localRag: flag(),
  verboseAgents: flag(),
  url: optionalUrl(),
  output: optionalText(),
  providerType: providerTypeSchema.optional(),
  providerName: optionalText(),
  apiKey: optionalText(),
  baseURL: optionalUrl(),
  model: optionalText(),
  contextSize: optionalPositiveInt(),
  maxPages: optionalPositiveInt(),
  maxArtifacts: optionalPositiveInt(),
});

/** Provider settings that flags may override; unknown keys are rejected. */
const cliConfigOverrideSchema = z
  .object({
    providerType: providerTypeSchema.optional(),
    providerName: optionalText(),
    apiKey: optionalText(),
    baseURL: optionalUrl(),
    model: optionalText(),
    modelContextSize: optionalPositiveInt(),
  })
  .strict();

export type CliArgs = z.infer<typeof rawCliArgsSchema>;

/** Maps every accepted flag spelling to the `CliArgs` key it sets. */
const optionMap = new Map<string, keyof CliArgs>([
  // switches
  ["--help", "help"],
  ["-h", "help"],
  ["--version", "version"],
  ["-v", "version"],
  ["--headless", "headless"],
  ["--reconfigure", "reconfigure"],
  ["--list-models", "listModels"],
  ["--local-rag", "localRag"],
  ["--verbose-agents", "verboseAgents"],
  // options that take a value
  ["--url", "url"],
  ["-u", "url"],
  ["--output", "output"],
  ["--provider-type", "providerType"],
  ["--provider-name", "providerName"],
  ["--api-key", "apiKey"],
  ["--base-url", "baseURL"],
  ["--model", "model"],
  ["--context-size", "contextSize"],
  ["--max-pages", "maxPages"],
  ["--max-artifacts", "maxArtifacts"],
]);

/** Keys set by the mere presence of their flag. */
const booleanKeys = new Set<keyof CliArgs>([
  "help",
  "version",
  "headless",
  "reconfigure",
  "listModels",
  "localRag",
  "verboseAgents",
]);

/** Keys whose textual value is converted to a number before validation. */
const numberKeys = new Set<keyof CliArgs>(["contextSize", "maxPages", "maxArtifacts"]);
|
|
63
|
+
|
|
64
|
+
/**
 * Converts a raw option value to the runtime type its key expects: keys in
 * `numberKeys` go through `Number(...)`, everything else stays a string.
 */
function normalizeValue(key: keyof CliArgs, value: string): unknown {
  return numberKeys.has(key) ? Number(value) : value;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Parses command-line tokens into validated `CliArgs`.
 *
 * Options may be written as `--key value` or `--key=value`. Switches take no
 * value, but an explicit `--flag=true` / `--flag=false` is honored.
 *
 * @param argv tokens to parse. Every non-empty token must be an option: any
 *   token not starting with `-` is rejected as positional, so the caller is
 *   expected to have removed the runtime and script entries.
 * @returns the arguments after validation by `rawCliArgsSchema`.
 * @throws Error on a positional argument, an unknown option, a switch with an
 *   inline value other than true/false, or an option missing its value.
 *   Schema validation failures propagate from `rawCliArgsSchema.parse`.
 */
export function parseCliArgs(argv: string[]): CliArgs {
  const accumulator: Record<string, unknown> = {};

  for (let index = 0; index < argv.length; index += 1) {
    const token = argv[index];
    if (!token) {
      continue;
    }
    if (!token.startsWith("-")) {
      throw new Error(`Unexpected positional argument: ${token}`);
    }

    // Split on the first "=" only, so values may themselves contain "=".
    const separatorIndex = token.indexOf("=");
    const rawKey = separatorIndex === -1 ? token : token.slice(0, separatorIndex);
    const inlineValue = separatorIndex === -1 ? undefined : token.slice(separatorIndex + 1);

    const mappedKey = optionMap.get(rawKey);
    if (!mappedKey) {
      throw new Error(`Unknown argument: ${rawKey}`);
    }

    if (booleanKeys.has(mappedKey)) {
      // Honor an explicit inline value instead of treating "--flag=false" as true.
      const normalized = inlineValue?.toLowerCase();
      if (normalized === undefined || normalized === "true") {
        accumulator[mappedKey] = true;
      } else if (normalized === "false") {
        accumulator[mappedKey] = false;
      } else {
        throw new Error(`Argument ${rawKey} accepts only "true" or "false" as an inline value.`);
      }
      continue;
    }

    let value: string;
    if (inlineValue !== undefined) {
      // An inline value is unambiguous, so it may start with "-" (e.g. "--output=-").
      if (inlineValue === "") {
        throw new Error(`Argument ${rawKey} requires a value.`);
      }
      value = inlineValue;
    } else {
      // A following token that starts with "-" is taken to be the next option.
      const next = argv[index + 1];
      if (!next || next.startsWith("-")) {
        throw new Error(`Argument ${rawKey} requires a value.`);
      }
      value = next;
      index += 1; // consume the value token
    }

    accumulator[mappedKey] = normalizeValue(mappedKey, value);
  }

  return rawCliArgsSchema.parse(accumulator);
}
|
|
108
|
+
|
|
109
|
+
/**
 * Picks the provider-related settings out of the parsed arguments.
 *
 * Only settings that were actually supplied are included, and `contextSize`
 * is exposed as `modelContextSize`. The result is validated by
 * `cliConfigOverrideSchema`.
 */
export function getConfigOverrides(args: CliArgs) {
  // [override key, value from the command line]
  const supplied: Array<[string, unknown]> = [
    ["providerType", args.providerType],
    ["providerName", args.providerName],
    ["apiKey", args.apiKey],
    ["baseURL", args.baseURL],
    ["model", args.model],
    ["modelContextSize", args.contextSize],
  ];

  const overrides: Record<string, unknown> = {};
  for (const [key, value] of supplied) {
    if (value !== undefined) {
      overrides[key] = value;
    }
  }

  return cliConfigOverrideSchema.parse(overrides);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Builds the usage text for the CLI.
 *
 * @returns the help text as one newline-joined string, with a blank line
 *   between sections and no trailing newline.
 */
export function renderHelpText(): string {
  const header = ["Mapr", "", "Usage:", " mapr [options]"];

  const coreOptions = [
    "Core options:",
    " --url, -u <url> Target URL to analyze",
    " --headless Disable prompts and require config from saved values or flags",
    " --output <path> Write the report to a specific path",
    " --max-pages <number> Limit same-origin HTML pages to crawl",
    " --max-artifacts <number> Limit total downloaded artifacts",
  ];

  const providerOptions = [
    "Provider options:",
    " --provider-type <type> openai | openai-compatible",
    " --provider-name <name> Display name for the provider",
    " --api-key <key> Provider API key",
    " --base-url <url> Base URL for the provider",
    " --model <id> Model identifier",
    " --context-size <tokens> Model context window, for example 128000 or 512000",
    " --list-models Fetch and print models using the resolved provider config",
    " --local-rag Enable local lexical RAG for oversized artifacts",
    " --reconfigure Force interactive provider reconfiguration",
  ];

  const diagnostics = [
    "Output and diagnostics:",
    " --verbose-agents Print agent swarm completion events",
    " --help, -h Show help",
    " --version, -v Show version",
  ];

  // Sections are separated by exactly one empty line.
  return [header, coreOptions, providerOptions, diagnostics].map((section) => section.join("\n")).join("\n\n");
}
|