@redstone-md/mapr 0.0.2-alpha → 0.0.3-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/assets/banner.svg +50 -0
- package/lib/scraper.ts +89 -7
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Mapr
|
|
2
2
|
|
|
3
|
+

|
|
4
|
+
|
|
3
5
|
Mapr is a Bun-native CLI/TUI for reverse-engineering frontend websites and build outputs. It crawls a target site, collects analyzable frontend artifacts, runs a multi-agent AI analysis pipeline over chunked code, and writes a Markdown report with entry points, initialization flow, inferred call graph edges, restored names, artifact summaries, and investigation tips.
|
|
4
6
|
|
|
5
7
|
This repository is public for source visibility and collaboration. The license remains source-available and restricted. Read the contribution and license sections before reusing or contributing to the codebase.
|
|
package/assets/banner.svg
@@ -0,0 +1,50 @@
|
|
|
1
|
+
<svg width="1280" height="320" viewBox="0 0 1280 320" fill="none" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="title desc">
|
|
2
|
+
<title id="title">Mapr repository banner</title>
|
|
3
|
+
<desc id="desc">A dark technical banner showing Mapr's flow from crawl to artifacts to report.</desc>
|
|
4
|
+
<rect width="1280" height="320" fill="#111315"/>
|
|
5
|
+
<rect x="24" y="24" width="1232" height="272" rx="14" fill="#15181B" stroke="#2A2E33"/>
|
|
6
|
+
<rect x="56" y="56" width="360" height="208" rx="12" fill="#181C20" stroke="#2E3339"/>
|
|
7
|
+
<rect x="460" y="56" width="360" height="208" rx="12" fill="#181C20" stroke="#2E3339"/>
|
|
8
|
+
<rect x="864" y="56" width="360" height="208" rx="12" fill="#181C20" stroke="#2E3339"/>
|
|
9
|
+
|
|
10
|
+
<text x="80" y="88" fill="#EEE6DA" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="28" font-weight="700">mapr</text>
|
|
11
|
+
<text x="80" y="114" fill="#8E877C" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="14">frontend reverse engineering for bun</text>
|
|
12
|
+
|
|
13
|
+
<text x="80" y="154" fill="#C67A3B" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="13">crawl</text>
|
|
14
|
+
<text x="484" y="88" fill="#C67A3B" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="13">artifacts</text>
|
|
15
|
+
<text x="888" y="88" fill="#C67A3B" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="13">report</text>
|
|
16
|
+
|
|
17
|
+
<rect x="80" y="172" width="170" height="20" rx="6" fill="#22272C"/>
|
|
18
|
+
<rect x="80" y="204" width="240" height="20" rx="6" fill="#22272C"/>
|
|
19
|
+
<rect x="80" y="236" width="120" height="12" rx="6" fill="#7F6B52"/>
|
|
20
|
+
<rect x="210" y="236" width="112" height="12" rx="6" fill="#22272C"/>
|
|
21
|
+
<rect x="332" y="236" width="42" height="12" rx="6" fill="#22272C"/>
|
|
22
|
+
|
|
23
|
+
<rect x="500" y="114" width="88" height="36" rx="8" fill="#1F2428" stroke="#343A41"/>
|
|
24
|
+
<rect x="596" y="114" width="88" height="36" rx="8" fill="#1F2428" stroke="#343A41"/>
|
|
25
|
+
<rect x="692" y="114" width="88" height="36" rx="8" fill="#1F2428" stroke="#343A41"/>
|
|
26
|
+
<rect x="500" y="164" width="88" height="36" rx="8" fill="#1F2428" stroke="#343A41"/>
|
|
27
|
+
<rect x="596" y="164" width="88" height="36" rx="8" fill="#1F2428" stroke="#343A41"/>
|
|
28
|
+
<rect x="692" y="164" width="88" height="36" rx="8" fill="#1F2428" stroke="#343A41"/>
|
|
29
|
+
<text x="530" y="136" fill="#EEE6DA" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="14">js</text>
|
|
30
|
+
<text x="618" y="136" fill="#EEE6DA" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="14">sw</text>
|
|
31
|
+
<text x="705" y="136" fill="#EEE6DA" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="14">map</text>
|
|
32
|
+
<text x="522" y="186" fill="#EEE6DA" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="14">wasm</text>
|
|
33
|
+
<text x="607" y="186" fill="#EEE6DA" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="14">worker</text>
|
|
34
|
+
<text x="713" y="186" fill="#EEE6DA" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="14">rag</text>
|
|
35
|
+
|
|
36
|
+
<rect x="888" y="112" width="312" height="120" rx="10" fill="#1D2226" stroke="#343941"/>
|
|
37
|
+
<rect x="914" y="134" width="84" height="8" rx="4" fill="#C67A3B"/>
|
|
38
|
+
<rect x="914" y="156" width="182" height="8" rx="4" fill="#444B52"/>
|
|
39
|
+
<rect x="914" y="176" width="212" height="8" rx="4" fill="#444B52"/>
|
|
40
|
+
<rect x="914" y="196" width="168" height="8" rx="4" fill="#444B52"/>
|
|
41
|
+
<rect x="1108" y="156" width="66" height="52" rx="8" fill="#171B1E" stroke="#343941"/>
|
|
42
|
+
<path d="M1132 171H1150M1132 183H1148M1132 195H1144" stroke="#EEE6DA" stroke-width="2" stroke-linecap="round"/>
|
|
43
|
+
|
|
44
|
+
<path d="M392 160C414 160 426 160 446 160" stroke="#7F6B52" stroke-width="2.5" stroke-linecap="round"/>
|
|
45
|
+
<path d="M796 160C816 160 828 160 848 160" stroke="#7F6B52" stroke-width="2.5" stroke-linecap="round"/>
|
|
46
|
+
<path d="M438 160L426 154V166L438 160Z" fill="#C67A3B"/>
|
|
47
|
+
<path d="M840 160L828 154V166L840 160Z" fill="#C67A3B"/>
|
|
48
|
+
|
|
49
|
+
<text x="930" y="254" fill="#8E877C" font-family="ui-monospace, SFMono-Regular, Menlo, Consolas, monospace" font-size="12">crawl -> analyze -> report</text>
|
|
50
|
+
</svg>
|
package/lib/scraper.ts
CHANGED
|
@@ -37,6 +37,7 @@ export interface ScrapeResult {
|
|
|
37
37
|
type FetchLike = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
|
|
38
38
|
type NumericScraperOptions = z.input<typeof scraperOptionsSchema>;
|
|
39
39
|
type QueueEntry = { candidate: ArtifactCandidate; depth: number };
|
|
40
|
+
type CrawlScope = "site" | "page";
|
|
40
41
|
|
|
41
42
|
export interface ScraperProgressEvent {
|
|
42
43
|
message: string;
|
|
@@ -53,6 +54,11 @@ function isPageCandidate(candidate: ArtifactCandidate, rootOrigin: string): bool
|
|
|
53
54
|
return candidate.type === "html" && new URL(candidate.url).origin === rootOrigin;
|
|
54
55
|
}
|
|
55
56
|
|
|
57
|
+
function isRootLikeEntry(url: string): boolean {
|
|
58
|
+
const pathname = new URL(url).pathname.toLowerCase();
|
|
59
|
+
return pathname === "/" || pathname === "" || pathname.endsWith("/index.html") || pathname.endsWith("/index.htm");
|
|
60
|
+
}
|
|
61
|
+
|
|
56
62
|
function shouldFollowCandidate(candidate: ArtifactCandidate, rootOrigin: string): boolean {
|
|
57
63
|
if (candidate.type === "html") {
|
|
58
64
|
return new URL(candidate.url).origin === rootOrigin;
|
|
@@ -150,6 +156,7 @@ export class BundleScraper {
|
|
|
150
156
|
public async scrape(pageUrl: string): Promise<ScrapeResult> {
|
|
151
157
|
const validatedPageUrl = httpUrlSchema.parse(pageUrl);
|
|
152
158
|
const rootOrigin = new URL(validatedPageUrl).origin;
|
|
159
|
+
const crawlScope: CrawlScope = isRootLikeEntry(validatedPageUrl) ? "site" : "page";
|
|
153
160
|
const visitedUrls = new Set<string>();
|
|
154
161
|
const htmlPages = new Set<string>();
|
|
155
162
|
const artifacts: DiscoveredArtifact[] = [];
|
|
@@ -164,7 +171,9 @@ export class BundleScraper {
|
|
|
164
171
|
},
|
|
165
172
|
];
|
|
166
173
|
|
|
167
|
-
|
|
174
|
+
if (crawlScope === "site") {
|
|
175
|
+
queue.push(...(await this.discoverSupplementalPages(rootOrigin)).map((candidate) => ({ candidate, depth: 1 })));
|
|
176
|
+
}
|
|
168
177
|
|
|
169
178
|
while (queue.length > 0) {
|
|
170
179
|
if (artifacts.length >= this.options.maxArtifacts) {
|
|
@@ -204,7 +213,7 @@ export class BundleScraper {
|
|
|
204
213
|
depth,
|
|
205
214
|
});
|
|
206
215
|
|
|
207
|
-
const artifact = await this.fetchArtifact(candidate, depth);
|
|
216
|
+
const artifact = await this.fetchArtifact(candidate, depth, candidate.url === validatedPageUrl);
|
|
208
217
|
if (!artifact) {
|
|
209
218
|
continue;
|
|
210
219
|
}
|
|
@@ -217,7 +226,7 @@ export class BundleScraper {
|
|
|
217
226
|
artifacts.push(artifact);
|
|
218
227
|
}
|
|
219
228
|
|
|
220
|
-
const nestedCandidates = extractNestedCandidates(artifact);
|
|
229
|
+
const nestedCandidates = this.filterNestedCandidates(extractNestedCandidates(artifact), validatedPageUrl, crawlScope);
|
|
221
230
|
for (const nestedCandidate of nestedCandidates) {
|
|
222
231
|
if (!visitedUrls.has(nestedCandidate.url)) {
|
|
223
232
|
queue.push({ candidate: nestedCandidate, depth: depth + 1 });
|
|
@@ -279,8 +288,12 @@ export class BundleScraper {
|
|
|
279
288
|
return [...candidates.values()];
|
|
280
289
|
}
|
|
281
290
|
|
|
282
|
-
private async fetchArtifact(candidate: ArtifactCandidate, depth: number): Promise<DiscoveredArtifact | null> {
|
|
283
|
-
const response = await this.fetchResponse(candidate.url, candidate.type);
|
|
291
|
+
private async fetchArtifact(candidate: ArtifactCandidate, depth: number, required: boolean): Promise<DiscoveredArtifact | null> {
|
|
292
|
+
const response = await this.fetchResponse(candidate.url, candidate.type, depth, required);
|
|
293
|
+
if (!response) {
|
|
294
|
+
return null;
|
|
295
|
+
}
|
|
296
|
+
|
|
284
297
|
const contentType = response.headers.get("content-type")?.toLowerCase() ?? "";
|
|
285
298
|
|
|
286
299
|
if (isIgnoredContentType(contentType)) {
|
|
@@ -335,7 +348,12 @@ export class BundleScraper {
|
|
|
335
348
|
});
|
|
336
349
|
}
|
|
337
350
|
|
|
338
|
-
private async fetchResponse(
|
|
351
|
+
private async fetchResponse(
|
|
352
|
+
url: string,
|
|
353
|
+
artifactType: ArtifactCandidate["type"],
|
|
354
|
+
depth: number,
|
|
355
|
+
required: boolean,
|
|
356
|
+
): Promise<Response | null> {
|
|
339
357
|
try {
|
|
340
358
|
const response = await this.fetcher(url, {
|
|
341
359
|
headers: {
|
|
@@ -344,11 +362,31 @@ export class BundleScraper {
|
|
|
344
362
|
});
|
|
345
363
|
|
|
346
364
|
if (!response.ok) {
|
|
347
|
-
|
|
365
|
+
if (required) {
|
|
366
|
+
throw new Error(`Failed to fetch ${artifactType} from ${url}: ${response.status} ${response.statusText}`);
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
this.emitProgress({
|
|
370
|
+
message: `Skipping ${artifactType} after ${response.status} ${response.statusText}: ${url}`,
|
|
371
|
+
url,
|
|
372
|
+
type: artifactType,
|
|
373
|
+
depth,
|
|
374
|
+
});
|
|
375
|
+
return null;
|
|
348
376
|
}
|
|
349
377
|
|
|
350
378
|
return response;
|
|
351
379
|
} catch (error) {
|
|
380
|
+
if (!required) {
|
|
381
|
+
this.emitProgress({
|
|
382
|
+
message: `Skipping ${artifactType} after fetch error: ${url}`,
|
|
383
|
+
url,
|
|
384
|
+
type: artifactType,
|
|
385
|
+
depth,
|
|
386
|
+
});
|
|
387
|
+
return null;
|
|
388
|
+
}
|
|
389
|
+
|
|
352
390
|
if (error instanceof Error) {
|
|
353
391
|
throw new Error(`Unable to fetch ${artifactType} artifact ${url}: ${error.message}`);
|
|
354
392
|
}
|
|
@@ -378,6 +416,50 @@ export class BundleScraper {
|
|
|
378
416
|
private emitProgress(event: ScraperProgressEvent): void {
|
|
379
417
|
this.onProgress?.(event);
|
|
380
418
|
}
|
|
419
|
+
|
|
420
|
+
private filterNestedCandidates(
|
|
421
|
+
candidates: ArtifactCandidate[],
|
|
422
|
+
entryUrl: string,
|
|
423
|
+
crawlScope: CrawlScope,
|
|
424
|
+
): ArtifactCandidate[] {
|
|
425
|
+
if (crawlScope === "site") {
|
|
426
|
+
return candidates;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
const entryPath = new URL(entryUrl).pathname.toLowerCase();
|
|
430
|
+
const entryStem = entryPath.replace(/(?:index)?\.html?$/i, "").replace(/\/+$/, "") || entryPath;
|
|
431
|
+
const entryDirectory = entryPath.includes("/") ? entryPath.slice(0, entryPath.lastIndexOf("/") + 1) : "/";
|
|
432
|
+
|
|
433
|
+
return candidates.filter((candidate) => {
|
|
434
|
+
if (candidate.type !== "html") {
|
|
435
|
+
return true;
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
const discoveredFrom = candidate.discoveredFrom.toLowerCase();
|
|
439
|
+
if (discoveredFrom.includes("iframe") || discoveredFrom.includes("form")) {
|
|
440
|
+
return true;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
const candidatePath = new URL(candidate.url).pathname.toLowerCase();
|
|
444
|
+
if (candidatePath === entryPath) {
|
|
445
|
+
return true;
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
if (entryDirectory !== "/") {
|
|
449
|
+
return candidatePath.startsWith(entryDirectory);
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
if (entryStem !== entryPath && candidatePath.startsWith(entryStem)) {
|
|
453
|
+
return true;
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
if (candidatePath.startsWith(`${entryPath}/`)) {
|
|
457
|
+
return true;
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
return false;
|
|
461
|
+
});
|
|
462
|
+
}
|
|
381
463
|
}
|
|
382
464
|
|
|
383
465
|
export { extractArtifactCandidates };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@redstone-md/mapr",
|
|
3
|
-
"version": "0.0.2-alpha",
|
|
3
|
+
"version": "0.0.3-alpha",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Bun-native CLI/TUI for reverse-engineering frontend websites, bundles, WASM, and service workers",
|
|
6
6
|
"license": "SEE LICENSE IN LICENSE",
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"mapr": "./bin/mapr"
|
|
31
31
|
},
|
|
32
32
|
"files": [
|
|
33
|
+
"assets",
|
|
33
34
|
"bin",
|
|
34
35
|
"index.ts",
|
|
35
36
|
"lib",
|