launchframe 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,372 @@
1
+ /**
2
+ * `extract` — the headline command.
3
+ *
4
+ * npm run extract -- https://site-a.com https://site-b.com https://site-c.com
5
+ *
6
+ * For each URL: open in Chromium, screenshot, harvest computed design
7
+ * tokens via `browser-extract.ts`. After all sites: synthesize an
8
+ * original shadcn-compatible design system and emit drop-in files.
9
+ *
10
+ * Output goes to `output/<runId>/`.
11
+ *
12
+ * Policy (from rules/anti-clone-policy.md):
13
+ * - Honor robots.txt by default.
14
+ * - Per-domain rate limit defaults to 15 req/min.
15
+ * - Store only PNG screenshots and harvested computed-style values.
16
+ * - Never persist HTML, JS, CSS, or third-party assets.
17
+ */
18
+
19
+ import { mkdirSync, writeFileSync } from "node:fs";
20
+ import { dirname, join } from "node:path";
21
+ import { fileURLToPath, pathToFileURL } from "node:url";
22
+
23
+ import { chromium, type Browser } from "playwright";
24
+
25
+ import { harvestTokens } from "./browser-extract.js";
26
+ import { emitAll } from "./emit.js";
27
+ import { synthesize } from "./synthesize.js";
28
+ import type { ExtractionRun, RawTokens, SiteCapture } from "./types.js";
29
+
30
+ const __filename = fileURLToPath(import.meta.url); // filesystem path of this module, derived from its file URL
31
+ const __dirname = dirname(__filename); // directory containing this module; not referenced elsewhere in this file
32
+ /** Writes under the user's cwd so `npx launchframe` from any folder works. */
33
+ const DEFAULT_OUTPUT_ROOT = join(process.cwd(), "output"); // main() appends the runId to this unless --out is given
34
+
35
+ const USER_AGENT = // sent with the robots.txt fetch and set as the browser context's user agent
36
+ "launchframe/0.1 (+https://github.com/evangruhlkey/launchframe; design-token research; respects robots.txt)";
37
+
38
/** Options for one `extract` run, as produced by {@link parseArgs}. */
interface CliArgs {
  /** Target page URLs, in the order they appeared on the command line. */
  urls: string[];
  /** Directory passed via `--out`; empty string when the flag was not given. */
  outDir: string;
  /** Browser viewport used for every capture (`--width` / `--height`). */
  viewport: { width: number; height: number };
  /** False when `--no-robots` was passed. */
  respectRobots: boolean;
  /** Per-domain request budget per minute (`--rate`). */
  rateLimitPerMinute: number;
  /** Optional slug from `--name`, used when building the run id. */
  runName?: string;
}

/**
 * Parses the command-line arguments of the `extract` command.
 *
 * Every argument is either a known flag (optionally followed by its value) or
 * an absolute http(s) URL. Anything else is reported on stderr and the process
 * exits with status 2. `--help` / `-h` prints the usage text and exits with 0.
 *
 * Flag values are validated up front: a missing value, a non-numeric or
 * non-positive `--rate` / `--width` / `--height`, or a URL that the WHATWG
 * parser rejects all exit with status 2 instead of leaking `undefined` / `NaN`
 * into the pipeline (a `NaN` rate would otherwise disable rate limiting).
 *
 * @param argv Arguments after the script name (typically `process.argv.slice(2)`).
 * @returns The parsed options; `urls` is never empty.
 */
function parseArgs(argv: string[]): CliArgs {
  const args: CliArgs = {
    urls: [],
    outDir: "",
    viewport: { width: 1440, height: 900 },
    respectRobots: true,
    rateLimitPerMinute: 15,
  };

  /** Reports a usage error and terminates with the CLI's usage-error status. */
  function fail(message: string): never {
    console.error(message);
    process.exit(2);
  }

  /** Returns the value following `flag`, rejecting a missing value or another flag. */
  function takeValue(flag: string, index: number): string {
    const value = argv[index];
    if (value === undefined || value.startsWith("--")) {
      return fail(`Missing value for ${flag}`);
    }
    return value;
  }

  /** Like `takeValue`, but the value must parse as an integer >= 1. */
  function takePositiveInt(flag: string, index: number): number {
    const text = takeValue(flag, index);
    const parsed = Number.parseInt(text, 10);
    if (Number.isNaN(parsed) || parsed < 1) {
      return fail(`Invalid value for ${flag}: ${text} (expected a positive integer)`);
    }
    return parsed;
  }

  /** True when the WHATWG URL parser accepts `candidate`. */
  function isParsableUrl(candidate: string): boolean {
    try {
      new URL(candidate);
      return true;
    } catch {
      return false;
    }
  }

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i] ?? "";
    if (a === "--out") args.outDir = takeValue(a, ++i);
    else if (a === "--name") args.runName = takeValue(a, ++i);
    else if (a === "--no-robots") args.respectRobots = false;
    else if (a === "--rate") args.rateLimitPerMinute = takePositiveInt(a, ++i);
    else if (a === "--width") args.viewport.width = takePositiveInt(a, ++i);
    else if (a === "--height") args.viewport.height = takePositiveInt(a, ++i);
    else if (a === "--help" || a === "-h") {
      printHelp();
      process.exit(0);
    } else if (a.startsWith("http://") || a.startsWith("https://")) {
      // A matching scheme prefix is not enough: "https://" alone has no host.
      if (!isParsableUrl(a)) fail(`Invalid URL: ${a}`);
      args.urls.push(a);
    } else if (a.startsWith("--")) {
      fail(`Unknown flag: ${a}`);
    } else {
      fail(`Unrecognized argument: ${a}`);
    }
  }

  if (args.urls.length === 0) {
    printHelp();
    process.exit(2);
  }
  return args;
}
82
+
83
/** Prints the usage text for the `extract` command to stdout. */
function printHelp(): void {
  const usage = [
    "Usage:",
    " npx launchframe <url> [<url> ...] [options] (from any folder)",
    " npm run extract -- <url> [<url> ...] [options] (from this repo)",
  ];
  const outputNote = [
    "Writes to ./output/<runId>/ in your current working directory unless",
    "you pass --out.",
  ];
  const summary = [
    "Captures each URL at a desktop viewport, harvests computed design",
    "tokens (colors, type, spacing, radius, shadow), and synthesizes a",
    "drop-in shadcn-compatible design system in output/<runId>/.",
  ];
  const options = [
    "Options:",
    " --out <dir> Output directory (default: output/<runId>)",
    " --name <slug> Human-friendly slug used in the runId",
    " --no-robots Skip robots.txt check (not recommended)",
    " --rate <per-min> Per-domain rate limit, default 15",
    " --width <px> Viewport width, default 1440",
    " --height <px> Viewport height, default 900",
    " --help Show this help",
  ];

  // Sections are separated by a single blank line.
  const helpText = [...usage, "", ...outputNote, "", ...summary, "", ...options].join("\n");
  console.log(helpText);
}
108
+
109
+ /* -------------------------------------------------------------------------- */
110
+ /* robots.txt */
111
+ /* -------------------------------------------------------------------------- */
112
+
113
/**
 * Fetches `<origin>/robots.txt` for `url` and reports whether its rules permit
 * fetching that URL.
 *
 * The value matched against the rules is the URL's path *including* its query
 * string, so rules such as `Disallow: /search?q=` apply.
 *
 * Deliberately fails open: a non-2xx response, a network error, the 8 s
 * timeout, or an unparsable `url` all yield `true`.
 * NOTE(review): RFC 9309 asks crawlers to treat 5xx / unreachable robots.txt
 * as "disallow all"; the best-effort behaviour is kept here unchanged.
 *
 * @param url Absolute URL of the page about to be captured.
 * @returns `false` only when robots.txt was fetched and disallows the URL.
 */
async function isAllowedByRobots(url: string): Promise<boolean> {
  try {
    const target = new URL(url);
    const res = await fetch(`${target.origin}/robots.txt`, {
      headers: { "User-Agent": USER_AGENT },
      signal: AbortSignal.timeout(8_000),
    });
    if (!res.ok) return true;
    const body = await res.text();
    return checkRobots(body, `${target.pathname}${target.search}`);
  } catch {
    return true;
  }
}
127
+
128
/**
 * Tests a URL path against one robots.txt pattern.
 *
 * `*` matches any run of characters (including none) and a trailing `$`
 * anchors the pattern to the end of the path; everything else is literal.
 * Without `$` the pattern is a prefix match. Implemented with plain string
 * searches so a hostile robots.txt cannot trigger regex backtracking.
 */
function robotsPatternMatches(pattern: string, path: string): boolean {
  const anchored = pattern.endsWith("$");
  const segments = (anchored ? pattern.slice(0, -1) : pattern).split("*");

  const head = segments[0] ?? "";
  if (!path.startsWith(head)) return false;
  let cursor = head.length;
  if (segments.length === 1) return !anchored || cursor === path.length;

  for (let i = 1; i < segments.length; i++) {
    const segment = segments[i] ?? "";
    if (anchored && i === segments.length - 1) {
      // The final literal must sit at the very end, after everything matched so far.
      return path.length - segment.length >= cursor && path.endsWith(segment);
    }
    // Leftmost match leaves the most room for the remaining segments.
    const found = path.indexOf(segment, cursor);
    if (found < 0) return false;
    cursor = found + segment.length;
  }
  return true;
}

/**
 * Decides whether `pathname` may be fetched according to the `User-agent: *`
 * rules of a robots.txt body.
 *
 * - Consecutive `User-agent` lines share one group, so a group that names `*`
 *   together with other agents still applies.
 * - Rules from every `*` group in the file are combined.
 * - `*` and `$` wildcards in rule values are honoured.
 * - The longest matching pattern wins; on a tie `Allow` beats `Disallow`.
 * - Empty `Allow:` / `Disallow:` values are ignored, as are lines without a colon.
 *
 * @param body Raw robots.txt content.
 * @param pathname Path (optionally with query string) to test.
 * @returns `true` when no `Disallow` rule matches or an `Allow` rule is at least as specific.
 */
function checkRobots(body: string, pathname: string): boolean {
  const allow: string[] = [];
  const disallow: string[] = [];
  let inStarBlock = false;
  let previousWasUserAgent = false;

  // A UTF-8 BOM would otherwise glue itself onto the first directive's key.
  const text = body.charCodeAt(0) === 0xfeff ? body.slice(1) : body;

  for (const raw of text.split(/\r?\n/)) {
    const line = (raw.split("#")[0] ?? "").trim();
    if (!line) continue;
    const idx = line.indexOf(":");
    if (idx < 0) continue;
    const key = line.slice(0, idx).toLowerCase().trim();
    const value = line.slice(idx + 1).trim();

    if (key === "user-agent") {
      const isStar = value === "*";
      // Back-to-back user-agent lines extend the current group; otherwise a new group starts.
      inStarBlock = previousWasUserAgent ? inStarBlock || isStar : isStar;
      previousWasUserAgent = true;
      continue;
    }

    // Any other directive ends the run of user-agent lines.
    previousWasUserAgent = false;
    if (!inStarBlock || !value) continue;
    if (key === "disallow") disallow.push(value);
    else if (key === "allow") allow.push(value);
  }

  const longestMatch = (patterns: readonly string[]): number =>
    patterns.reduce(
      (best, pattern) =>
        robotsPatternMatches(pattern, pathname) ? Math.max(best, pattern.length) : best,
      -1,
    );

  const allowLength = longestMatch(allow);
  const disallowLength = longestMatch(disallow);
  if (disallowLength < 0) return true;
  return allowLength >= disallowLength;
}
151
+
152
+ /* -------------------------------------------------------------------------- */
153
+ /* Rate limiter */
154
+ /* -------------------------------------------------------------------------- */
155
+
156
/**
 * Spaces out requests per host so that at most `perMinute` are started per
 * minute for any single host.
 *
 * Intended for sequential use: each `wait` call records its own timestamp
 * only after it has finished sleeping.
 */
class RateLimiter {
  /** Minimum gap between two requests to the same host, in milliseconds. */
  private readonly intervalMs: number;
  /** Timestamp (ms since epoch) of the last request released per host. */
  private readonly lastByHost = new Map<string, number>();

  /**
   * @param perMinute Allowed requests per minute and host. Values below 1 are
   *   clamped to 1. `NaN` (e.g. from a failed `parseInt`) is also treated as 1,
   *   because a `NaN` interval would silently disable throttling.
   */
  constructor(perMinute: number) {
    const rate = Number.isNaN(perMinute) ? 1 : Math.max(1, perMinute);
    this.intervalMs = Math.ceil(60_000 / rate);
  }

  /**
   * Resolves once a request to `host` may be issued, sleeping for whatever
   * remains of the minimum interval. The first request to a host never waits.
   *
   * @param host Key used for throttling (callers pass `URL.host`).
   */
  async wait(host: string): Promise<void> {
    const last = this.lastByHost.get(host);
    if (last !== undefined) {
      const remaining = this.intervalMs - (Date.now() - last);
      if (remaining > 0) {
        await new Promise<void>((resolve) => setTimeout(resolve, remaining));
      }
    }
    this.lastByHost.set(host, Date.now());
  }
}
171
+
172
+ /* -------------------------------------------------------------------------- */
173
+ /* Pipeline */
174
+ /* -------------------------------------------------------------------------- */
175
+
176
/**
 * Builds a filesystem-safe file stem for a captured URL.
 *
 * A root URL on a default port maps to just its host name. Any path or query
 * is appended so several pages of one host do not overwrite each other, and
 * characters such as `:` (ports) or `/` are replaced with `_`.
 * The path part is capped at 80 characters.
 */
function captureFileStem(target: URL): string {
  const sanitize = (text: string): string =>
    text.replace(/[^A-Za-z0-9._-]+/g, "_").replace(/^_+|_+$/g, "");
  const hostPart = sanitize(target.host);
  const pathPart = sanitize(`${target.pathname}${target.search}`).slice(0, 80);
  return pathPart ? `${hostPart}__${pathPart}` : hostPart;
}

/**
 * Captures a single page: loads it in a fresh browser context, takes a
 * full-page PNG screenshot, harvests design tokens and writes both to disk.
 *
 * Files written (directories are created on demand):
 * - `<outDir>/screenshots/<stem>.png`
 * - `<outDir>/raw/<stem>.tokens.json`
 *
 * Failures while loading or harvesting (navigation errors, HTTP status >= 400,
 * timeouts, write errors) are not thrown; they are reported as a capture with
 * `status: "failed"` and an empty token set.
 *
 * @param browser Running Playwright browser to open the context in.
 * @param url Absolute URL to capture; must be parsable by `new URL`.
 * @param viewport Viewport size for the page.
 * @param outDir Root output directory of the current run.
 * @returns The harvested tokens plus capture metadata. This implementation
 *   never returns `null`; the `null` in the signature is kept for callers.
 */
async function captureOne(
  browser: Browser,
  url: string,
  viewport: { width: number; height: number },
  outDir: string,
): Promise<{ raw: RawTokens; capture: SiteCapture } | null> {
  const target = new URL(url);
  const host = target.host;
  const stem = captureFileStem(target);
  const screenshotPath = join(outDir, "screenshots", `${stem}.png`);
  const rawPath = join(outDir, "raw", `${stem}.tokens.json`);

  const ctx = await browser.newContext({
    userAgent: USER_AGENT,
    viewport,
    deviceScaleFactor: 2,
    reducedMotion: "reduce",
  });
  try {
    // Created inside the try so the context is closed even if this throws.
    const page = await ctx.newPage();
    const response = await page.goto(url, { waitUntil: "networkidle", timeout: 30_000 });
    if (!response || response.status() >= 400) {
      throw new Error(`HTTP ${response?.status() ?? "unknown"}`);
    }

    // Freeze animations and transitions before taking the screenshot.
    await page.evaluate(() => {
      const style = document.createElement("style");
      style.textContent = `*, *::before, *::after {
        animation: none !important;
        transition: none !important;
        scroll-behavior: auto !important;
      }`;
      document.head.appendChild(style);
    });
    // Short pause after injecting the stylesheet, before capturing.
    await page.waitForTimeout(400);

    mkdirSync(dirname(screenshotPath), { recursive: true });
    await page.screenshot({ path: screenshotPath, fullPage: true, type: "png" });

    const raw = await harvestTokens(page, url, viewport);
    mkdirSync(dirname(rawPath), { recursive: true });
    writeFileSync(rawPath, JSON.stringify(raw, null, 2));

    const capture: SiteCapture = {
      url,
      host,
      capturedAt: raw.capturedAt,
      screenshotPath,
      rawTokensPath: rawPath,
      status: "ok",
    };
    return { raw, capture };
  } catch (err: unknown) {
    return {
      raw: emptyRaw(url, viewport),
      capture: {
        url,
        host,
        capturedAt: new Date().toISOString(),
        screenshotPath: "",
        rawTokensPath: "",
        status: "failed",
        reason: err instanceof Error ? err.message : String(err),
      },
    };
  } finally {
    await ctx.close();
  }
}
244
+
245
/**
 * Builds an empty token set for a URL whose capture failed.
 *
 * Every token list is empty and `dominantContainerPx` is `null`;
 * `capturedAt` is the current time in ISO-8601 form. The viewport is copied
 * so the result does not alias (and cannot be changed through) the caller's
 * object.
 *
 * @param url URL the placeholder stands for.
 * @param viewport Viewport that was requested for the capture.
 */
function emptyRaw(url: string, viewport: { width: number; height: number }): RawTokens {
  return {
    url,
    capturedAt: new Date().toISOString(),
    viewport: { ...viewport },
    colors: [],
    typography: [],
    spacing: [],
    radii: [],
    shadows: [],
    dominantContainerPx: null,
  };
}
258
+
259
/**
 * Runs the whole `extract` pipeline for the URLs given on the command line.
 *
 * 1. Parse arguments and derive the run id / output directory.
 * 2. For each URL, in order: optionally check robots.txt, wait for the
 *    per-host rate limiter, then capture the page.
 * 3. Synthesize a design system from all successful captures and emit the
 *    output files plus `run.json`.
 *
 * A URL that cannot be parsed is recorded as a failed capture and the loop
 * continues, rather than aborting the run and discarding earlier captures.
 * The browser is always closed, even when a capture throws. If no capture
 * succeeded, the process exits with status 1 before anything is synthesized.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  const startedAt = new Date().toISOString();
  const runId = makeRunId(startedAt, args.runName);
  // `outDir` is "" when --out was not passed, so `||` (not `??`) is intended.
  const outDir = args.outDir || join(DEFAULT_OUTPUT_ROOT, runId);

  console.log(`[extract] runId=${runId}`);
  console.log(`[extract] urls=${args.urls.length} viewport=${args.viewport.width}x${args.viewport.height}`);
  console.log(`[extract] output=${outDir}`);
  console.log("");

  mkdirSync(outDir, { recursive: true });

  const limiter = new RateLimiter(args.rateLimitPerMinute);
  const captures: SiteCapture[] = [];
  const rawList: RawTokens[] = [];

  let browser: Browser | null = null;
  try {
    browser = await chromium.launch();

    for (const url of args.urls) {
      let host: string;
      try {
        host = new URL(url).host;
      } catch {
        // One malformed URL must not take the remaining URLs down with it.
        console.log(` ✗ ${url} invalid URL`);
        captures.push({
          url,
          host: "",
          capturedAt: new Date().toISOString(),
          screenshotPath: "",
          rawTokensPath: "",
          status: "failed",
          reason: "invalid URL",
        });
        continue;
      }

      if (args.respectRobots) {
        const allowed = await isAllowedByRobots(url);
        if (!allowed) {
          console.log(` ⊘ ${url} skipped — robots.txt disallows`);
          captures.push({
            url,
            host,
            capturedAt: new Date().toISOString(),
            screenshotPath: "",
            rawTokensPath: "",
            status: "skipped",
            reason: "robots.txt",
          });
          continue;
        }
      }

      await limiter.wait(host);
      const result = await captureOne(browser, url, args.viewport, outDir);
      if (!result) continue;
      captures.push(result.capture);
      if (result.capture.status === "ok") {
        // Only successful captures feed the synthesis step.
        rawList.push(result.raw);
        console.log(` ✓ ${url}`);
      } else {
        console.log(` ✗ ${url} ${result.capture.reason ?? ""}`);
      }
    }
  } finally {
    if (browser) await browser.close();
  }

  if (rawList.length === 0) {
    console.error("[extract] no successful captures — nothing to synthesize.");
    process.exit(1);
  }

  console.log("");
  console.log(`[extract] synthesizing design system from ${rawList.length} site(s)...`);
  const designSystem = synthesize(rawList, {
    runId,
    sources: rawList.map((r) => ({ url: r.url, capturedAt: r.capturedAt })),
  });

  const run: ExtractionRun = {
    runId,
    startedAt,
    finishedAt: new Date().toISOString(),
    outputDir: outDir,
    captures,
    designSystem,
  };

  const written = emitAll(designSystem, run);
  writeFileSync(join(outDir, "run.json"), JSON.stringify(run, null, 2));
  console.log("");
  console.log("[extract] wrote:");
  for (const f of written) console.log(` → ${f}`);
  console.log(` → ${join(outDir, "run.json")}`);
  console.log("");
  console.log(`[extract] done. Open ${join(outDir, "REPORT.md")} for the summary.`);
  console.log(`[extract] Give your AI: ${join(outDir, "FOR_AI.md")}`);
}
346
+
347
/**
 * Builds the run id: a 14-digit `YYYYMMDDHHMMSS` stamp taken from an ISO
 * timestamp, optionally followed by `-<slug>`.
 *
 * The run id becomes a directory name, so `name` is reduced to a safe slug:
 * runs of characters outside `[A-Za-z0-9._-]` (path separators, spaces, ...)
 * become a single `-`, and leading/trailing `-` or `.` are dropped. A name
 * that is empty after this is ignored. Names already in slug form are unchanged.
 *
 * @param startedAt ISO-8601 timestamp such as `new Date().toISOString()`.
 * @param name Optional user-supplied label (`--name`).
 */
function makeRunId(startedAt: string, name: string | undefined): string {
  const stamp = startedAt.replace(/[-:T]/g, "").slice(0, 14);
  const slug = (name ?? "")
    .replace(/[^A-Za-z0-9._-]+/g, "-")
    .replace(/^[-.]+|[-.]+$/g, "");
  return slug ? `${stamp}-${slug}` : stamp;
}
351
+
352
// Run the pipeline only when this file is the process entry point; importing
// it (for the exported `main`) must not start a run.
if (isMainModule(import.meta.url)) {
  const reportAndExit = (failure: unknown): void => {
    console.error(failure);
    process.exit(1);
  };
  main().catch(reportAndExit);
}
358
+
359
/**
 * Reports whether the module identified by `metaUrl` is the script Node was
 * started with.
 *
 * The comparison is done on file URLs: `process.argv[1]` is converted with
 * `pathToFileURL`, which yields the same encoded form as `import.meta.url` on
 * every platform. Building `file://${argv[1]}` by hand fails on Windows,
 * where the entry is a backslash path, and the script would silently do
 * nothing.
 *
 * @param metaUrl The caller's `import.meta.url`.
 * @returns `false` when there is no entry script or it is a different file.
 */
function isMainModule(metaUrl: string): boolean {
  const [, entryPath] = process.argv;
  return entryPath ? pathToFileURL(entryPath).href === metaUrl : false;
}
371
+
372
+ export { main };
@@ -0,0 +1,13 @@
1
+ {
2
+ "name": "@framework/extract",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "main": "./extract.ts",
7
+ "scripts": {
8
+ "extract": "tsx extract.ts"
9
+ },
10
+ "dependencies": {
11
+ "playwright": "^1.48.0"
12
+ }
13
+ }