similarbuild 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/CHANGELOG.md +110 -0
  2. package/LICENSE +21 -0
  3. package/README.md +301 -0
  4. package/bin/install.js +256 -0
  5. package/lib/copy-templates.mjs +52 -0
  6. package/lib/install-deps.mjs +62 -0
  7. package/lib/prompt-config.mjs +83 -0
  8. package/lib/verify-env.mjs +19 -0
  9. package/package.json +63 -0
  10. package/scripts/sync-templates.mjs +71 -0
  11. package/templates/commands/build-page.md +490 -0
  12. package/templates/commands/build-site.md +548 -0
  13. package/templates/commands/clip-section.md +519 -0
  14. package/templates/memory/anti-patterns.md +212 -0
  15. package/templates/memory/design-knowledge.md +225 -0
  16. package/templates/memory/fixes.md +163 -0
  17. package/templates/memory/patterns.md +681 -0
  18. package/templates/presets/shopify-section.yaml +51 -0
  19. package/templates/presets/wp-elementor.yaml +49 -0
  20. package/templates/reports/fixtures/mock-run-1.json +115 -0
  21. package/templates/reports/fixtures/mock-run-2.json +72 -0
  22. package/templates/reports/report-renderer.mjs +218 -0
  23. package/templates/reports/report-template.html +571 -0
  24. package/templates/skills/sb-build-shopify/SKILL.md +104 -0
  25. package/templates/skills/sb-build-shopify/references/shopify-build-rules.md +563 -0
  26. package/templates/skills/sb-build-shopify/scripts/build-shopify.mjs +637 -0
  27. package/templates/skills/sb-build-shopify/scripts/tests/test-build-shopify.mjs +424 -0
  28. package/templates/skills/sb-build-wp/SKILL.md +83 -0
  29. package/templates/skills/sb-build-wp/references/wp-build-rules.md +376 -0
  30. package/templates/skills/sb-build-wp/scripts/build-wp.mjs +327 -0
  31. package/templates/skills/sb-build-wp/scripts/tests/test-build-wp.mjs +224 -0
  32. package/templates/skills/sb-compare-visual/SKILL.md +121 -0
  33. package/templates/skills/sb-compare-visual/scripts/compare-visual.mjs +387 -0
  34. package/templates/skills/sb-compare-visual/scripts/lib/compare-tokens.mjs +273 -0
  35. package/templates/skills/sb-compare-visual/scripts/tests/test-compare-tokens.mjs +350 -0
  36. package/templates/skills/sb-compare-visual/scripts/tests/test-compare-visual.mjs +626 -0
  37. package/templates/skills/sb-crawl-and-list/SKILL.md +99 -0
  38. package/templates/skills/sb-crawl-and-list/scripts/crawl-and-list.mjs +437 -0
  39. package/templates/skills/sb-crawl-and-list/scripts/lib/blocklist-filter.mjs +176 -0
  40. package/templates/skills/sb-crawl-and-list/scripts/lib/fallback-crawler.mjs +107 -0
  41. package/templates/skills/sb-crawl-and-list/scripts/lib/page-classifier.mjs +89 -0
  42. package/templates/skills/sb-crawl-and-list/scripts/lib/sitemap-parser.mjs +118 -0
  43. package/templates/skills/sb-crawl-and-list/scripts/tests/test-blocklist-filter.mjs +204 -0
  44. package/templates/skills/sb-crawl-and-list/scripts/tests/test-crawl-and-list.mjs +276 -0
  45. package/templates/skills/sb-crawl-and-list/scripts/tests/test-fallback-crawler.mjs +243 -0
  46. package/templates/skills/sb-crawl-and-list/scripts/tests/test-page-classifier.mjs +120 -0
  47. package/templates/skills/sb-crawl-and-list/scripts/tests/test-sitemap-parser.mjs +157 -0
  48. package/templates/skills/sb-extract-assets/SKILL.md +112 -0
  49. package/templates/skills/sb-extract-assets/scripts/extract-assets.mjs +484 -0
  50. package/templates/skills/sb-extract-assets/scripts/tests/test-extract-assets.mjs +112 -0
  51. package/templates/skills/sb-inspect-live/SKILL.md +105 -0
  52. package/templates/skills/sb-inspect-live/scripts/inspect-live.mjs +693 -0
  53. package/templates/skills/sb-inspect-live/scripts/tests/test-inspect-live.mjs +181 -0
  54. package/templates/skills/sb-review-checks/SKILL.md +113 -0
  55. package/templates/skills/sb-review-checks/references/review-rules.md +195 -0
  56. package/templates/skills/sb-review-checks/scripts/lib/anti-patterns.mjs +379 -0
  57. package/templates/skills/sb-review-checks/scripts/lib/cross-reference.mjs +115 -0
  58. package/templates/skills/sb-review-checks/scripts/lib/design-quality.mjs +541 -0
  59. package/templates/skills/sb-review-checks/scripts/review-checks.mjs +250 -0
  60. package/templates/skills/sb-review-checks/scripts/tests/test-anti-patterns.mjs +343 -0
  61. package/templates/skills/sb-review-checks/scripts/tests/test-cross-reference.mjs +170 -0
  62. package/templates/skills/sb-review-checks/scripts/tests/test-design-quality.mjs +493 -0
  63. package/templates/skills/sb-review-checks/scripts/tests/test-review-checks.mjs +267 -0
  64. package/templates/skills/sb-tweak/SKILL.md +130 -0
  65. package/templates/skills/sb-tweak/references/tweak-patterns.md +157 -0
  66. package/templates/skills/sb-tweak/scripts/lib/diff-summarizer.mjs +140 -0
  67. package/templates/skills/sb-tweak/scripts/lib/element-locator.mjs +507 -0
  68. package/templates/skills/sb-tweak/scripts/lib/intent-parser.mjs +324 -0
  69. package/templates/skills/sb-tweak/scripts/tests/test-diff-summarizer.mjs +248 -0
  70. package/templates/skills/sb-tweak/scripts/tests/test-element-locator.mjs +418 -0
  71. package/templates/skills/sb-tweak/scripts/tests/test-intent-parser.mjs +496 -0
  72. package/templates/skills/sb-tweak/scripts/tests/test-tweak.mjs +407 -0
  73. package/templates/skills/sb-tweak/scripts/tweak.mjs +656 -0
  74. package/templates/skills/sb-validate-render/SKILL.md +120 -0
  75. package/templates/skills/sb-validate-render/scripts/tests/test-validate-render.mjs +304 -0
  76. package/templates/skills/sb-validate-render/scripts/validate-render.mjs +645 -0
@@ -0,0 +1,484 @@
1
+ #!/usr/bin/env node
2
+ // extract-assets.mjs — Download images from sb-inspect-live's imgUrls list,
3
+ // strip identifying metadata (EXIF/XMP/IPTC), rename via sha256 content-hash,
4
+ // dedupe against assets already on disk, and emit assets-map.json.
5
+ //
6
+ // Strip is the *default* sharp behaviour: we re-encode without calling
7
+ // .withMetadata(). .rotate() is called BEFORE the strip so EXIF orientation
8
+ // is baked into pixels (otherwise an upright phone photo looks sideways once
9
+ // the EXIF block is gone). ICC profile is preserved for color fidelity.
10
+ //
11
+ // SVGs don't have EXIF but carry editor signatures (`<metadata>`, comments,
12
+ // inkscape:/sodipodi: namespaces) — those are stripped via regex. For
13
+ // target=wp the sanitized markup is returned inline (WP blocks raw SVG
14
+ // uploads by default). For target=shopify SVGs are saved as files.
15
+ //
16
+ // Output: JSON to stdout AND assets-map.json in --output-dir. Logs to stderr.
17
+
18
+ import { parseArgs } from 'node:util'
19
+ import { mkdir, writeFile, readFile, readdir } from 'node:fs/promises'
20
+ import { existsSync } from 'node:fs'
21
+ import { join, resolve, basename, extname } from 'node:path'
22
+ import { createHash } from 'node:crypto'
23
+
24
+ // sharp is imported lazily inside main() so --help and arg validation
25
+ // work without the dep installed.
26
+
27
+ const HELP = `
28
+ extract-assets.mjs — Download + sanitize images for SimilarBuild visual cloning.
29
+
30
+ One of:
31
+ --inspection-path <file> Path to inspection.json (script reads imgUrls).
32
+ --img-urls <json> Inline JSON array [{url, context, alt}].
33
+
34
+ Required:
35
+ --output-dir <dir> Directory for sanitized assets + assets-map.json.
36
+
37
+ Optional:
38
+ --target <wp|shopify> Default 'wp'. Affects SVG handling (inline vs file).
39
+ --existing-assets-dir <d> Extra dir of already-extracted assets to dedupe against.
40
+ --large-warn-mb <n> Warn (stderr) if asset > N MB. Default 10.
41
+ --timeout <ms> Per-request fetch timeout. Default 30000.
42
+ --help Show this message.
43
+
44
+ Exit codes: 0=ok, 1=script error, 2=invalid args.
45
+ `
46
+
47
// Report a problem on stderr (prefixed with the tool name) and terminate.
// `code` is the process exit status; it defaults to 2, which the help text
// documents as "invalid args".
function fail(msg, code = 2) {
  const line = `[sb-extract-assets] ${msg}\n`
  process.stderr.write(line)
  process.exit(code)
}
51
+
52
// Progress/diagnostic output. Goes to stderr so that stdout stays reserved
// for the JSON result the script prints at the end.
function log(msg) {
  const line = `[sb-extract-assets] ${msg}\n`
  process.stderr.write(line)
}
55
+
56
// Parse CLI flags. strict:false keeps unknown flags from throwing, but it
// also disables type enforcement: a string option given as the last argument
// with no value (e.g. a trailing `--output-dir`) comes back as boolean `true`.
// That case is rejected explicitly below.
const { values } = parseArgs({
  options: {
    'inspection-path': { type: 'string' },
    'img-urls': { type: 'string' },
    'output-dir': { type: 'string' },
    target: { type: 'string', default: 'wp' },
    'existing-assets-dir': { type: 'string' },
    'large-warn-mb': { type: 'string', default: '10' },
    timeout: { type: 'string', default: '30000' },
    help: { type: 'boolean', default: false },
  },
  strict: false,
})

if (values.help) {
  process.stdout.write(HELP)
  process.exit(0)
}

// Every option except --help takes a value. Without this check a value-less
// flag would reach resolve()/parseInt() as `true` and crash with a TypeError
// (exit 1) instead of being reported as an argument error (exit 2).
const STRING_OPTIONS = [
  'inspection-path',
  'img-urls',
  'output-dir',
  'target',
  'existing-assets-dir',
  'large-warn-mb',
  'timeout',
]
for (const name of STRING_OPTIONS) {
  if (values[name] !== undefined && typeof values[name] !== 'string') {
    fail(`--${name} requires a value`)
  }
}

if (!values['output-dir']) fail('missing --output-dir')
if (!values['inspection-path'] && !values['img-urls']) {
  fail('missing input: pass --inspection-path or --img-urls')
}
if (values['inspection-path'] && values['img-urls']) {
  fail('pass exactly one of --inspection-path or --img-urls, not both')
}
if (!['wp', 'shopify'].includes(values.target)) {
  fail(`invalid --target '${values.target}' (must be 'wp' or 'shopify')`)
}

const OUTPUT_DIR = resolve(values['output-dir'])
const TARGET = values.target
const EXISTING_DIR = values['existing-assets-dir'] ? resolve(values['existing-assets-dir']) : null
const LARGE_WARN_BYTES = parseInt(values['large-warn-mb'], 10) * 1024 * 1024
const TIMEOUT = parseInt(values.timeout, 10)

// parseInt yields NaN for non-numeric input; NaN * n stays NaN.
if (!Number.isFinite(LARGE_WARN_BYTES)) fail('--large-warn-mb must be numeric')
if (!Number.isFinite(TIMEOUT)) fail('--timeout must be numeric')

// Validate input source up front (before sharp import) so input errors exit
// with code 2 (invalid args) instead of code 1 (script error from missing dep).
let INPUT_URLS = null
if (values['img-urls']) {
  let arr
  try {
    arr = JSON.parse(values['img-urls'])
  } catch (err) {
    fail(`--img-urls is not valid JSON: ${err.message}`)
  }
  // Checked outside the try: fail() exits the process, so nothing it reports
  // can be caught and re-labelled as a JSON error.
  if (!Array.isArray(arr)) fail('--img-urls must be a JSON array')
  INPUT_URLS = arr
} else {
  const p = resolve(values['inspection-path'])
  if (!existsSync(p)) fail(`inspection file not found: ${p}`)
  // Defer parse to main() — file exists, that's enough for arg validation.
}
112
+
113
// MIME → extension. Authoritative: drives the saved filename's extension,
// regardless of what the URL or Content-Disposition claims. Prevents the
// classic ".jpg?v=2 actually a PNG" mismatch.
//
// The table is indexed with the server-supplied Content-Type, so it is built
// on a null prototype: a header such as "constructor" or "toString" must not
// resolve to an inherited Object.prototype member and pass as a known type.
// Frozen because nothing in this file is meant to mutate it.
const MIME_EXT = Object.freeze(
  Object.assign(Object.create(null), {
    'image/jpeg': 'jpg',
    'image/jpg': 'jpg',
    'image/png': 'png',
    'image/webp': 'webp',
    'image/avif': 'avif',
    'image/gif': 'gif',
    'image/svg+xml': 'svg',
    'image/x-icon': 'ico',
    'image/vnd.microsoft.icon': 'ico',
    'image/heic': 'heic',
    'image/heif': 'heif',
    'image/bmp': 'bmp',
    'image/tiff': 'tiff',
  }),
)
131
+
132
// Magic-byte sniff fallback when Content-Type lies or is generic
// (octet-stream, text/plain).
//
// Recognises JPEG, PNG, GIF, WebP (RIFF container + 'WEBP' tag), AVIF (ftyp
// box with brand 'avif') and SVG (an '<svg' tag within the first 512 bytes).
// Each signature is length-checked on its own, so a short buffer is still
// matched against the signatures it is long enough to carry.
//
// @param {Buffer} buf  downloaded bytes
// @returns {string|null} the detected MIME type, or null when nothing matches
function sniffMime(buf) {
  // True when `bytes` occur in `buf` starting at `offset`.
  const hasBytes = (offset, bytes) =>
    buf.length >= offset + bytes.length && bytes.every((b, i) => buf[offset + i] === b)

  if (hasBytes(0, [0xff, 0xd8, 0xff])) return 'image/jpeg'
  if (hasBytes(0, [0x89, 0x50, 0x4e, 0x47])) return 'image/png'
  if (hasBytes(0, [0x47, 0x49, 0x46])) return 'image/gif'
  // WebP: 'RIFF' at 0, 4-byte size, then 'WEBP' at 8.
  if (hasBytes(0, [0x52, 0x49, 0x46, 0x46]) && hasBytes(8, [0x57, 0x45, 0x42, 0x50])) {
    return 'image/webp'
  }
  // AVIF: ftyp box at offset 4, brand 'avif' at 8
  if (hasBytes(4, [0x66, 0x74, 0x79, 0x70, 0x61, 0x76, 0x69, 0x66])) return 'image/avif'

  // SVG: text-y, contains '<svg' near the top. subarray() is a zero-copy view
  // (Buffer#slice is deprecated in Node).
  const head = buf.subarray(0, 512).toString('utf8')
  if (/<svg[\s>]/i.test(head)) return 'image/svg+xml'
  return null
}
171
+
172
// Content fingerprint used for asset filenames and dedupe lookups: the first
// 16 hex characters of the SHA-256 digest of `buf`.
function hashHex16(buf) {
  const hasher = createHash('sha256')
  hasher.update(buf)
  const fullHex = hasher.digest('hex')
  return fullHex.substring(0, 16)
}
175
+
176
// Strip editor signatures and identifying blocks from SVG markup. Keeps the
// drawable content (paths, fills, viewBox) but drops:
//   - <metadata>…</metadata> blocks (RDF/cc:license/dc:* author info)
//   - <!-- … --> comments (often embed editor name + path on disk)
//   - inkscape:* / sodipodi:* / sketch:* attributes and the matching xmlns:*
//     declarations, whether the value is double- or single-quoted
//   - <sodipodi:namedview> editor state, paired or self-closing
//   - the leading <?xml …?> declaration (carries encoding + sometimes editor id)
//
// NOTE(review): this is a metadata scrub, not a security sanitizer — active
// content such as <script> elements or on*="…" handlers is left in place.
// Callers that inline the result into HTML should confirm that is acceptable.
//
// @param {string} svg  raw SVG markup (coerced with String())
// @returns {string} the cleaned markup, trimmed of surrounding whitespace
function sanitizeSvg(svg) {
  let s = String(svg)
  s = s.replace(/<\?xml[^?]*\?>/g, '')
  s = s.replace(/<!--[\s\S]*?-->/g, '')
  s = s.replace(/<metadata\b[\s\S]*?<\/metadata>/gi, '')
  s = s.replace(/<sodipodi:namedview\b[\s\S]*?<\/sodipodi:namedview>/gi, '')
  // Self-closing form. Quoted attribute values are consumed as a unit so a
  // '/' or '>' inside a value (file paths, URLs) cannot end the match early
  // or prevent it.
  s = s.replace(/<sodipodi:namedview\b(?:[^>"']|"[^"]*"|'[^']*')*?\/>/gi, '')
  s = s.replace(/\sxmlns:(?:inkscape|sodipodi|cc|dc|rdf|sketch)\s*=\s*(?:"[^"]*"|'[^']*')/gi, '')
  s = s.replace(/\s(?:inkscape|sodipodi|sketch):[\w-]+\s*=\s*(?:"[^"]*"|'[^']*')/gi, '')
  return s.trim()
}
194
+
195
// Read a directory once (non-recursive) and build a hash → { path, ext }
// index. The skill names files <hash>.<ext>, so only entries whose basename
// is exactly 16 hex characters are indexed; anything else (renamed by hand,
// assets-map.json, …) is ignored — it wouldn't dedupe correctly anyway.
//
// The filename match is case-insensitive, so both the hash key and the
// extension are lower-cased: hashHex16() always produces lower-case hex, and
// an upper-case filename must still be found by a later lookup.
//
// @param {string|null} dir  directory to scan; null/missing yields an empty index
// @returns {Promise<Map<string, {path: string, ext: string}>>}
async function indexExisting(dir) {
  const map = new Map()
  if (!dir || !existsSync(dir)) return map
  let entries
  try {
    entries = await readdir(dir)
  } catch {
    // Best-effort: an unreadable directory simply contributes nothing.
    return map
  }
  for (const name of entries) {
    const m = /^([0-9a-f]{16})\.([a-z0-9]+)$/i.exec(name)
    if (!m) continue
    map.set(m[1].toLowerCase(), { path: join(dir, name), ext: m[2].toLowerCase() })
  }
  return map
}
215
+
216
// Download `url`, aborting after TIMEOUT milliseconds. Failures from the
// request or the body read are caught and reported in the return value:
//   success      → { ok: true,  status, contentType, buffer }
//   HTTP non-ok  → { ok: false, status, reason }   (reason = status text)
//   abort/error  → { ok: false, status: 0, reason } ('timeout' on abort)
// `contentType` is the header's media type only — parameters such as charset
// are dropped, and the value is trimmed and lower-cased.
async function fetchBuffer(url) {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), TIMEOUT)
  // Browser-like (mobile Safari) request headers; the original note here is
  // that some CDNs 403 on the default node fetch UA.
  const requestHeaders = {
    'user-agent':
      'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1',
    accept: 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
  }
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      redirect: 'follow',
      headers: requestHeaders,
    })
    if (response.ok) {
      const rawType = response.headers.get('content-type') || ''
      const [mediaType] = rawType.split(';')
      const body = await response.arrayBuffer()
      return {
        ok: true,
        status: response.status,
        contentType: mediaType.trim().toLowerCase(),
        buffer: Buffer.from(body),
      }
    }
    const reason = response.statusText || `http-${response.status}`
    return { ok: false, status: response.status, reason }
  } catch (err) {
    const reason = err?.name === 'AbortError' ? 'timeout' : (err?.message || String(err))
    return { ok: false, status: 0, reason }
  } finally {
    // The timer stays armed until the body has been read (or the attempt
    // failed), so a stalled download is also cut off.
    clearTimeout(timer)
  }
}
242
+
243
// Resolve the list of image descriptors to process.
//
// Returns the array already parsed from --img-urls when that flag was used;
// otherwise reads the --inspection-path file and returns its `imgUrls` array.
// An unreadable file, invalid JSON, or a document without an `imgUrls` array
// is reported through fail(), which exits the process.
//
// @returns {Promise<Array>} the raw input items (not validated individually)
async function loadInputUrls() {
  if (INPUT_URLS) return INPUT_URLS
  const p = resolve(values['inspection-path'])
  let raw
  try {
    raw = await readFile(p, 'utf8')
  } catch (err) {
    fail(`cannot read inspection file: ${err.message}`)
  }
  let parsed
  try {
    parsed = JSON.parse(raw)
  } catch (err) {
    fail(`inspection file is not valid JSON: ${err.message}`)
  }
  // `parsed` can legitimately be null (the JSON document "null") or a
  // primitive; optional chaining keeps that on the fail() path instead of
  // throwing a TypeError that would surface as a fatal script error.
  const imgUrls = parsed?.imgUrls
  if (!Array.isArray(imgUrls)) {
    fail(`inspection file has no imgUrls[] (got ${typeof imgUrls})`)
  }
  return imgUrls
}
263
+
264
// Entry point: download every input URL, sanitize it, store it under a
// content-hash filename in OUTPUT_DIR (reusing identical assets already on
// disk), then write assets-map.json and print the same JSON to stdout.
//
// Per-URL problems never abort the run; they are collected in `failed`.
// Exits with code 1 when the `sharp` dependency cannot be imported.
async function main() {
  await mkdir(OUTPUT_DIR, { recursive: true })

  let sharp
  try {
    sharp = (await import('sharp')).default
  } catch (err) {
    process.stderr.write(
      `[sb-extract-assets] missing dependency: ${err?.message || err}\n` +
        `Install with: npm i sharp\n`,
    )
    process.exit(1)
  }

  const inputs = await loadInputUrls()
  log(`processing ${inputs.length} url(s) into ${OUTPUT_DIR} (target=${TARGET})`)

  // Build hash index from output_dir (idempotent reruns + cross-page dedupe
  // within the same project) and from --existing-assets-dir if provided.
  const localIdx = await indexExisting(OUTPUT_DIR)
  const externalIdx = await indexExisting(EXISTING_DIR)
  // Merge: external first, local overrides (we prefer assets already saved
  // in this project's output_dir).
  const knownHashes = new Map([...externalIdx, ...localIdx])

  // Dedupe input by URL — orchestrator may pass the same URL multiple times
  // (e.g. same hero referenced from <img> and CSS background-image).
  const byUrl = new Map()
  for (const item of inputs) {
    if (!item || typeof item.url !== 'string') continue
    if (!byUrl.has(item.url)) byUrl.set(item.url, item)
  }

  const assets = {}
  const reuseReport = []
  const failed = []

  // Extension for a MIME type, or undefined. Own-property lookup only: the
  // MIME string comes from a remote server, and a header like "constructor"
  // must not resolve to an inherited Object.prototype member.
  const extForMime = (m) =>
    typeof m === 'string' && Object.hasOwn(MIME_EXT, m) ? MIME_EXT[m] : undefined

  // Shared dedupe-or-write step for file-backed assets (shopify SVGs and all
  // rasters). Records the asset under `url` and returns true when a new file
  // was written, false when an existing file with the same hash was reused.
  const storeDeduped = async (url, hash, bytes, ext, meta) => {
    const { originalFilename, strippedMetadata, context, alt } = meta
    const known = knownHashes.get(hash)
    if (known) {
      assets[url] = {
        localPath: known.path,
        hash,
        ext: known.ext,
        bytes: bytes.length,
        originalFilename,
        strippedMetadata,
        reusedFrom: known.path,
        context,
        alt,
      }
      reuseReport.push({ url, reusedFrom: known.path, reason: 'identical-content-hash' })
      log(`↻ ${url} — reused ${basename(known.path)}`)
      return false
    }
    const localPath = join(OUTPUT_DIR, `${hash}.${ext}`)
    await writeFile(localPath, bytes)
    knownHashes.set(hash, { path: localPath, ext })
    assets[url] = {
      localPath,
      hash,
      ext,
      bytes: bytes.length,
      originalFilename,
      strippedMetadata,
      reusedFrom: null,
      context,
      alt,
    }
    return true
  }

  for (const item of byUrl.values()) {
    const url = item.url
    const context = item.context || ''
    const alt = item.alt || ''

    // Skip data: URIs — they're already inline and have no metadata to strip
    // beyond what the source HTML already exposed. Pass through as failed
    // with a clear reason so the orchestrator can decide to keep them inline.
    if (url.startsWith('data:')) {
      failed.push({ url, status: 0, reason: 'data-uri-skipped' })
      continue
    }
    if (!/^https?:\/\//i.test(url)) {
      failed.push({ url, status: 0, reason: 'unsupported-scheme' })
      continue
    }

    const fetched = await fetchBuffer(url)
    if (!fetched.ok) {
      log(`✗ ${url} — ${fetched.status} ${fetched.reason}`)
      failed.push({ url, status: fetched.status, reason: fetched.reason })
      continue
    }

    // MIME resolution order: recognised Content-Type → magic-byte sniff →
    // the URL's file extension.
    let mime = extForMime(fetched.contentType) ? fetched.contentType : sniffMime(fetched.buffer)
    if (!extForMime(mime)) {
      // Fallback: trust the URL's extension if it matches something we know
      const urlExt = (extname(new URL(url).pathname) || '').slice(1).toLowerCase()
      const guessedMime = Object.entries(MIME_EXT).find(([, e]) => e === urlExt)?.[0]
      if (guessedMime) mime = guessedMime
    }
    const ext = extForMime(mime)
    if (!ext) {
      failed.push({ url, status: fetched.status, reason: `unsupported-mime:${fetched.contentType || 'unknown'}` })
      continue
    }

    if (fetched.buffer.length > LARGE_WARN_BYTES) {
      log(`⚠ ${url} is ${(fetched.buffer.length / 1024 / 1024).toFixed(1)}MB — consider optimizing`)
    }

    const originalFilename = basename(new URL(url).pathname) || 'asset'

    // ---- SVG path ----
    if (mime === 'image/svg+xml') {
      const cleaned = sanitizeSvg(fetched.buffer.toString('utf8'))
      const cleanedBuf = Buffer.from(cleaned, 'utf8')
      const hash = hashHex16(cleanedBuf)

      if (TARGET === 'wp') {
        // Inline mode: emit the sanitized markup directly. No file written.
        // The orchestrator's WP builder splices `assets[url].inline` into HTML.
        assets[url] = {
          inline: cleaned,
          hash,
          ext: 'svg',
          bytes: cleanedBuf.length,
          originalFilename,
          strippedMetadata: true,
          context,
          alt,
        }
        log(`✓ ${url} — svg inline (wp), ${cleanedBuf.length}B`)
        continue
      }

      // target=shopify: save as file, dedupe by hash like rasters
      const written = await storeDeduped(url, hash, cleanedBuf, 'svg', {
        originalFilename,
        strippedMetadata: true,
        context,
        alt,
      })
      if (written) log(`✓ ${url} — svg saved ${hash}.svg`)
      continue
    }

    // ---- Raster path (sharp re-encode) ----
    let stripped
    let strippedMetadata = true
    try {
      // .rotate() bakes EXIF orientation into pixels BEFORE we drop the
      // EXIF block. Without this an iPhone portrait photo would render
      // sideways once metadata is gone.
      // toFormat(<ext>) re-encodes; sharp does NOT preserve metadata by
      // default — that's the strip. We do NOT call .withMetadata().
      // .keepIccProfile() is only called when the installed sharp has it.
      const sharpFormat = ext === 'jpg' ? 'jpeg' : ext
      // By default sharp reads only the first frame; `animated: true` keeps
      // every frame so animated GIF/WebP assets are not flattened to a still.
      // If sharp rejects the combination with .rotate(), the catch below
      // falls back to the raw bytes.
      const animated = ext === 'gif' || ext === 'webp'
      let pipeline = sharp(fetched.buffer, { animated }).rotate()
      if (typeof pipeline.keepIccProfile === 'function') {
        pipeline = pipeline.keepIccProfile()
      }
      stripped = await pipeline.toFormat(sharpFormat).toBuffer()
    } catch (err) {
      // Sharp can't decode (corrupt, unsupported variant, ICO, etc.) — save
      // raw bytes and flag. Better to ship a possibly-leaky asset than to
      // black-hole it; the orchestrator can decide whether to drop it.
      log(`! ${url} — sharp decode failed (${err.message}), saving raw`)
      stripped = fetched.buffer
      strippedMetadata = false
    }

    const hash = hashHex16(stripped)
    const written = await storeDeduped(url, hash, stripped, ext, {
      originalFilename,
      strippedMetadata,
      context,
      alt,
    })
    if (written) {
      log(`✓ ${url} — saved ${hash}.${ext} (${stripped.length}B${strippedMetadata ? '' : ', RAW'})`)
    }
  }

  const result = {
    target: TARGET,
    outputDir: OUTPUT_DIR,
    assets,
    reuseReport,
    failed,
  }

  const mapPath = join(OUTPUT_DIR, 'assets-map.json')
  await writeFile(mapPath, JSON.stringify(result, null, 2))
  log(`wrote ${mapPath}`)

  process.stdout.write(JSON.stringify(result))
}
480
+
481
// Kick off the run. Anything main() rejects with is an unexpected script
// error: print it (with stack when available) and exit with code 1.
main().catch((err) => {
  const detail = err?.stack ? err.stack : err
  process.stderr.write(`[sb-extract-assets] fatal: ${detail}\n`)
  process.exit(1)
})
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env node
2
+ // test-extract-assets.mjs — Smoke tests that don't require sharp or network.
3
+ // Validates: --help works, missing/invalid args fail with exit code 2, syntax loads.
4
+ // Network + sharp integration tests belong in a separate harness.
5
+
6
+ import { spawnSync } from 'node:child_process'
7
+ import { fileURLToPath } from 'node:url'
8
+ import { dirname, resolve } from 'node:path'
9
+ import { strict as assert } from 'node:assert'
10
+
11
+ const here = dirname(fileURLToPath(import.meta.url))
12
+ const SCRIPT = resolve(here, '..', 'extract-assets.mjs')
13
+
14
// Running totals, reported (and turned into the exit code) at the end of the file.
let passed = 0
let failed = 0

// Minimal synchronous test runner: executes `fn`, prints a TAP-style
// "ok - name" / "not ok - name" line to stdout and bumps the matching counter.
// A throwing `fn` is recorded as a failure together with the error's message.
function test(name, fn) {
  try {
    fn()
    const okLine = `ok - ${name}\n`
    process.stdout.write(okLine)
    passed += 1
  } catch (err) {
    const failLine = `not ok - ${name}\n ${err.message}\n`
    process.stdout.write(failLine)
    failed += 1
  }
}
27
+
28
// Run extract-assets.mjs with the given CLI arguments and return the
// spawnSync result. process.execPath is used instead of a bare `node` so the
// script runs under the same Node binary as this test file, even when `node`
// is missing from PATH or resolves to a different version.
function runScript(...args) {
  return spawnSync(process.execPath, [SCRIPT, ...args], { encoding: 'utf8' })
}

// Output directory passed to the script. Every case below is expected to fail
// argument validation (exit code 2), so the script should never create it.
const OUT_DIR = '/tmp/sb-extract-test'

test('--help exits 0 and prints usage', () => {
  const r = runScript('--help')
  assert.equal(r.status, 0, `exit code was ${r.status}`)
  assert.match(r.stdout, /extract-assets\.mjs/)
  assert.match(r.stdout, /--inspection-path/)
  assert.match(r.stdout, /--img-urls/)
  assert.match(r.stdout, /--output-dir/)
  assert.match(r.stdout, /--target/)
  assert.match(r.stdout, /--existing-assets-dir/)
})

test('missing --output-dir exits 2', () => {
  const r = runScript('--img-urls', '[]')
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /missing --output-dir/)
})

test('missing input source exits 2', () => {
  const r = runScript('--output-dir', OUT_DIR)
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /missing input/)
})

test('both --inspection-path and --img-urls exits 2', () => {
  const r = runScript('--output-dir', OUT_DIR, '--img-urls', '[]', '--inspection-path', '/tmp/x.json')
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /exactly one/)
})

test('invalid --target exits 2', () => {
  const r = runScript('--output-dir', OUT_DIR, '--img-urls', '[]', '--target', 'wix')
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /invalid --target/)
})

test('non-numeric --large-warn-mb exits 2', () => {
  const r = runScript('--output-dir', OUT_DIR, '--img-urls', '[]', '--large-warn-mb', 'foo')
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /numeric/)
})

test('malformed --img-urls JSON exits 2', () => {
  const r = runScript('--output-dir', OUT_DIR, '--img-urls', '{not-json')
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /not valid JSON/)
})

test('non-array --img-urls exits 2', () => {
  const r = runScript('--output-dir', OUT_DIR, '--img-urls', '{"foo":"bar"}')
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /must be a JSON array|not valid JSON/)
})

test('missing inspection file exits 2', () => {
  const r = runScript('--output-dir', OUT_DIR, '--inspection-path', '/tmp/does-not-exist-xyz.json')
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /not found/)
})

// Summary line, then a non-zero exit status if any case failed.
process.stdout.write(`\n${passed} passed, ${failed} failed\n`)
process.exit(failed === 0 ? 0 : 1)