similarbuild 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +110 -0
- package/LICENSE +21 -0
- package/README.md +301 -0
- package/bin/install.js +256 -0
- package/lib/copy-templates.mjs +52 -0
- package/lib/install-deps.mjs +62 -0
- package/lib/prompt-config.mjs +83 -0
- package/lib/verify-env.mjs +19 -0
- package/package.json +63 -0
- package/scripts/sync-templates.mjs +71 -0
- package/templates/commands/build-page.md +490 -0
- package/templates/commands/build-site.md +548 -0
- package/templates/commands/clip-section.md +519 -0
- package/templates/memory/anti-patterns.md +212 -0
- package/templates/memory/design-knowledge.md +225 -0
- package/templates/memory/fixes.md +163 -0
- package/templates/memory/patterns.md +681 -0
- package/templates/presets/shopify-section.yaml +51 -0
- package/templates/presets/wp-elementor.yaml +49 -0
- package/templates/reports/fixtures/mock-run-1.json +115 -0
- package/templates/reports/fixtures/mock-run-2.json +72 -0
- package/templates/reports/report-renderer.mjs +218 -0
- package/templates/reports/report-template.html +571 -0
- package/templates/skills/sb-build-shopify/SKILL.md +104 -0
- package/templates/skills/sb-build-shopify/references/shopify-build-rules.md +563 -0
- package/templates/skills/sb-build-shopify/scripts/build-shopify.mjs +637 -0
- package/templates/skills/sb-build-shopify/scripts/tests/test-build-shopify.mjs +424 -0
- package/templates/skills/sb-build-wp/SKILL.md +83 -0
- package/templates/skills/sb-build-wp/references/wp-build-rules.md +376 -0
- package/templates/skills/sb-build-wp/scripts/build-wp.mjs +327 -0
- package/templates/skills/sb-build-wp/scripts/tests/test-build-wp.mjs +224 -0
- package/templates/skills/sb-compare-visual/SKILL.md +121 -0
- package/templates/skills/sb-compare-visual/scripts/compare-visual.mjs +387 -0
- package/templates/skills/sb-compare-visual/scripts/lib/compare-tokens.mjs +273 -0
- package/templates/skills/sb-compare-visual/scripts/tests/test-compare-tokens.mjs +350 -0
- package/templates/skills/sb-compare-visual/scripts/tests/test-compare-visual.mjs +626 -0
- package/templates/skills/sb-crawl-and-list/SKILL.md +99 -0
- package/templates/skills/sb-crawl-and-list/scripts/crawl-and-list.mjs +437 -0
- package/templates/skills/sb-crawl-and-list/scripts/lib/blocklist-filter.mjs +176 -0
- package/templates/skills/sb-crawl-and-list/scripts/lib/fallback-crawler.mjs +107 -0
- package/templates/skills/sb-crawl-and-list/scripts/lib/page-classifier.mjs +89 -0
- package/templates/skills/sb-crawl-and-list/scripts/lib/sitemap-parser.mjs +118 -0
- package/templates/skills/sb-crawl-and-list/scripts/tests/test-blocklist-filter.mjs +204 -0
- package/templates/skills/sb-crawl-and-list/scripts/tests/test-crawl-and-list.mjs +276 -0
- package/templates/skills/sb-crawl-and-list/scripts/tests/test-fallback-crawler.mjs +243 -0
- package/templates/skills/sb-crawl-and-list/scripts/tests/test-page-classifier.mjs +120 -0
- package/templates/skills/sb-crawl-and-list/scripts/tests/test-sitemap-parser.mjs +157 -0
- package/templates/skills/sb-extract-assets/SKILL.md +112 -0
- package/templates/skills/sb-extract-assets/scripts/extract-assets.mjs +484 -0
- package/templates/skills/sb-extract-assets/scripts/tests/test-extract-assets.mjs +112 -0
- package/templates/skills/sb-inspect-live/SKILL.md +105 -0
- package/templates/skills/sb-inspect-live/scripts/inspect-live.mjs +693 -0
- package/templates/skills/sb-inspect-live/scripts/tests/test-inspect-live.mjs +181 -0
- package/templates/skills/sb-review-checks/SKILL.md +113 -0
- package/templates/skills/sb-review-checks/references/review-rules.md +195 -0
- package/templates/skills/sb-review-checks/scripts/lib/anti-patterns.mjs +379 -0
- package/templates/skills/sb-review-checks/scripts/lib/cross-reference.mjs +115 -0
- package/templates/skills/sb-review-checks/scripts/lib/design-quality.mjs +541 -0
- package/templates/skills/sb-review-checks/scripts/review-checks.mjs +250 -0
- package/templates/skills/sb-review-checks/scripts/tests/test-anti-patterns.mjs +343 -0
- package/templates/skills/sb-review-checks/scripts/tests/test-cross-reference.mjs +170 -0
- package/templates/skills/sb-review-checks/scripts/tests/test-design-quality.mjs +493 -0
- package/templates/skills/sb-review-checks/scripts/tests/test-review-checks.mjs +267 -0
- package/templates/skills/sb-tweak/SKILL.md +130 -0
- package/templates/skills/sb-tweak/references/tweak-patterns.md +157 -0
- package/templates/skills/sb-tweak/scripts/lib/diff-summarizer.mjs +140 -0
- package/templates/skills/sb-tweak/scripts/lib/element-locator.mjs +507 -0
- package/templates/skills/sb-tweak/scripts/lib/intent-parser.mjs +324 -0
- package/templates/skills/sb-tweak/scripts/tests/test-diff-summarizer.mjs +248 -0
- package/templates/skills/sb-tweak/scripts/tests/test-element-locator.mjs +418 -0
- package/templates/skills/sb-tweak/scripts/tests/test-intent-parser.mjs +496 -0
- package/templates/skills/sb-tweak/scripts/tests/test-tweak.mjs +407 -0
- package/templates/skills/sb-tweak/scripts/tweak.mjs +656 -0
- package/templates/skills/sb-validate-render/SKILL.md +120 -0
- package/templates/skills/sb-validate-render/scripts/tests/test-validate-render.mjs +304 -0
- package/templates/skills/sb-validate-render/scripts/validate-render.mjs +645 -0
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// extract-assets.mjs — Download images from sb-inspect-live's imgUrls list,
|
|
3
|
+
// strip identifying metadata (EXIF/XMP/IPTC), rename via sha256 content-hash,
|
|
4
|
+
// dedupe against assets already on disk, and emit assets-map.json.
|
|
5
|
+
//
|
|
6
|
+
// Strip is the *default* sharp behaviour: we re-encode without calling
|
|
7
|
+
// .withMetadata(). .rotate() is called BEFORE the strip so EXIF orientation
|
|
8
|
+
// is baked into pixels (otherwise an upright phone photo looks sideways once
|
|
9
|
+
// the EXIF block is gone). ICC profile is preserved for color fidelity.
|
|
10
|
+
//
|
|
11
|
+
// SVGs don't have EXIF but carry editor signatures (`<metadata>`, comments,
|
|
12
|
+
// inkscape:/sodipodi: namespaces) — those are stripped via regex. For
|
|
13
|
+
// target=wp the sanitized markup is returned inline (WP blocks raw SVG
|
|
14
|
+
// uploads by default). For target=shopify SVGs are saved as files.
|
|
15
|
+
//
|
|
16
|
+
// Output: JSON to stdout AND assets-map.json in --output-dir. Logs to stderr.
|
|
17
|
+
|
|
18
|
+
import { parseArgs } from 'node:util'
|
|
19
|
+
import { mkdir, writeFile, readFile, readdir } from 'node:fs/promises'
|
|
20
|
+
import { existsSync } from 'node:fs'
|
|
21
|
+
import { join, resolve, basename, extname } from 'node:path'
|
|
22
|
+
import { createHash } from 'node:crypto'
|
|
23
|
+
|
|
24
|
+
// sharp is imported lazily inside main() so --help and arg validation
|
|
25
|
+
// work without the dep installed.
|
|
26
|
+
|
|
27
|
+
// Usage text for the CLI. It is written verbatim to stdout when --help is
// passed, and lists the accepted flags, their defaults and the exit codes.
const HELP = `
|
|
28
|
+
extract-assets.mjs — Download + sanitize images for SimilarBuild visual cloning.
|
|
29
|
+
|
|
30
|
+
One of:
|
|
31
|
+
--inspection-path <file> Path to inspection.json (script reads imgUrls).
|
|
32
|
+
--img-urls <json> Inline JSON array [{url, context, alt}].
|
|
33
|
+
|
|
34
|
+
Required:
|
|
35
|
+
--output-dir <dir> Directory for sanitized assets + assets-map.json.
|
|
36
|
+
|
|
37
|
+
Optional:
|
|
38
|
+
--target <wp|shopify> Default 'wp'. Affects SVG handling (inline vs file).
|
|
39
|
+
--existing-assets-dir <d> Extra dir of already-extracted assets to dedupe against.
|
|
40
|
+
--large-warn-mb <n> Warn (stderr) if asset > N MB. Default 10.
|
|
41
|
+
--timeout <ms> Per-request fetch timeout. Default 30000.
|
|
42
|
+
--help Show this message.
|
|
43
|
+
|
|
44
|
+
Exit codes: 0=ok, 1=script error, 2=invalid args.
|
|
45
|
+
`
|
|
46
|
+
|
|
47
|
+
/**
 * Print a prefixed error line on stderr and terminate the process.
 *
 * @param {string} msg - Reason to show after the `[sb-extract-assets]` tag.
 * @param {number} [code=2] - Process exit status (2 is the "invalid args" code).
 */
function fail(msg, code = 2) {
  const line = `[sb-extract-assets] ${msg}\n`
  process.stderr.write(line)
  process.exit(code)
}
|
|
51
|
+
|
|
52
|
+
/**
 * Emit a progress/diagnostic line on stderr, keeping stdout free for the
 * JSON result.
 *
 * @param {string} msg - Text to show after the `[sb-extract-assets]` tag.
 */
function log(msg) {
  const line = `[sb-extract-assets] ${msg}\n`
  process.stderr.write(line)
}
|
|
55
|
+
|
|
56
|
+
// ---- CLI parsing and up-front validation ----
//
// Everything in this section runs before sharp is imported, so bad input
// exits with code 2 (invalid args) rather than code 1 (script error).
//
// strict:false keeps parseArgs from throwing on unknown flags, but it also
// means a string option that ends up without a value is not rejected there
// and reaches this code as a non-string. Those are refused explicitly below;
// otherwise resolve()/JSON.parse() would be handed a non-string and the
// process would die with an uncaught TypeError instead of a usage error.
const { values } = parseArgs({
  options: {
    'inspection-path': { type: 'string' },
    'img-urls': { type: 'string' },
    'output-dir': { type: 'string' },
    target: { type: 'string', default: 'wp' },
    'existing-assets-dir': { type: 'string' },
    'large-warn-mb': { type: 'string', default: '10' },
    timeout: { type: 'string', default: '30000' },
    help: { type: 'boolean', default: false },
  },
  strict: false,
})

if (values.help) {
  process.stdout.write(HELP)
  process.exit(0)
}

for (const flag of [
  'inspection-path',
  'img-urls',
  'output-dir',
  'target',
  'existing-assets-dir',
  'large-warn-mb',
  'timeout',
]) {
  const given = values[flag]
  if (given !== undefined && typeof given !== 'string') fail(`--${flag} requires a value`)
}

if (!values['output-dir']) fail('missing --output-dir')
if (!values['inspection-path'] && !values['img-urls']) {
  fail('missing input: pass --inspection-path or --img-urls')
}
if (values['inspection-path'] && values['img-urls']) {
  fail('pass exactly one of --inspection-path or --img-urls, not both')
}
if (!['wp', 'shopify'].includes(values.target)) {
  fail(`invalid --target '${values.target}' (must be 'wp' or 'shopify')`)
}

const OUTPUT_DIR = resolve(values['output-dir'])
const TARGET = values.target
const EXISTING_DIR = values['existing-assets-dir'] ? resolve(values['existing-assets-dir']) : null
// parseFloat so a fractional threshold such as 0.5 (MB) is honoured instead
// of being truncated to 0, which would warn about every asset.
const LARGE_WARN_BYTES = Number.parseFloat(values['large-warn-mb']) * 1024 * 1024
const TIMEOUT = Number.parseInt(values.timeout, 10)

if (!Number.isFinite(LARGE_WARN_BYTES)) fail('--large-warn-mb must be numeric')
// A timeout of 0 or less would abort every request as soon as it starts.
if (!Number.isFinite(TIMEOUT) || TIMEOUT <= 0) {
  fail('--timeout must be numeric and greater than 0')
}

// Validate input source up front (before sharp import) so input errors exit
// with code 2 (invalid args) instead of code 1 (script error from missing dep).
let INPUT_URLS = null
if (values['img-urls']) {
  let arr
  try {
    arr = JSON.parse(values['img-urls'])
  } catch (err) {
    fail(`--img-urls is not valid JSON: ${err.message}`)
  }
  if (!Array.isArray(arr)) fail('--img-urls must be a JSON array')
  INPUT_URLS = arr
} else {
  const p = resolve(values['inspection-path'])
  if (!existsSync(p)) fail(`inspection file not found: ${p}`)
  // Defer parse to main() — file exists, that's enough for arg validation.
}
|
|
112
|
+
|
|
113
|
+
// MIME → extension. Authoritative: drives the saved filename's extension,
|
|
114
|
+
// regardless of what the URL or Content-Disposition claims. Prevents the
|
|
115
|
+
// classic ".jpg?v=2 actually a PNG" mismatch.
|
|
116
|
+
// Built on a null prototype and frozen: the table is indexed with the
// Content-Type a remote server sent, so a header value such as "constructor"
// or "__proto__" must not resolve to an inherited Object member and be
// mistaken for a supported type.
const MIME_EXT = Object.freeze({
  __proto__: null,
  'image/jpeg': 'jpg',
  'image/jpg': 'jpg',
  'image/png': 'png',
  'image/webp': 'webp',
  'image/avif': 'avif',
  'image/gif': 'gif',
  'image/svg+xml': 'svg',
  'image/x-icon': 'ico',
  'image/vnd.microsoft.icon': 'ico',
  'image/heic': 'heic',
  'image/heif': 'heif',
  'image/bmp': 'bmp',
  'image/tiff': 'tiff',
})
|
|
131
|
+
|
|
132
|
+
// Magic-byte sniff fallback when Content-Type lies or is generic
|
|
133
|
+
// (octet-stream, text/plain). Covers the formats sharp can decode.
|
|
134
|
+
/**
 * Guess an image MIME type from the leading bytes of a downloaded body.
 *
 * Binary signatures (JPEG, PNG, GIF, WebP, AVIF) are only examined when the
 * buffer holds at least 12 bytes. The SVG text probe runs for any length and
 * looks at the first 512 bytes only.
 *
 * @param {Buffer} buf - Raw response bytes.
 * @returns {string|null} The detected MIME type, or null when nothing matched.
 */
function sniffMime(buf) {
  // Each row: MIME type followed by one or more [offset, expected bytes]
  // pairs that must all match. Rows are tried in order; first hit wins.
  const signatures = [
    ['image/jpeg', [0, [0xff, 0xd8, 0xff]]],
    ['image/png', [0, [0x89, 0x50, 0x4e, 0x47]]],
    ['image/gif', [0, [0x47, 0x49, 0x46]]],
    // "RIFF" at 0 and "WEBP" at 8
    ['image/webp', [0, [0x52, 0x49, 0x46, 0x46]], [8, [0x57, 0x45, 0x42, 0x50]]],
    // "ftyp" at 4 immediately followed by the brand "avif" at 8
    ['image/avif', [4, [0x66, 0x74, 0x79, 0x70, 0x61, 0x76, 0x69, 0x66]]],
  ]
  const matchesAt = ([offset, expected]) =>
    expected.every((byte, i) => buf[offset + i] === byte)

  if (buf.length >= 12) {
    for (const [mime, ...parts] of signatures) {
      if (parts.every(matchesAt)) return mime
    }
  }

  // SVG is text: accept it when an <svg tag opens near the top.
  const head = buf.toString('utf8', 0, 512)
  return /<svg[\s>]/i.test(head) ? 'image/svg+xml' : null
}
|
|
171
|
+
|
|
172
|
+
/**
 * Short content fingerprint used as the on-disk asset name.
 *
 * @param {Buffer|string} buf - Bytes to fingerprint.
 * @returns {string} The first 16 hex characters of the SHA-256 digest.
 */
function hashHex16(buf) {
  const hasher = createHash('sha256')
  hasher.update(buf)
  const fullHex = hasher.digest('hex')
  return fullHex.substring(0, 16)
}
|
|
175
|
+
|
|
176
|
+
// Strip editor signatures and identifying blocks from SVG markup. Keeps the
|
|
177
|
+
// drawable content (paths, fills, viewBox) but drops:
|
|
178
|
+
// - <metadata>…</metadata> blocks (RDF/cc:license/dc:* author info)
|
|
179
|
+
// - <!-- … --> comments (often embed editor name + path on disk)
|
|
180
|
+
// - inkscape:* / sodipodi:* attributes and namespace declarations
|
|
181
|
+
// - <sodipodi:namedview>…</sodipodi:namedview> editor state
|
|
182
|
+
// - the leading <?xml …?> declaration (carries encoding + sometimes editor id)
|
|
183
|
+
/**
 * Remove editor signatures and identifying blocks from SVG markup while
 * leaving the drawable content untouched.
 *
 * Removed: the `<?xml …?>` declaration, comments, `<metadata>` blocks,
 * `<sodipodi:namedview>` elements (paired or self-closing), the
 * inkscape/sodipodi/cc/dc/rdf/sketch namespace declarations, and any
 * `inkscape:*` / `sodipodi:*` / `sketch:*` attributes. Attribute values are
 * matched in either quoting style, with optional whitespace around `=`.
 *
 * NOTE: this is a regex-based metadata scrub, not a security sanitizer — it
 * does not remove `<script>` elements or event-handler attributes.
 *
 * @param {string} svg - SVG source text (coerced with String()).
 * @returns {string} The scrubbed markup, trimmed.
 */
function sanitizeSvg(svg) {
  return String(svg)
    .replace(/<\?xml[^?]*\?>/g, '')
    .replace(/<!--[\s\S]*?-->/g, '')
    .replace(/<metadata\b[\s\S]*?<\/metadata>/gi, '')
    // Self-closing form first, and tolerant of "/" inside attribute values
    // (paths, URLs). Otherwise the element survives while its namespace
    // declaration is stripped below, leaving an unbound prefix behind.
    .replace(/<sodipodi:namedview\b[^>]*\/>/gi, '')
    .replace(/<sodipodi:namedview\b[\s\S]*?<\/sodipodi:namedview>/gi, '')
    .replace(/\sxmlns:(?:inkscape|sodipodi|cc|dc|rdf|sketch)\s*=\s*(?:"[^"]*"|'[^']*')/gi, '')
    .replace(/\s(?:inkscape|sodipodi|sketch):[\w-]+\s*=\s*(?:"[^"]*"|'[^']*')/gi, '')
    .trim()
}
|
|
194
|
+
|
|
195
|
+
// Walk a directory once and build hash → relative-path index. The skill
|
|
196
|
+
// names files <hash>.<ext> so filename === hash, but we still verify by
|
|
197
|
+
// stripping the extension. Existing assets older than this rule (renamed by
|
|
198
|
+
// hand, etc.) are ignored — they wouldn't dedupe correctly anyway.
|
|
199
|
+
/**
 * Build a lookup of previously saved assets in `dir`, keyed by content hash.
 *
 * Only entries named `<16 hex chars>.<ext>` are indexed; anything else is
 * skipped. A missing, falsy or unreadable directory yields an empty map.
 *
 * @param {string|null} dir - Directory to scan (not recursive).
 * @returns {Promise<Map<string, {path: string, ext: string}>>} Hash (as it
 *   appears in the file name) mapped to the file path and lower-cased extension.
 */
async function indexExisting(dir) {
  const index = new Map()
  const usable = Boolean(dir) && existsSync(dir)
  if (!usable) return index

  let names
  try {
    names = await readdir(dir)
  } catch {
    // Best effort: an unreadable directory just means nothing to dedupe against.
    return index
  }

  const hashedName = /^([0-9a-f]{16})\.([a-z0-9]+)$/i
  names.forEach((name) => {
    const match = hashedName.exec(name)
    if (match === null) return
    const [, hash, extension] = match
    index.set(hash, { path: join(dir, name), ext: extension.toLowerCase() })
  })
  return index
}
|
|
215
|
+
|
|
216
|
+
/**
 * Download `url` into memory without ever throwing.
 *
 * The request (including reading the body) is aborted after TIMEOUT ms.
 *
 * @param {string} url - Absolute URL to GET; redirects are followed.
 * @returns {Promise<
 *   {ok: true, status: number, contentType: string, buffer: Buffer} |
 *   {ok: false, status: number, reason: string}
 * >} On success, `contentType` is the lower-cased media type with parameters
 *   removed ('' when the header is absent). On failure, `status` is the HTTP
 *   status, or 0 for network errors and timeouts (`reason: 'timeout'`).
 */
async function fetchBuffer(url) {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), TIMEOUT)

  // Browser-like (mobile Safari) identity: some CDNs 403 on default node fetch UA.
  const requestHeaders = {
    'user-agent':
      'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1',
    accept: 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
  }

  try {
    const response = await fetch(url, {
      signal: controller.signal,
      redirect: 'follow',
      headers: requestHeaders,
    })
    const { status } = response
    if (!response.ok) {
      return { ok: false, status, reason: response.statusText || `http-${status}` }
    }

    // Keep only the media type: drop "; charset=…" style parameters.
    const rawType = response.headers.get('content-type') || ''
    const [mediaType] = rawType.split(';')
    const contentType = mediaType.trim().toLowerCase()

    const body = await response.arrayBuffer()
    return { ok: true, status, contentType, buffer: Buffer.from(body) }
  } catch (err) {
    let reason
    if (err?.name === 'AbortError') {
      reason = 'timeout'
    } else {
      reason = err?.message || String(err)
    }
    return { ok: false, status: 0, reason }
  } finally {
    clearTimeout(timer)
  }
}
|
|
242
|
+
|
|
243
|
+
/**
 * Return the list of image descriptors to process.
 *
 * Uses the array already parsed from --img-urls when present; otherwise reads
 * and parses the --inspection-path file and returns its `imgUrls` array.
 * Any problem with the file is reported through fail(), which exits the
 * process.
 *
 * @returns {Promise<Array>} The raw input items (not yet validated per item).
 */
async function loadInputUrls() {
  if (INPUT_URLS) return INPUT_URLS

  const p = resolve(values['inspection-path'])
  let raw
  try {
    raw = await readFile(p, 'utf8')
  } catch (err) {
    fail(`cannot read inspection file: ${err.message}`)
  }

  let parsed
  try {
    parsed = JSON.parse(raw)
  } catch (err) {
    fail(`inspection file is not valid JSON: ${err.message}`)
  }

  // Optional chaining: a file whose whole content is `null` is valid JSON,
  // and must produce the same clean error as any other file without imgUrls
  // rather than a TypeError.
  const imgUrls = parsed?.imgUrls
  if (!Array.isArray(imgUrls)) {
    fail(`inspection file has no imgUrls[] (got ${typeof imgUrls})`)
  }
  return imgUrls
}
|
|
263
|
+
|
|
264
|
+
/**
 * Run the extraction: download every distinct input URL, strip metadata,
 * name the result by content hash, reuse anything already on disk, then
 * write assets-map.json into OUTPUT_DIR and print the same JSON on stdout.
 *
 * Per-URL problems are collected in the `failed` list instead of aborting the
 * run. A missing `sharp` dependency exits with code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await mkdir(OUTPUT_DIR, { recursive: true })

  // Imported lazily so --help and argument validation work without the dep.
  let sharp
  try {
    sharp = (await import('sharp')).default
  } catch (err) {
    process.stderr.write(
      `[sb-extract-assets] missing dependency: ${err?.message || err}\n` +
        `Install with: npm i sharp\n`,
    )
    process.exit(1)
  }

  const inputs = await loadInputUrls()
  log(`processing ${inputs.length} url(s) into ${OUTPUT_DIR} (target=${TARGET})`)

  // Build hash index from output_dir (idempotent reruns + cross-page dedupe
  // within the same project) and from --existing-assets-dir if provided.
  // Merge: external first, local overrides (we prefer assets already saved
  // in this project's output_dir).
  const localIdx = await indexExisting(OUTPUT_DIR)
  const externalIdx = await indexExisting(EXISTING_DIR)
  const knownHashes = new Map([...externalIdx, ...localIdx])

  // Dedupe input by URL — orchestrator may pass the same URL multiple times
  // (e.g. same hero referenced from <img> and CSS background-image).
  const byUrl = new Map()
  for (const item of inputs) {
    if (!item || typeof item.url !== 'string') continue
    if (!byUrl.has(item.url)) byUrl.set(item.url, item)
  }

  const assets = {}
  const reuseReport = []
  const failed = []

  // Own-property test only: the key can be a Content-Type chosen by a remote
  // server, and inherited members ("constructor", "__proto__", …) must not
  // pass for supported types.
  const isKnownMime = (m) => typeof m === 'string' && Object.hasOwn(MIME_EXT, m)

  // Point `url` at a file that is already on disk with identical content.
  const recordReuse = (url, known, meta) => {
    const { hash, bytes, originalFilename, strippedMetadata, context, alt } = meta
    assets[url] = {
      localPath: known.path,
      hash,
      ext: known.ext,
      bytes,
      originalFilename,
      strippedMetadata,
      reusedFrom: known.path,
      context,
      alt,
    }
    reuseReport.push({ url, reusedFrom: known.path, reason: 'identical-content-hash' })
    log(`↻ ${url} — reused ${basename(known.path)}`)
  }

  // Write `data` as <hash>.<ext>, remember it for later dedupe, record it.
  const saveNew = async (url, data, ext, meta) => {
    const { hash, bytes, originalFilename, strippedMetadata, context, alt } = meta
    const localPath = join(OUTPUT_DIR, `${hash}.${ext}`)
    await writeFile(localPath, data)
    knownHashes.set(hash, { path: localPath, ext })
    assets[url] = {
      localPath,
      hash,
      ext,
      bytes,
      originalFilename,
      strippedMetadata,
      reusedFrom: null,
      context,
      alt,
    }
  }

  for (const item of byUrl.values()) {
    const { url } = item
    const context = item.context || ''
    const alt = item.alt || ''

    // Skip data: URIs — they're already inline and have no metadata to strip
    // beyond what the source HTML already exposed. Pass through as failed
    // with a clear reason so the orchestrator can decide to keep them inline.
    if (url.startsWith('data:')) {
      failed.push({ url, status: 0, reason: 'data-uri-skipped' })
      continue
    }
    if (!/^https?:\/\//i.test(url)) {
      failed.push({ url, status: 0, reason: 'unsupported-scheme' })
      continue
    }

    const fetched = await fetchBuffer(url)
    if (!fetched.ok) {
      log(`✗ ${url} — ${fetched.status} ${fetched.reason}`)
      failed.push({ url, status: fetched.status, reason: fetched.reason })
      continue
    }

    // Type resolution order: declared Content-Type, then magic bytes, then
    // the URL's file extension as a last resort.
    let mime = isKnownMime(fetched.contentType) ? fetched.contentType : sniffMime(fetched.buffer)
    if (!isKnownMime(mime)) {
      const urlExt = (extname(new URL(url).pathname) || '').slice(1).toLowerCase()
      const guessedMime = Object.entries(MIME_EXT).find(([, e]) => e === urlExt)?.[0]
      if (guessedMime) mime = guessedMime
    }
    if (!isKnownMime(mime)) {
      failed.push({ url, status: fetched.status, reason: `unsupported-mime:${fetched.contentType || 'unknown'}` })
      continue
    }

    if (fetched.buffer.length > LARGE_WARN_BYTES) {
      log(`⚠ ${url} is ${(fetched.buffer.length / 1024 / 1024).toFixed(1)}MB — consider optimizing`)
    }

    const originalFilename = basename(new URL(url).pathname) || 'asset'

    // ---- SVG path ----
    if (mime === 'image/svg+xml') {
      const cleaned = sanitizeSvg(fetched.buffer.toString('utf8'))
      const cleanedBuf = Buffer.from(cleaned, 'utf8')
      const hash = hashHex16(cleanedBuf)

      if (TARGET === 'wp') {
        // Inline mode: emit the sanitized markup directly. No file written.
        // The orchestrator's WP builder splices `assets[url].inline` into HTML.
        assets[url] = {
          inline: cleaned,
          hash,
          ext: 'svg',
          bytes: cleanedBuf.length,
          originalFilename,
          strippedMetadata: true,
          context,
          alt,
        }
        log(`✓ ${url} — svg inline (wp), ${cleanedBuf.length}B`)
        continue
      }

      // target=shopify: save as file, dedupe by hash like rasters
      const svgMeta = {
        hash,
        bytes: cleanedBuf.length,
        originalFilename,
        strippedMetadata: true,
        context,
        alt,
      }
      const knownSvg = knownHashes.get(hash)
      if (knownSvg) {
        recordReuse(url, knownSvg, svgMeta)
        continue
      }
      await saveNew(url, cleanedBuf, 'svg', svgMeta)
      log(`✓ ${url} — svg saved ${hash}.svg`)
      continue
    }

    // ---- Raster path (sharp re-encode) ----
    const ext = MIME_EXT[mime]
    let stripped
    let strippedMetadata = true
    try {
      // .rotate() bakes EXIF orientation into pixels BEFORE we drop the
      // EXIF block. Without this an iPhone portrait photo would render
      // sideways once metadata is gone.
      // toFormat(<ext>) re-encodes; sharp does NOT preserve metadata by
      // default — that's the strip. We do NOT call .withMetadata().
      // .keepIccProfile() (sharp ≥ 0.33) preserves color management for
      // calibrated screens; it is skipped when the method does not exist.
      const sharpFormat = ext === 'jpg' ? 'jpeg' : ext
      // Ask sharp for every frame of GIF/WebP input; by default it decodes
      // only the first one, which would silently flatten animations.
      const animated = ext === 'gif' || ext === 'webp'
      let pipeline = sharp(fetched.buffer, { animated }).rotate()
      if (typeof pipeline.keepIccProfile === 'function') {
        pipeline = pipeline.keepIccProfile()
      }
      stripped = await pipeline.toFormat(sharpFormat).toBuffer()
    } catch (err) {
      // Sharp can't decode (corrupt, unsupported variant, ICO, etc.) — save
      // raw bytes and flag. Better to ship a possibly-leaky asset than to
      // black-hole it; the orchestrator can decide whether to drop it.
      log(`! ${url} — sharp decode failed (${err.message}), saving raw`)
      stripped = fetched.buffer
      strippedMetadata = false
    }

    const hash = hashHex16(stripped)
    const rasterMeta = {
      hash,
      bytes: stripped.length,
      originalFilename,
      strippedMetadata,
      context,
      alt,
    }
    const known = knownHashes.get(hash)
    if (known) {
      recordReuse(url, known, rasterMeta)
      continue
    }

    await saveNew(url, stripped, ext, rasterMeta)
    log(`✓ ${url} — saved ${hash}.${ext} (${stripped.length}B${strippedMetadata ? '' : ', RAW'})`)
  }

  const result = {
    target: TARGET,
    outputDir: OUTPUT_DIR,
    assets,
    reuseReport,
    failed,
  }

  const mapPath = join(OUTPUT_DIR, 'assets-map.json')
  await writeFile(mapPath, JSON.stringify(result, null, 2))
  log(`wrote ${mapPath}`)

  process.stdout.write(JSON.stringify(result))
}
|
|
480
|
+
|
|
481
|
+
// Last-resort handler: anything main() did not deal with itself is reported
// on stderr (stack trace when available) and mapped to exit code 1.
main().catch((fatal) => {
  const detail = fatal?.stack ? fatal.stack : fatal
  process.stderr.write(`[sb-extract-assets] fatal: ${detail}\n`)
  process.exit(1)
})
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// test-extract-assets.mjs — Smoke tests that don't require sharp or network.
|
|
3
|
+
// Validates: --help works, missing/invalid args fail with exit code 2, syntax loads.
|
|
4
|
+
// Network + sharp integration tests belong in a separate harness.
|
|
5
|
+
|
|
6
|
+
import { spawnSync } from 'node:child_process'
|
|
7
|
+
import { fileURLToPath } from 'node:url'
|
|
8
|
+
import { dirname, resolve } from 'node:path'
|
|
9
|
+
import { strict as assert } from 'node:assert'
|
|
10
|
+
|
|
11
|
+
// Locate the CLI under test: it lives one directory above this test file.
const thisFile = fileURLToPath(import.meta.url)
const here = dirname(thisFile)
const SCRIPT = resolve(here, '../extract-assets.mjs')

// Running tallies; printed and turned into the exit status at the end.
let passed = 0
let failed = 0
|
|
16
|
+
|
|
17
|
+
/**
 * Run one synchronous test case and print a TAP-like result line.
 *
 * A case passes when `fn` returns without throwing. On failure the thrown
 * error's message is printed, indented, under the "not ok" line. The
 * module-level `passed` / `failed` counters are updated accordingly.
 *
 * @param {string} name - Label shown in the output.
 * @param {() => void} fn - Test body; signals failure by throwing.
 */
function test(name, fn) {
  let thrown
  let didThrow = false
  try {
    fn()
  } catch (err) {
    thrown = err
    didThrow = true
  }

  if (didThrow) {
    process.stdout.write(`not ok - ${name}\n ${thrown.message}\n`)
    failed++
    return
  }
  process.stdout.write(`ok - ${name}\n`)
  passed++
}
|
|
27
|
+
|
|
28
|
+
/**
 * Run the CLI under test and capture its result.
 *
 * Uses process.execPath rather than the literal 'node' so the child runs on
 * the same Node binary as this harness and does not depend on PATH.
 *
 * @param {string[]} args - Arguments placed after the script path.
 * @returns {import('node:child_process').SpawnSyncReturns<string>}
 */
function runCli(args) {
  return spawnSync(process.execPath, [SCRIPT, ...args], { encoding: 'utf8' })
}

// Every case below exits during argument validation, so this directory is
// never created or written to.
const OUT_DIR_ARGS = ['--output-dir', '/tmp/sb-extract-test']

test('--help exits 0 and prints usage', () => {
  const r = runCli(['--help'])
  assert.equal(r.status, 0, `exit code was ${r.status}`)
  assert.match(r.stdout, /extract-assets\.mjs/)
  assert.match(r.stdout, /--inspection-path/)
  assert.match(r.stdout, /--img-urls/)
  assert.match(r.stdout, /--output-dir/)
  assert.match(r.stdout, /--target/)
  assert.match(r.stdout, /--existing-assets-dir/)
})

test('missing --output-dir exits 2', () => {
  const r = runCli(['--img-urls', '[]'])
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /missing --output-dir/)
})

test('missing input source exits 2', () => {
  const r = runCli([...OUT_DIR_ARGS])
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /missing input/)
})

test('both --inspection-path and --img-urls exits 2', () => {
  const r = runCli([...OUT_DIR_ARGS, '--img-urls', '[]', '--inspection-path', '/tmp/x.json'])
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /exactly one/)
})

test('invalid --target exits 2', () => {
  const r = runCli([...OUT_DIR_ARGS, '--img-urls', '[]', '--target', 'wix'])
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /invalid --target/)
})

test('non-numeric --large-warn-mb exits 2', () => {
  const r = runCli([...OUT_DIR_ARGS, '--img-urls', '[]', '--large-warn-mb', 'foo'])
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /numeric/)
})

test('malformed --img-urls JSON exits 2', () => {
  const r = runCli([...OUT_DIR_ARGS, '--img-urls', '{not-json'])
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /not valid JSON/)
})

test('non-array --img-urls exits 2', () => {
  const r = runCli([...OUT_DIR_ARGS, '--img-urls', '{"foo":"bar"}'])
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /must be a JSON array|not valid JSON/)
})

test('missing inspection file exits 2', () => {
  const r = runCli([...OUT_DIR_ARGS, '--inspection-path', '/tmp/does-not-exist-xyz.json'])
  assert.equal(r.status, 2, `exit code was ${r.status}`)
  assert.match(r.stderr, /not found/)
})

process.stdout.write(`\n${passed} passed, ${failed} failed\n`)
process.exit(failed === 0 ? 0 : 1)
|