similarbuild 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "similarbuild",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "Visual migration framework for Claude Code — clone a live page, get a paste-ready WordPress/Elementor or Shopify section file, validated and auto-corrected.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -29,7 +29,7 @@ Optional:
|
|
|
29
29
|
--wait-strategy <name> lazy-load (default) | auto | kaching-bundles | judge-me
|
|
30
30
|
--max-depth <n> DOM walk max depth (default 8).
|
|
31
31
|
--max-children <n> Max children kept per node (default 60).
|
|
32
|
-
--max-text <n> Max chars of direct text per node (default 240).
|
|
32
|
+
--max-text <n> Max chars of direct text per node (default 0 = no truncation; pass a positive integer to cap).
|
|
33
33
|
--timeout <ms> Per-step timeout (default 30000).
|
|
34
34
|
--help Show this message.
|
|
35
35
|
|
|
@@ -55,7 +55,7 @@ const { values } = parseArgs({
|
|
|
55
55
|
'output-dir': { type: 'string' },
|
|
56
56
|
'max-depth': { type: 'string', default: '8' },
|
|
57
57
|
'max-children': { type: 'string', default: '60' },
|
|
58
|
-
'max-text': { type: 'string', default: '240' },
|
|
58
|
+
'max-text': { type: 'string', default: '0' },
|
|
59
59
|
timeout: { type: 'string', default: '30000' },
|
|
60
60
|
help: { type: 'boolean', default: false },
|
|
61
61
|
},
|
|
@@ -960,6 +960,12 @@ function extractInPage({ selector, maxDepth, maxChildren, maxText }) {
|
|
|
960
960
|
// iframes (Klaviyo embeds, recaptcha) are recorded as opaque rectangles.
|
|
961
961
|
let shadowDOMTraversed = false
|
|
962
962
|
let shadowRootCount = 0
|
|
963
|
+
// §V03-C — counts hosts whose shadow tree was successfully re-serialized
|
|
964
|
+
// via getHTML+parseHTMLUnsafe and re-walked into a flattened light-DOM
|
|
965
|
+
// representation. 0 means the post-render shadow-flatten phase was either
|
|
966
|
+
// unavailable, skipped, or hit zero open roots.
|
|
967
|
+
let shadowSerializedHostCount = 0
|
|
968
|
+
const warnings = []
|
|
963
969
|
const externalIframes = []
|
|
964
970
|
function classifyIframePurpose(src) {
|
|
965
971
|
if (!src) return null
|
|
@@ -1091,7 +1097,10 @@ function extractInPage({ selector, maxDepth, maxChildren, maxText }) {
|
|
|
1091
1097
|
if (n.nodeType === 3) t += n.nodeValue
|
|
1092
1098
|
}
|
|
1093
1099
|
t = t.replace(/\s+/g, ' ').trim()
|
|
1094
|
-
|
|
1100
|
+
// §V03-B — maxText === 0 means "no truncation" (default since v0.3.0).
|
|
1101
|
+
// Pre-v0.3.0 default was 240, which silently clipped policy paragraphs
|
|
1102
|
+
// (privacy/terms) at ~25-36% coverage. The cap is now opt-in via flag.
|
|
1103
|
+
if (maxText > 0 && t.length > maxText) t = `${t.slice(0, maxText)}…`
|
|
1095
1104
|
return t
|
|
1096
1105
|
}
|
|
1097
1106
|
|
|
@@ -1406,15 +1415,138 @@ function extractInPage({ selector, maxDepth, maxChildren, maxText }) {
|
|
|
1406
1415
|
const dom = [walk(root, 0)].filter(Boolean)
|
|
1407
1416
|
const { sectionType, sectionBoundingBox } = findSectionAndBox(root, !!selector)
|
|
1408
1417
|
|
|
1418
|
+
// §V03-C — Shadow DOM serialization via getHTML + parseHTMLUnsafe.
|
|
1419
|
+
// walk() above only sees light DOM and `.shadowRoot.children` direct.
|
|
1420
|
+
// Custom elements whose shadow tree is populated by JS (Shopify
|
|
1421
|
+
// <x-product-form>, <price-list>, <variant-radios>, <store-footer-menu>)
|
|
1422
|
+
// expose content that the children-only walker can technically read,
|
|
1423
|
+
// but `<slot>`-projected content and content composed from declarative
|
|
1424
|
+
// shadow DOM gets fragmented. getHTML({ serializableShadowRoots: true })
|
|
1425
|
+
// emits a single HTML string with `<template shadowrootmode="open">`
|
|
1426
|
+
// declarations inline; parseHTMLUnsafe re-attaches those as live shadow
|
|
1427
|
+
// roots in a parsed document, which the same walk() can then flatten
|
|
1428
|
+
// uniformly. Layout (bbox/computedStyle) on the parsed doc is detached
|
|
1429
|
+
// and returns UA defaults — that's an acknowledged trade-off; the
|
|
1430
|
+
// structural content (tags, classes, attrs, text, src) is what makes
|
|
1431
|
+
// PDP gallery/price/variants visible to the composer downstream.
|
|
1432
|
+
const originalShadowRootCount = shadowRootCount
|
|
1433
|
+
const originalShadowDOMTraversed = shadowDOMTraversed
|
|
1434
|
+
// §V03-C — preserve a snapshot of the live-walker dom[] before any
|
|
1435
|
+
// potential substitution. If the re-walk replaces dom[] with the
|
|
1436
|
+
// shadow-flattened tree (which carries detached parsedDoc bboxes
|
|
1437
|
+
// === {0,0,0,0}), downstream consumers that need real layout (e.g.
|
|
1438
|
+
// /build-site Step 3.5e --crop-live-bbox for header/footer compare)
|
|
1439
|
+
// can fall back to domLive. When substitution doesn't fire, domLive
|
|
1440
|
+
// is left null and consumers use dom[] as-is.
|
|
1441
|
+
let domLive = null
|
|
1442
|
+
try {
|
|
1443
|
+
if (typeof document.documentElement.getHTML === 'function' &&
|
|
1444
|
+
typeof Document.parseHTMLUnsafe === 'function') {
|
|
1445
|
+
const hostsSeen = new Set()
|
|
1446
|
+
const hostsCollected = []
|
|
1447
|
+
function collectHostsFrom(el) {
|
|
1448
|
+
if (!el) return
|
|
1449
|
+
if (el.shadowRoot && !hostsSeen.has(el)) {
|
|
1450
|
+
hostsSeen.add(el)
|
|
1451
|
+
hostsCollected.push(el)
|
|
1452
|
+
for (const child of el.shadowRoot.children) collectHostsFrom(child)
|
|
1453
|
+
}
|
|
1454
|
+
for (const child of el.children) collectHostsFrom(child)
|
|
1455
|
+
}
|
|
1456
|
+
collectHostsFrom(document.documentElement)
|
|
1457
|
+
|
|
1458
|
+
if (hostsCollected.length > 0) {
|
|
1459
|
+
const html = document.documentElement.getHTML({
|
|
1460
|
+
serializableShadowRoots: true,
|
|
1461
|
+
shadowRoots: hostsCollected.map((h) => h.shadowRoot),
|
|
1462
|
+
})
|
|
1463
|
+
const parsedDoc = Document.parseHTMLUnsafe(html)
|
|
1464
|
+
const parsedRoot = selector
|
|
1465
|
+
? parsedDoc.querySelector(selector)
|
|
1466
|
+
: parsedDoc.body
|
|
1467
|
+
if (parsedRoot) {
|
|
1468
|
+
const flattened = walk(parsedRoot, 0)
|
|
1469
|
+
// §V03-C safety guard: only substitute the live dom[] if the
|
|
1470
|
+
// re-walk did not lose value on EITHER axis (nodes or aggregate
|
|
1471
|
+
// text chars). parseHTMLUnsafe returns a detached doc;
|
|
1472
|
+
// getComputedStyle/getBoundingClientRect degrade to UA defaults
|
|
1473
|
+
// (zero bbox, empty computed). On pages where shadow flattening
|
|
1474
|
+
// adds value (PDPs Shopify with populated custom elements) the
|
|
1475
|
+
// gain is large; on pages without that workload (policies,
|
|
1476
|
+
// plain HTML), the re-walk can lose content because hydrated
|
|
1477
|
+
// shadow content visible to the live walker doesn't reproduce
|
|
1478
|
+
// on the detached tree. When that happens, keep the live
|
|
1479
|
+
// walker result and surface a warning.
|
|
1480
|
+
function measureTree(arr) {
|
|
1481
|
+
let nodes = 0
|
|
1482
|
+
let textChars = 0
|
|
1483
|
+
const stack = Array.isArray(arr) ? [...arr] : [arr]
|
|
1484
|
+
while (stack.length) {
|
|
1485
|
+
const node = stack.pop()
|
|
1486
|
+
if (!node || typeof node !== 'object') continue
|
|
1487
|
+
nodes++
|
|
1488
|
+
if (typeof node.text === 'string') textChars += node.text.length
|
|
1489
|
+
if (Array.isArray(node.children)) {
|
|
1490
|
+
for (const c of node.children) stack.push(c)
|
|
1491
|
+
}
|
|
1492
|
+
}
|
|
1493
|
+
return { nodes, textChars }
|
|
1494
|
+
}
|
|
1495
|
+
if (flattened) {
|
|
1496
|
+
const orig = measureTree(dom)
|
|
1497
|
+
const flat = measureTree([flattened])
|
|
1498
|
+
if (flat.nodes >= orig.nodes && flat.textChars >= orig.textChars) {
|
|
1499
|
+
// Snapshot the live walker result BEFORE substitution so
|
|
1500
|
+
// downstream consumers that depend on real layout (bbox
|
|
1501
|
+
// values are zero on the parsed detached doc) can fall
|
|
1502
|
+
// back when needed — see §V03-C domLive comment above.
|
|
1503
|
+
domLive = dom.length === 1 ? dom[0] : [...dom]
|
|
1504
|
+
dom.length = 0
|
|
1505
|
+
dom.push(flattened)
|
|
1506
|
+
shadowSerializedHostCount = hostsCollected.length
|
|
1507
|
+
} else {
|
|
1508
|
+
warnings.push({
|
|
1509
|
+
code: 'shadow-flatten-skipped-lossy',
|
|
1510
|
+
message: `re-walk lossy: nodes ${flat.nodes} vs ${orig.nodes}, textChars ${flat.textChars} vs ${orig.textChars}; keeping live walker result`,
|
|
1511
|
+
})
|
|
1512
|
+
}
|
|
1513
|
+
}
|
|
1514
|
+
// The re-walk of the parsed (detached) doc re-discovers shadow
|
|
1515
|
+
// roots that parseHTMLUnsafe re-attached, so it would
|
|
1516
|
+
// double-count if we let those increments leak. Restore the
|
|
1517
|
+
// canonical live counts here.
|
|
1518
|
+
shadowRootCount = originalShadowRootCount
|
|
1519
|
+
shadowDOMTraversed = originalShadowDOMTraversed
|
|
1520
|
+
}
|
|
1521
|
+
}
|
|
1522
|
+
} else {
|
|
1523
|
+
warnings.push({
|
|
1524
|
+
code: 'shadow-serialize-unavailable',
|
|
1525
|
+
message:
|
|
1526
|
+
'getHTML or Document.parseHTMLUnsafe not available; shadow DOM falls back to .shadowRoot.children walk (v0.2.x behavior)',
|
|
1527
|
+
})
|
|
1528
|
+
}
|
|
1529
|
+
} catch (err) {
|
|
1530
|
+
shadowRootCount = originalShadowRootCount
|
|
1531
|
+
shadowDOMTraversed = originalShadowDOMTraversed
|
|
1532
|
+
warnings.push({
|
|
1533
|
+
code: 'shadow-serialize-failed',
|
|
1534
|
+
message: String(err && err.message ? err.message : err),
|
|
1535
|
+
})
|
|
1536
|
+
}
|
|
1537
|
+
|
|
1409
1538
|
return {
|
|
1410
1539
|
sectionType,
|
|
1411
1540
|
sectionBoundingBox,
|
|
1412
1541
|
tokens,
|
|
1413
1542
|
dom,
|
|
1543
|
+
domLive,
|
|
1414
1544
|
pseudoElements,
|
|
1415
1545
|
imgUrls,
|
|
1416
1546
|
shadowDOMTraversed,
|
|
1417
1547
|
shadowRootCount,
|
|
1548
|
+
shadowSerializedHostCount,
|
|
1549
|
+
warnings,
|
|
1418
1550
|
externalIframes,
|
|
1419
1551
|
}
|
|
1420
1552
|
}
|
|
@@ -37,6 +37,24 @@ test('--help exits 0 and prints usage', () => {
|
|
|
37
37
|
assert.match(r.stdout, /--wait-strategy/)
|
|
38
38
|
})
|
|
39
39
|
|
|
40
|
+
// §V03-B — V0.3.0 changed --max-text default from 240 to 0 (no truncation).
|
|
41
|
+
// This regression-protects the policy-page coverage fix.
|
|
42
|
+
test('--help documents --max-text default 0 (no truncation)', () => {
|
|
43
|
+
const r = spawnSync('node', [SCRIPT, '--help'], { encoding: 'utf8' })
|
|
44
|
+
assert.equal(r.status, 0, `exit code was ${r.status}`)
|
|
45
|
+
assert.match(r.stdout, /--max-text/, '--max-text flag must appear in help')
|
|
46
|
+
// Scope the "default 0" assertion to the --max-text line specifically.
|
|
47
|
+
// The previous /default\s+0/ would match against any other flag whose
|
|
48
|
+
// description happens to contain "default 0" (e.g. a future flag whose
|
|
49
|
+
// default is "0ms" or similar).
|
|
50
|
+
assert.match(
|
|
51
|
+
r.stdout,
|
|
52
|
+
/--max-text[^\n]*default\s+0/,
|
|
53
|
+
'--max-text line must document default 0',
|
|
54
|
+
)
|
|
55
|
+
assert.match(r.stdout, /no truncation/i, 'help must explain semantics')
|
|
56
|
+
})
|
|
57
|
+
|
|
40
58
|
test('missing --url exits 2', () => {
|
|
41
59
|
const r = spawnSync('node', [SCRIPT, '--output-dir', '/tmp/sb-test'], { encoding: 'utf8' })
|
|
42
60
|
assert.equal(r.status, 2, `exit code was ${r.status}`)
|