webcake-landing-mcp 1.0.67 → 1.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,11 @@
1
1
  [
2
+ {
3
+ "v": "1.0.68",
4
+ "d": "12/06/2026",
5
+ "type": "Added",
6
+ "en": "ingest_html and ingest_url now auto-detect absolute-canvas builder exports (LadiPage-family pages and Webcake-published HTML): bare positioned-div…",
7
+ "vi": "ingest_html và ingest_url nay tự động phát hiện các bản export từ builder absolute-canvas (trang LadiPage-family và HTML đã publish của Webcake):…"
8
+ },
2
9
  {
3
10
  "v": "1.0.67",
4
11
  "d": "12/06/2026",
@@ -33,12 +40,5 @@
33
40
  "type": "Added",
34
41
  "en": "create_page now auto-publishes after a successful create: builds the rendered app via the build host and calls the editor's publish_html route so…",
35
42
  "vi": "create_page nay tự động publish sau khi tạo thành công: build rendered app qua build host rồi gọi route publish_html của editor để preview trang mới…"
36
- },
37
- {
38
- "v": "1.0.62",
39
- "d": "11/06/2026",
40
- "type": "Added",
41
- "en": "validate_page now warns when a text-block's estimated rendered height overflows onto a sibling element placed directly below its declared box; the…",
42
- "vi": "validate_page nay cảnh báo khi chiều cao render ước tính của text-block tràn xuống phần tử anh em đặt ngay phía dưới khung khai báo; cảnh báo nêu…"
43
43
  }
44
44
  ]
@@ -23,8 +23,10 @@
23
23
  * Commit path: update_page({draft_id, dry_run:false}) or
24
24
  * patch_page({draft_id, patches?, dry_run:false}).
25
25
  *
26
- * Bounded + TTL'd; a lost draft (process restart, eviction, expiry) just means the
27
- * model falls back to re-sending the full source never a failure.
26
+ * Bounded + TTL'd (SLIDING: every get/update refreshes the clock, so a draft being
27
+ * actively worked on never expires mid-workflow); a lost draft (process restart,
28
+ * eviction, expiry) just means the model falls back to re-sending the full source —
29
+ * never a failure.
28
30
  * Process-global, but draft_ids are random/unguessable AND persisting still uses the
29
31
  * CALLER's own creds, so a draft only ever yields a page in the caller's account.
30
32
  */
@@ -71,10 +73,14 @@ export function updateDraft(id, source) {
71
73
  d.created = Date.now();
72
74
  }
73
75
  }
74
- /** Fetch a live (non-expired) draft, or null if missing/expired. */
76
+ /** Fetch a live (non-expired) draft, or null if missing/expired. Refreshes the TTL (sliding expiration) so an in-progress workflow never loses its draft. */
75
77
  export function getDraft(id) {
76
- sweep(Date.now());
77
- return store.get(id) ?? null;
78
+ const now = Date.now();
79
+ sweep(now);
80
+ const d = store.get(id);
81
+ if (d)
82
+ d.created = now;
83
+ return d ?? null;
78
84
  }
79
85
  export function deleteDraft(id) {
80
86
  store.delete(id);
@@ -133,11 +133,16 @@ function extractGradients(stylesheets) {
133
133
  }
134
134
  return [...seen];
135
135
  }
136
- // ─── main parse entry point ──────────────────────────────────────────────────
137
- export function parseHtml(html, detail = "compact") {
136
+ export function parseHtml(html, detail = "compact", opts = {}) {
138
137
  if (!html || typeof html !== "string" || html.trim().length === 0) {
139
138
  return { sections: [], warnings: ["empty input"] };
140
139
  }
140
+ const warnings = [];
141
+ const repaired = fixMojibake(html);
142
+ if (repaired) {
143
+ html = repaired;
144
+ warnings.push("text encoding repaired (UTF-8 bytes were mis-decoded as Latin-1 mojibake)");
145
+ }
141
146
  // Stylesheet extraction (fast, regex-level, done on raw HTML before DOM parse).
142
147
  const styleBlocks = extractStyleBlocks(html);
143
148
  const googleFonts = extractGoogleFonts(html);
@@ -150,6 +155,29 @@ export function parseHtml(html, detail = "compact") {
150
155
  const body = root.querySelector("body") ?? root;
151
156
  if (!body)
152
157
  return { title, description, og_image, language, sections: [], warnings: ["no <body>"] };
158
+ // Absolute-canvas builders (LadiPage-family exports / Webcake-published pages):
159
+ // the body is bare positioned divs — ALL layout lives in per-id stylesheet
160
+ // rules — so role classification sees nothing useful, but the geometry is
161
+ // machine-readable, and the source canvas widths (mobile 420 / desktop 960)
162
+ // match the Webcake canvas. Return a `canvas` payload that transfers 1:1.
163
+ const canvas = parseAbsoluteCanvas(html, root, styleBlocks, opts.sections);
164
+ if (canvas) {
165
+ const hints = brandHints(body, styleBlocks, googleFonts);
166
+ const bg = [...new Set(hints.background_images.map(stripCdnSizePrefix))];
167
+ return {
168
+ title,
169
+ description,
170
+ og_image,
171
+ language,
172
+ sections: canvasRoleSections(canvas),
173
+ canvas,
174
+ colors: hints.colors.length ? hints.colors : undefined,
175
+ fonts: hints.fonts.length ? hints.fonts : undefined,
176
+ palette: hints.palette,
177
+ background_images: bg.length ? bg : undefined,
178
+ warnings: warnings.length ? warnings : undefined,
179
+ };
180
+ }
153
181
  // CSR heuristic — empty body usually means React/Vue/Next that hasn't rendered.
154
182
  const bodyText = body.textContent.trim();
155
183
  if (bodyText.length < 50) {
@@ -175,35 +203,19 @@ export function parseHtml(html, detail = "compact") {
175
203
  }
176
204
  return sec;
177
205
  });
178
- // Brand hints from inline styles (both modes).
179
- const styleAttrs = [];
180
- body.querySelectorAll("[style]").forEach((el) => {
181
- const s = el.getAttribute("style");
182
- if (s)
183
- styleAttrs.push(s);
184
- });
185
- // Merge stylesheet + inline colors/fonts.
186
- const ssColors = extractStylesheetColors(styleBlocks);
187
- const inlineColors = topColors(styleAttrs, 20);
188
- const mergedColors = mergeTopN([...ssColors, ...inlineColors], 5);
189
- const ssFonts = extractStylesheetFonts(styleBlocks);
190
- const inlineFonts = topFonts(styleAttrs, 10);
191
- const mergedFonts = mergeTopNFonts([...googleFonts, ...ssFonts, ...inlineFonts], 4);
192
- // Background images (both modes).
193
- const bgImages = extractBackgroundImages([...styleBlocks, ...styleAttrs]);
194
- // CSS var palette (both modes — cheap, very useful for clone flows).
195
- const paletteRaw = extractCssVarPalette(styleBlocks);
196
- const palette = Object.keys(paletteRaw).length ? paletteRaw : undefined;
206
+ // Brand hints from stylesheets + inline styles (both modes).
207
+ const hints = brandHints(body, styleBlocks, googleFonts);
197
208
  const base = {
198
209
  title,
199
210
  description,
200
211
  og_image,
201
212
  language,
202
213
  sections,
203
- colors: mergedColors.length ? mergedColors : undefined,
204
- fonts: mergedFonts.length ? mergedFonts : undefined,
205
- palette,
206
- background_images: bgImages.length ? bgImages : undefined,
214
+ colors: hints.colors.length ? hints.colors : undefined,
215
+ fonts: hints.fonts.length ? hints.fonts : undefined,
216
+ palette: hints.palette,
217
+ background_images: hints.background_images.length ? hints.background_images : undefined,
218
+ warnings: warnings.length ? warnings : undefined,
207
219
  };
208
220
  if (detail !== "full")
209
221
  return base;
@@ -891,6 +903,536 @@ function mergeTopNFonts(ranked, n) {
891
903
  }
892
904
  return out;
893
905
  }
906
+ // ─── brand hints (shared by the role path and the canvas path) ───────────────
907
+ function brandHints(body, styleBlocks, googleFonts) {
908
+ const styleAttrs = [];
909
+ body.querySelectorAll("[style]").forEach((el) => {
910
+ const s = el.getAttribute("style");
911
+ if (s)
912
+ styleAttrs.push(s);
913
+ });
914
+ const colors = mergeTopN([...extractStylesheetColors(styleBlocks), ...topColors(styleAttrs, 20)], 5);
915
+ const fonts = mergeTopNFonts([...googleFonts, ...extractStylesheetFonts(styleBlocks), ...topFonts(styleAttrs, 10)], 4);
916
+ const background_images = extractBackgroundImages([...styleBlocks, ...styleAttrs]);
917
+ const paletteRaw = extractCssVarPalette(styleBlocks);
918
+ return {
919
+ colors,
920
+ fonts,
921
+ background_images,
922
+ palette: Object.keys(paletteRaw).length ? paletteRaw : undefined,
923
+ };
924
+ }
925
+ // ─── mojibake repair ─────────────────────────────────────────────────────────
926
+ // UTF-8 bytes mis-decoded as Latin-1 ("Táº¨Y LÃ\x94NG" instead of "TẨY LÔNG") —
927
+ // common in saved-to-disk builder exports. Signature pairs of the double
928
+ // decoding; genuine Vietnamese text contains the precomposed letters instead.
929
+ const MOJIBAKE_RE = /Ã[€-ÿ]|á»|áº|ư/g;
930
+ const GENUINE_VI_RE = /[ăđơưạảắằẵặẹẻẽềếểễệịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹ]/g;
931
+ /** Repair mojibake by re-encoding through Latin-1; null when not mojibake (or not safely repairable). */
932
+ function fixMojibake(html) {
933
+ const hits = (html.match(MOJIBAKE_RE) ?? []).length;
934
+ if (hits < 8)
935
+ return null;
936
+ const genuine = (html.match(GENUINE_VI_RE) ?? []).length;
937
+ if (genuine > hits)
938
+ return null; // mixed/legit text — don't touch
939
+ try {
940
+ const decoded = Buffer.from(html, "latin1").toString("utf8");
941
+ // Invalid UTF-8 sequences mean the input wasn't pure Latin-1 mojibake
942
+ // (e.g. cp1252) — repairing would corrupt it, so keep the original.
943
+ if (decoded.includes("�"))
944
+ return null;
945
+ return decoded;
946
+ }
947
+ catch {
948
+ return null;
949
+ }
950
+ }
951
+ const LADI_TYPE_BY_PREFIX = {
952
+ SECTION: "section",
953
+ HEADLINE: "headline",
954
+ PARAGRAPH: "paragraph",
955
+ LIST_PARAGRAPH: "list",
956
+ IMAGE: "image",
957
+ BOX: "box",
958
+ BUTTON: "button",
959
+ BUTTON_TEXT: "button_text",
960
+ FORM: "form",
961
+ FORM_ITEM: "form_item",
962
+ GROUP: "group",
963
+ LINE: "line",
964
+ SHAPE: "shape",
965
+ COUNTDOWN: "countdown",
966
+ COUNTDOWN_ITEM: "countdown_item",
967
+ CAROUSEL: "carousel",
968
+ GALLERY: "gallery",
969
+ SPINLUCKY: "spin_wheel",
970
+ POPUP: "popup",
971
+ HTML_CODE: "html_code",
972
+ NOTIFY: "notify",
973
+ VIDEO: "video",
974
+ TABS: "tabs",
975
+ FRAME: "frame",
976
+ BANNER: "banner",
977
+ SURVEY: "survey",
978
+ COLLECTION: "collection",
979
+ COMBOBOX: "combobox",
980
+ CART: "cart",
981
+ };
982
+ const LADI_TEXT_TYPES = new Set(["headline", "paragraph", "button_text"]);
983
+ const LADI_STYLE_KEYS = new Set([
984
+ "font-family", "font-size", "font-weight", "font-style", "color", "text-align",
985
+ "line-height", "letter-spacing", "text-transform", "text-shadow", "text-decoration-line",
986
+ "background", "background-color", "background-image", "background-size", "background-position",
987
+ "border", "border-style", "border-color", "border-width", "border-radius",
988
+ "border-top", "border-right", "border-bottom", "border-left",
989
+ "opacity", "transform", "box-shadow", "fill",
990
+ ]);
991
+ const LADI_CONFIG_KEY_RE = /countdown_type|countdown_minute|thankyou_value|form_config_id|show_popup_welcome_page|delay_popup_welcome_page|autoplay|max_turn|time_show|time_delay/;
992
+ const MAX_CANVAS_ELEMENTS = 500;
993
+ const CANVAS_SIZE_CAP = 80_000;
994
+ /** Shedding step 1 keeps only these style keys — the look-defining minimum for a rebuild. */
995
+ const CANVAS_CORE_STYLE_KEYS = ["font-family", "font-size", "font-weight", "color", "text-align", "background-color", "border-radius", "fill"];
996
+ const CANVAS_SVG_CAP = 1200;
997
+ const CANVAS_EMBED_CAP = 1200;
998
+ function ladiTypeFromId(id) {
999
+ const m = /^([A-Z][A-Z_]*?)(\d+)$/.exec(id);
1000
+ const prefix = m ? m[1].replace(/_$/, "") : id;
1001
+ return LADI_TYPE_BY_PREFIX[prefix] ?? prefix.toLowerCase();
1002
+ }
1003
+ /** Split a CSS declaration block on semicolons NOT inside parens (data-URI urls contain `;`). */
1004
+ function splitDeclarations(block) {
1005
+ const out = [];
1006
+ let depth = 0;
1007
+ let cur = "";
1008
+ for (const ch of block) {
1009
+ if (ch === "(")
1010
+ depth++;
1011
+ else if (ch === ")")
1012
+ depth = Math.max(0, depth - 1);
1013
+ if (ch === ";" && depth === 0) {
1014
+ out.push(cur);
1015
+ cur = "";
1016
+ }
1017
+ else
1018
+ cur += ch;
1019
+ }
1020
+ if (cur.trim())
1021
+ out.push(cur);
1022
+ return out;
1023
+ }
1024
+ function mergeLadiDecls(map, id, declsRaw) {
1025
+ const rec = map.get(id) ?? {};
1026
+ for (const d of splitDeclarations(declsRaw)) {
1027
+ const i = d.indexOf(":");
1028
+ if (i <= 0)
1029
+ continue;
1030
+ const k = d.slice(0, i).trim().toLowerCase();
1031
+ const v = d.slice(i + 1).trim().replace(/\s*!important\s*$/i, "");
1032
+ if (!k || !v)
1033
+ continue;
1034
+ // data-URI artwork (arrow/list-bullet icons) is noise AND would overwrite a
1035
+ // real background-image URL merged from an earlier rule — never store it.
1036
+ if (v.includes("data:"))
1037
+ continue;
1038
+ rec[k] = v;
1039
+ }
1040
+ map.set(id, rec);
1041
+ }
1042
+ /**
1043
+ * Index the per-id stylesheet rules: `own` = the `#ID { … }` rule (geometry),
1044
+ * `child` = every `#ID <descendant> { … }` rule merged (visual styling).
1045
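+ * State variants like `#ID.ladi-animation > …` are kept out of `own`/`child` and merged into a separate `anim` map; selectors matching neither form (e.g. `#ID:hover`) are skipped.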
1046
+ */
1047
+ function buildLadiRules(styleBlocks) {
1048
+ const own = new Map();
1049
+ const child = new Map();
1050
+ const anim = new Map();
1051
+ const ruleRe = /([^{}]+)\{([^{}]*)\}/g;
1052
+ for (const css of styleBlocks) {
1053
+ ruleRe.lastIndex = 0;
1054
+ let m;
1055
+ while ((m = ruleRe.exec(css)) !== null) {
1056
+ const declsRaw = m[2].trim();
1057
+ if (!declsRaw)
1058
+ continue;
1059
+ for (const selRaw of m[1].split(",")) {
1060
+ const sel = selRaw.trim().replace(/\s*>\s*/g, " ").replace(/\s+/g, " ");
1061
+ if (!sel.startsWith("#"))
1062
+ continue;
1063
+ const animSel = /^#([\w-]+)\.ladi-animation( .+)?$/.exec(sel);
1064
+ if (animSel) {
1065
+ mergeLadiDecls(anim, animSel[1], declsRaw);
1066
+ continue;
1067
+ }
1068
+ const lead = /^#([\w-]+)( .+)?$/.exec(sel);
1069
+ if (!lead)
1070
+ continue;
1071
+ mergeLadiDecls(lead[2] ? child : own, lead[1], declsRaw);
1072
+ }
1073
+ }
1074
+ }
1075
+ return { own, child, anim };
1076
+ }
1077
+ function pxValue(v) {
1078
+ if (!v)
1079
+ return undefined;
1080
+ const m = /^(-?\d+(?:\.\d+)?)px$/.exec(v.trim());
1081
+ return m ? Math.round(parseFloat(m[1])) : undefined;
1082
+ }
1083
+ function urlFromCss(v) {
1084
+ if (!v)
1085
+ return undefined;
1086
+ const m = /url\(\s*["']?([^"')]+)["']?\s*\)/.exec(v);
1087
+ const u = m?.[1]?.trim();
1088
+ return u && /^https?:\/\//i.test(u) ? u : undefined;
1089
+ }
1090
+ /** `…ladicdn.com/s768x703/path.jpg` → `…ladicdn.com/path.jpg` (the full-size original). */
1091
+ function stripCdnSizePrefix(url) {
1092
+ return url.replace(/^(https?:\/\/[^/]*ladicdn\.com)\/s\d+x\d+\//i, "$1/");
1093
+ }
1094
+ function collapseWs(s) {
1095
+ return s.replace(/\s+/g, " ").trim();
1096
+ }
1097
+ function pickCanvasStyle(decls) {
1098
+ if (!decls)
1099
+ return undefined;
1100
+ const out = {};
1101
+ for (const [k, v] of Object.entries(decls)) {
1102
+ if (!LADI_STYLE_KEYS.has(k))
1103
+ continue;
1104
+ if (k === "background-image" && v === "none")
1105
+ continue;
1106
+ out[k] = v.length > 160 ? v.slice(0, 160) : v;
1107
+ }
1108
+ return Object.keys(out).length ? out : undefined;
1109
+ }
1110
+ function sectionBackground(bag) {
1111
+ if (!bag)
1112
+ return undefined;
1113
+ const out = {};
1114
+ for (const [k, v] of Object.entries(bag)) {
1115
+ if (!/^background/.test(k) && k !== "opacity")
1116
+ continue;
1117
+ if (k === "background-image") {
1118
+ const u = urlFromCss(v);
1119
+ if (u)
1120
+ out[k] = stripCdnSizePrefix(u);
1121
+ continue;
1122
+ }
1123
+ if (v === "none")
1124
+ continue;
1125
+ out[k] = v.slice(0, 160);
1126
+ }
1127
+ return Object.keys(out).length ? out : undefined;
1128
+ }
1129
+ function parseLadiEventData(root) {
1130
+ const map = new Map();
1131
+ const script = root.querySelector("#script_event_data");
1132
+ if (!script)
1133
+ return map;
1134
+ let data;
1135
+ try {
1136
+ data = JSON.parse(script.text || script.innerHTML || "");
1137
+ }
1138
+ catch {
1139
+ return map;
1140
+ }
1141
+ for (const [id, entry] of Object.entries(data)) {
1142
+ if (!entry || typeof entry !== "object")
1143
+ continue;
1144
+ const info = {};
1145
+ const de = entry["option.data_event"];
1146
+ if (Array.isArray(de)) {
1147
+ const events = de
1148
+ .map((x) => ({ type: String(x?.type ?? ""), action: String(x?.action ?? "") }))
1149
+ .filter((x) => x.type && x.action)
1150
+ .slice(0, 4);
1151
+ if (events.length)
1152
+ info.events = events;
1153
+ }
1154
+ if (entry["mobile.option.sticky"] === true || entry["option.sticky"] === true) {
1155
+ info.sticky = String(entry["mobile.option.sticky_position"] ?? entry["option.sticky_position"] ?? "bottom_left");
1156
+ }
1157
+ const config = {};
1158
+ for (const [k, v] of Object.entries(entry)) {
1159
+ if (LADI_CONFIG_KEY_RE.test(k))
1160
+ config[k.replace(/^.*option\./, "")] = v;
1161
+ // Spin-wheel prize list ships as base64("label|message|chance") entries — decode it.
1162
+ if (k.endsWith("spinlucky_setting.list_value") && Array.isArray(v)) {
1163
+ const prizes = v
1164
+ .map((x) => {
1165
+ try {
1166
+ const parts = Buffer.from(String(x), "base64").toString("utf8").split("|").map((p) => p.trim());
1167
+ return parts[0] ? { label: parts[0], chance: parts[2] ?? "" } : null;
1168
+ }
1169
+ catch {
1170
+ return null;
1171
+ }
1172
+ })
1173
+ .filter(Boolean);
1174
+ if (prizes.length)
1175
+ config["prizes"] = prizes;
1176
+ }
1177
+ }
1178
+ if (Object.keys(config).length)
1179
+ info.config = config;
1180
+ if (info.events || info.sticky || info.config)
1181
+ map.set(id, info);
1182
+ }
1183
+ return map;
1184
+ }
1185
+ /** Direct + nested `.ladi-element` nodes under `node`, preserving the builder's parent→child tree. */
1186
+ function collectCanvasElements(node, ctx) {
1187
+ const out = [];
1188
+ for (const child of elementChildren(node)) {
1189
+ const tag = child.tagName?.toLowerCase();
1190
+ if (tag === "script" || tag === "style" || tag === "svg")
1191
+ continue;
1192
+ const cls = child.getAttribute("class") ?? "";
1193
+ const id = child.getAttribute("id") ?? "";
1194
+ if (id && /(^|\s)ladi-element(\s|$)/.test(cls)) {
1195
+ if (ctx.count >= MAX_CANVAS_ELEMENTS) {
1196
+ ctx.truncated = true;
1197
+ return out;
1198
+ }
1199
+ ctx.count++;
1200
+ out.push(buildCanvasElement(child, id, ctx));
1201
+ }
1202
+ else {
1203
+ out.push(...collectCanvasElements(child, ctx));
1204
+ }
1205
+ }
1206
+ return out;
1207
+ }
1208
+ function buildCanvasElement(el, id, ctx) {
1209
+ const type = ladiTypeFromId(id);
1210
+ const node = { id, type };
1211
+ const own = ctx.rules.own.get(id);
1212
+ if (own) {
1213
+ const box = {};
1214
+ for (const k of ["top", "left", "width", "height", "bottom", "right"]) {
1215
+ const v = pxValue(own[k]);
1216
+ if (v !== undefined)
1217
+ box[k] = v;
1218
+ }
1219
+ if ((own["position"] ?? "").includes("fixed"))
1220
+ box.fixed = true;
1221
+ if (Object.keys(box).length)
1222
+ node.box = box;
1223
+ }
1224
+ const bag = ctx.rules.child.get(id);
1225
+ if (bag) {
1226
+ const bgUrl = urlFromCss(bag["background-image"]);
1227
+ if (bgUrl && (type === "image" || type === "video" || type === "popup"))
1228
+ node.src = stripCdnSizePrefix(bgUrl);
1229
+ const style = pickCanvasStyle(bag);
1230
+ if (style) {
1231
+ if (node.src)
1232
+ delete style["background-image"];
1233
+ if (Object.keys(style).length)
1234
+ node.style = style;
1235
+ }
1236
+ // Inner image-layer geometry ≠ element box ⇒ an offset/zoom crop.
1237
+ if (type === "image") {
1238
+ const crop = {};
1239
+ for (const k of ["top", "left", "width", "height"]) {
1240
+ const v = pxValue(bag[k]);
1241
+ if (v !== undefined)
1242
+ crop[k] = v;
1243
+ }
1244
+ const offset = (crop.top !== undefined && crop.top !== 0) || (crop.left !== undefined && crop.left !== 0);
1245
+ const zoomed = (crop.width !== undefined && node.box?.width !== undefined && crop.width !== node.box.width) ||
1246
+ (crop.height !== undefined && node.box?.height !== undefined && crop.height !== node.box.height);
1247
+ if (Object.keys(crop).length && (offset || zoomed))
1248
+ node.crop = crop;
1249
+ }
1250
+ }
1251
+ const animBag = ctx.rules.anim.get(id);
1252
+ if (animBag) {
1253
+ const animation = {};
1254
+ for (const k of ["animation-name", "animation-duration", "animation-delay", "animation-iteration-count"]) {
1255
+ if (animBag[k])
1256
+ animation[k.replace("animation-", "")] = animBag[k];
1257
+ }
1258
+ if (animation["name"])
1259
+ node.animation = animation;
1260
+ }
1261
+ if (LADI_TEXT_TYPES.has(type)) {
1262
+ const t = collapseWs(el.textContent ?? "");
1263
+ if (t)
1264
+ node.text = t.slice(0, 300);
1265
+ }
1266
+ else if (type === "list") {
1267
+ const items = el
1268
+ .querySelectorAll("li")
1269
+ .map((li) => collapseWs(li.text))
1270
+ .filter(Boolean)
1271
+ .slice(0, 15);
1272
+ if (items.length)
1273
+ node.text = items.join("\n");
1274
+ }
1275
+ else if (type === "shape") {
1276
+ const svg = el.querySelector("svg");
1277
+ if (svg) {
1278
+ const markup = svg.toString().replace(/\s{2,}/g, " ").trim();
1279
+ if (markup.length <= CANVAS_SVG_CAP)
1280
+ node.svg = markup;
1281
+ }
1282
+ }
1283
+ else if (type === "html_code" || type === "notify") {
1284
+ const inner = collapseWs(el.innerHTML ?? "");
1285
+ if (inner)
1286
+ node.html = inner.slice(0, CANVAS_EMBED_CAP);
1287
+ }
1288
+ else if (type === "form_item") {
1289
+ const inp = el.querySelector("input, textarea, select");
1290
+ if (inp) {
1291
+ const tag = inp.tagName?.toLowerCase();
1292
+ node.input = {
1293
+ name: inp.getAttribute("name") || undefined,
1294
+ placeholder: inp.getAttribute("placeholder") || undefined,
1295
+ input_type: tag === "input" ? inp.getAttribute("type") ?? "text" : tag,
1296
+ required: inp.hasAttribute("required") || undefined,
1297
+ pattern: inp.getAttribute("pattern") || undefined,
1298
+ };
1299
+ }
1300
+ }
1301
+ if (el.tagName?.toLowerCase() === "a") {
1302
+ const href = el.getAttribute("href");
1303
+ if (href)
1304
+ node.href = href;
1305
+ }
1306
+ const evt = ctx.events.get(id);
1307
+ if (evt)
1308
+ Object.assign(node, evt);
1309
+ const children = collectCanvasElements(el, ctx);
1310
+ if (children.length)
1311
+ node.children = children;
1312
+ return node;
1313
+ }
1314
+ function parseAbsoluteCanvas(html, root, styleBlocks, only) {
1315
+ const sectionEls = root.querySelectorAll(".ladi-section");
1316
+ if (!sectionEls.length)
1317
+ return null;
1318
+ const rules = buildLadiRules(styleBlocks);
1319
+ if (!rules.own.size)
1320
+ return null; // ladi-ish classes but no per-id geometry — let the role path handle it
1321
+ const wrapWidth = /\.ladi-wraper\s*\{[^}]*width:\s*(\d+)px/.exec(styleBlocks.join("\n"));
1322
+ const width = wrapWidth ? parseInt(wrapWidth[1], 10) : 960;
1323
+ const mobileOnly = /is_mobile_only\s*=\s*true/.test(html) || width <= 480;
1324
+ const ctx = { rules, events: parseLadiEventData(root), count: 0, truncated: false };
1325
+ const sections = [];
1326
+ const popups = [];
1327
+ for (const secEl of sectionEls) {
1328
+ const id = secEl.getAttribute("id") ?? `SECTION_${sections.length + 1}`;
1329
+ if (only?.length && !only.includes(id))
1330
+ continue; // section filter: full-detail re-fetch
1331
+ const elements = collectCanvasElements(secEl, ctx);
1332
+ if (id === "SECTION_POPUP") {
1333
+ popups.push(...elements.filter((e) => e.type === "popup"));
1334
+ continue;
1335
+ }
1336
+ const sec = { id, elements };
1337
+ const h = pxValue((rules.own.get(id) ?? {})["height"]);
1338
+ if (h !== undefined)
1339
+ sec.height = h;
1340
+ const bg = sectionBackground(rules.child.get(id));
1341
+ if (bg)
1342
+ sec.background = bg;
1343
+ sections.push(sec);
1344
+ }
1345
+ if (!sections.length && !popups.length)
1346
+ return null;
1347
+ const canvas = {
1348
+ builder: "ladi",
1349
+ width,
1350
+ ...(mobileOnly ? { mobile_only: true } : {}),
1351
+ sections,
1352
+ ...(popups.length ? { popups } : {}),
1353
+ element_count: ctx.count,
1354
+ ...(ctx.truncated ? { truncated: true } : {}),
1355
+ };
1356
+ shedCanvas(canvas);
1357
+ return canvas;
1358
+ }
1359
+ /** Keep the canvas payload under the size cap: prune styles to the core keys → svg/embeds → all styles → long text. */
1360
+ function shedCanvas(canvas) {
1361
+ const walk = (els, fn) => {
1362
+ for (const e of els) {
1363
+ fn(e);
1364
+ if (e.children)
1365
+ walk(e.children, fn);
1366
+ }
1367
+ };
1368
+ const all = (fn) => {
1369
+ for (const s of canvas.sections)
1370
+ walk(s.elements, fn);
1371
+ if (canvas.popups)
1372
+ walk(canvas.popups, fn);
1373
+ };
1374
+ if (JSON.stringify(canvas).length <= CANVAS_SIZE_CAP)
1375
+ return;
1376
+ all((e) => {
1377
+ if (!e.style)
1378
+ return;
1379
+ const pruned = {};
1380
+ for (const k of CANVAS_CORE_STYLE_KEYS)
1381
+ if (e.style[k] !== undefined)
1382
+ pruned[k] = e.style[k];
1383
+ if (Object.keys(pruned).length)
1384
+ e.style = pruned;
1385
+ else
1386
+ delete e.style;
1387
+ });
1388
+ if (JSON.stringify(canvas).length > CANVAS_SIZE_CAP) {
1389
+ all((e) => {
1390
+ delete e.svg;
1391
+ delete e.html;
1392
+ });
1393
+ }
1394
+ if (JSON.stringify(canvas).length > CANVAS_SIZE_CAP) {
1395
+ all((e) => delete e.style);
1396
+ }
1397
+ if (JSON.stringify(canvas).length > CANVAS_SIZE_CAP) {
1398
+ all((e) => {
1399
+ if (e.text && e.text.length > 80)
1400
+ e.text = e.text.slice(0, 80);
1401
+ });
1402
+ }
1403
+ canvas.truncated = true;
1404
+ canvas.hint =
1405
+ "payload exceeded the size cap, so per-element styles were pruned/dropped — re-call the ingest tool with sections:[<id>] (one or a few ids from sections[].id; 'SECTION_POPUP' selects the popups) to get those sections in full untrimmed detail.";
1406
+ }
1407
+ /** Minimal role-section view of the canvas so existing consumers keep working. */
1408
+ function canvasRoleSections(canvas) {
1409
+ return canvas.sections.map((s) => {
1410
+ const headings = [];
1411
+ const imgs = [];
1412
+ let hasForm = false;
1413
+ const walk = (els) => {
1414
+ for (const e of els) {
1415
+ if (e.type === "headline" && e.text)
1416
+ headings.push(e.text);
1417
+ if (e.src)
1418
+ imgs.push(e.src);
1419
+ if (e.type === "form")
1420
+ hasForm = true;
1421
+ if (e.children)
1422
+ walk(e.children);
1423
+ }
1424
+ };
1425
+ walk(s.elements);
1426
+ const sec = { role: hasForm ? "form" : "unknown" };
1427
+ if (headings.length)
1428
+ sec.heading = headings[0].slice(0, 240);
1429
+ if (imgs.length)
1430
+ sec.images = imgs.slice(0, 12);
1431
+ if (s.height)
1432
+ sec.size_hint = { height: s.height, basis: "css", css: `${s.height}px` };
1433
+ return sec;
1434
+ });
1435
+ }
894
1436
  export async function fetchHtml(url, opts = {}) {
895
1437
  if (!/^https?:\/\//i.test(url)) {
896
1438
  return { ok: false, error: "URL must start with http:// or https://" };
package/dist/smoke.js CHANGED
@@ -380,6 +380,114 @@ check("widgets: scripts stripped from html", !(w0?.html ?? "").includes("<script
380
380
  check("widgets: matching css rules attached", !!w0?.css?.includes(".phone-mockup") && !!w0?.css?.includes(".chat-bubble"), w0?.css);
381
381
  check("widgets: none on plain sections", widgetAst.sections[1]?.widgets === undefined, widgetAst.sections[1]);
382
382
  check("widgets: compact mode emits none", parseHtml(widgetHtml, "compact").sections.every((s) => s.widgets === undefined));
383
+ console.log("== ingest: absolute-canvas (LadiPage-family) mode ==");
384
+ // Synthetic fixture modeled on a real LadiPage export: bare positioned divs,
385
+ // per-id CSS geometry, lazyload style, data-URI arrow rule AFTER the real
386
+ // background (must not clobber it), event-data JSON, popup band, fixed CTA.
387
+ const ladiHtml = `<!DOCTYPE html><html><head><title>Ladi Test</title>
388
+ <style id="style_page">.ladi-wraper { margin: 0 auto; width: 420px; }</style>
389
+ <style id="style_element">
390
+ #SECTION1 { height: 700.4px; }
391
+ #SECTION1 > .ladi-section-background { background-size: cover; background-image: url("https://w.ladicdn.com/s768x703/abc/bg.jpg"); background-position: center top; }
392
+ #SECTION1 .ladi-section-arrow-down { background-image: url("data:image/svg+xml;utf8,%3Csvg%3B%3C/svg%3E"); }
393
+ #HEADLINE10 { width: 296px; top: 117.6px; left: 9px; }
394
+ #HEADLINE10 > .ladi-headline { color: rgb(37, 22, 199); font-size: 22px; font-weight: bold; text-align: center; line-height: 1.2; }
395
+ #IMAGE20 { width: 154.6px; height: 117.9px; top: 168px; left: 50px; }
396
+ #IMAGE20 > .ladi-image > .ladi-image-background { width: 188px; height: 125px; top: -27px; left: -24px; background-image: url("https://w.ladicdn.com/s500x450/abc/photo.png"); }
397
+ #BOX95 { width: 60px; height: 60px; top: 200px; left: 5px; }
398
+ #BOX95 > .ladi-box { border-style: solid; border-color: rgb(232, 58, 48); border-radius: 999px; }
399
+ #BOX95.ladi-animation > .ladi-box { animation-name: pulse; -webkit-animation-name: pulse; animation-delay: 1s; animation-duration: 1s; animation-iteration-count: infinite; }
400
+ #SPINLUCKY100 { width: 276px; height: 276px; top: 122px; left: 72px; }
401
+ #BUTTON30 { width: 240px; height: 40px; top: auto; left: 10px; bottom: 10px; position: fixed; z-index: 90000050; }
402
+ #BUTTON30 > .ladi-button > .ladi-button-background { background-color: rgb(232, 58, 48); }
403
+ #BUTTON_TEXT30 { width: 241px; top: 9px; left: 0px; }
404
+ #GROUP40 { width: 225px; height: 76px; top: 374px; left: 68px; }
405
+ #SHAPE50 { width: 20px; height: 20px; top: 5px; left: 5px; }
406
+ #SHAPE50 svg:last-child { fill: rgba(255, 188, 1, 1.0); }
407
+ #SECTION2 { height: 454px; }
408
+ #FORM60 { width: 299px; height: 261px; top: 80px; left: 32px; }
409
+ #FORM_ITEM61 { width: 299px; height: 43px; top: 0px; left: 0px; }
410
+ #LIST_PARAGRAPH80 { width: 379px; top: 69px; left: 21px; }
411
+ #COUNTDOWN90 { width: 225px; height: 51px; top: 10px; left: 0px; }
412
+ #POPUP70 { width: 420px; height: 516px; top: 0px; left: 0px; }
413
+ #HEADLINE71 { width: 266px; top: 5px; left: 77px; }
414
+ </style>
415
+ <style id="style_lazyload">.ladi-section-background, .ladi-image-background { background-image: none !important; }</style>
416
+ </head><body><div class="ladi-wraper">
417
+ <div id="SECTION1" class="ladi-section"><div class="ladi-section-background"></div><div class="ladi-container">
418
+ <div id="HEADLINE10" class="ladi-element"><h3 class="ladi-headline">SẠCH TRƠN LÔNG SÁNG MỊN</h3></div>
419
+ <div id="IMAGE20" class="ladi-element"><div class="ladi-image"><div class="ladi-image-background"></div></div></div>
420
+ <div data-action="true" id="BUTTON30" class="ladi-element"><div class="ladi-button"><div class="ladi-button-background"></div><div id="BUTTON_TEXT30" class="ladi-element"><p class="ladi-headline">NHẬN ƯU ĐÃI NGAY</p></div></div></div>
421
+ <div id="GROUP40" class="ladi-element"><div class="ladi-group">
422
+ <div id="SHAPE50" class="ladi-element"><div class="ladi-shape"><svg viewBox="0 0 24 24" fill="rgba(255,188,1,1)"><path d="M0 0h24v24z"></path></svg></div></div>
423
+ </div></div>
424
+ <div id="BOX95" class="ladi-element ladi-animation"><div class="ladi-box"></div></div>
425
+ </div></div>
426
+ <div id="SECTION2" class="ladi-section"><div class="ladi-container">
427
+ <div id="FORM60" class="ladi-element"><form method="post" class="ladi-form">
428
+ <div id="FORM_ITEM61" class="ladi-element"><div class="ladi-form-item-container"><div class="ladi-form-item"><input name="phone" required type="tel" placeholder="Số điện thoại"></div></div></div>
429
+ </form></div>
430
+ <div id="LIST_PARAGRAPH80" class="ladi-element"><div class="ladi-list-paragraph"><ul><li>Thành phần thiên nhiên</li><li>Không đau rát</li></ul></div></div>
431
+ <div id="COUNTDOWN90" class="ladi-element"><div class="ladi-countdown"><span>00</span></div></div>
432
+ </div></div>
433
+ <div id="SECTION_POPUP" class="ladi-section"><div class="ladi-container">
434
+ <div id="POPUP70" class="ladi-element"><div class="ladi-popup"><div class="ladi-popup-background"></div>
435
+ <div id="HEADLINE71" class="ladi-element"><h3 class="ladi-headline">VÒNG QUAY MAY MẮN</h3></div>
436
+ <div id="SPINLUCKY100" class="ladi-element"><div class="ladi-spin-lucky"><div class="ladi-spin-lucky-screen"></div></div></div>
437
+ </div></div>
438
+ </div></div>
439
+ </div>
440
+ <script id="script_event_data" type="application/json">{"BUTTON30":{"type":"button","option.data_event":[{"type":"popup","action":"POPUP70","action_type":"action"}],"mobile.option.sticky":true,"mobile.option.sticky_position":"bottom_left"},"POPUP70":{"type":"popup","option.show_popup_welcome_page":true,"option.delay_popup_welcome_page":6},"COUNTDOWN90":{"type":"countdown","option.countdown_type":"countdown","option.countdown_minute":360},"SPINLUCKY100":{"type":"spinlucky","option.spinlucky_setting.list_value":["${Buffer.from("Mất lượt|Mất lượt|0%", "utf8").toString("base64")}","${Buffer.from("FreeShip|FreeShip|100%", "utf8").toString("base64")}"],"option.spinlucky_setting.max_turn":1}}</script>
441
+ <script>window.LadiPageScript.runtime.is_mobile_only = true;</script>
442
+ </body></html>`;
443
+ const ladi = parseHtml(ladiHtml);
444
+ check("ladi: canvas payload detected", !!ladi.canvas, Object.keys(ladi));
445
+ const cv = ladi.canvas;
446
+ check("ladi: builder/width/mobile_only", cv.builder === "ladi" && cv.width === 420 && cv.mobile_only === true, cv);
447
+ check("ladi: 2 page sections (popup band separated)", cv.sections.length === 2, cv.sections.map((s) => s.id));
448
+ check("ladi: section height from per-id css", cv.sections[0].height === 700, cv.sections[0].height);
449
+ check("ladi: section bg survives data-URI arrow rule + size prefix stripped", cv.sections[0].background?.["background-image"] === "https://w.ladicdn.com/abc/bg.jpg", cv.sections[0].background);
450
+ const ladiEls = cv.sections[0].elements;
451
+ const ladiH = ladiEls.find((e) => e.id === "HEADLINE10");
452
+ check("ladi: headline box geometry (px, rounded)", ladiH?.box?.width === 296 && ladiH?.box?.top === 118 && ladiH?.box?.left === 9, ladiH?.box);
453
+ check("ladi: headline text + typography style", ladiH?.text === "SẠCH TRƠN LÔNG SÁNG MỊN" && ladiH?.style?.["font-size"] === "22px", ladiH);
454
+ const ladiImg = ladiEls.find((e) => e.id === "IMAGE20");
455
+ check("ladi: image src from bg rule, full-size original", ladiImg?.src === "https://w.ladicdn.com/abc/photo.png", ladiImg);
456
+ const ladiBtn = ladiEls.find((e) => e.id === "BUTTON30");
457
+ check("ladi: fixed button → box.fixed + sticky position", ladiBtn?.box?.fixed === true && ladiBtn?.sticky === "bottom_left", ladiBtn);
458
+ check("ladi: button event → open popup", ladiBtn?.events?.[0]?.type === "popup" && ladiBtn?.events?.[0]?.action === "POPUP70", ladiBtn?.events);
459
+ check("ladi: button_text nested as child", ladiBtn?.children?.[0]?.id === "BUTTON_TEXT30" && ladiBtn?.children?.[0]?.text === "NHẬN ƯU ĐÃI NGAY", ladiBtn?.children);
460
+ const ladiGrp = ladiEls.find((e) => e.id === "GROUP40");
461
+ check("ladi: group nests shape with svg + fill", ladiGrp?.children?.[0]?.type === "shape" && !!ladiGrp?.children?.[0]?.svg && ladiGrp?.children?.[0]?.style?.fill === "rgba(255, 188, 1, 1.0)", ladiGrp?.children);
462
+ const ladiForm = cv.sections[1].elements.find((e) => e.id === "FORM60");
463
+ check("ladi: form_item input facts captured", ladiForm?.children?.[0]?.input?.name === "phone" && ladiForm?.children?.[0]?.input?.input_type === "tel" && ladiForm?.children?.[0]?.input?.required === true, ladiForm?.children?.[0]);
464
+ const ladiList = cv.sections[1].elements.find((e) => e.id === "LIST_PARAGRAPH80");
465
+ check("ladi: list items joined as text", ladiList?.type === "list" && ladiList?.text === "Thành phần thiên nhiên\nKhông đau rát", ladiList);
466
+ const ladiCd = cv.sections[1].elements.find((e) => e.id === "COUNTDOWN90");
467
+ check("ladi: countdown config from event data", ladiCd?.config?.["countdown_minute"] === 360, ladiCd);
468
+ check("ladi: popup separated top-level w/ config", cv.popups?.[0]?.id === "POPUP70" && cv.popups?.[0]?.config?.["delay_popup_welcome_page"] === 6, cv.popups);
469
+ check("ladi: popup keeps its own children", cv.popups?.[0]?.children?.[0]?.text === "VÒNG QUAY MAY MẮN", cv.popups?.[0]?.children);
470
+ check("ladi: role sections still emitted with css size_hint", ladi.sections.length === 2 && ladi.sections[0].size_hint?.height === 700 && ladi.sections[0].size_hint?.basis === "css", ladi.sections);
471
+ check("ladi: form role propagated to role section", ladi.sections[1].role === "form", ladi.sections.map((s) => s.role));
472
+ check("ladi: classic html gets no canvas", parseHtml(sampleHtml).canvas === undefined);
473
+ check("ladi: image crop (offset/zoom inner layer) captured", ladiImg?.crop?.width === 188 && ladiImg?.crop?.top === -27 && ladiImg?.crop?.left === -24, ladiImg?.crop);
474
+ const ladiBox = ladiEls.find((e) => e.id === "BOX95");
475
+ check("ladi: animation captured from .ladi-animation rule", ladiBox?.animation?.["name"] === "pulse" && ladiBox?.animation?.["iteration-count"] === "infinite", ladiBox?.animation);
476
+ check("ladi: animation rule does not pollute base style", ladiBox?.style?.["border-radius"] === "999px" && ladiBox?.style?.["animation-name"] === undefined, ladiBox?.style);
477
+ const ladiSpin = cv.popups?.[0]?.children?.find((e) => e.id === "SPINLUCKY100");
478
+ check("ladi: spin-wheel prizes decoded from base64", ladiSpin?.config?.["prizes"]?.[0]?.label === "Mất lượt" && ladiSpin?.config?.["prizes"]?.[1]?.chance === "100%", ladiSpin?.config);
479
+ check("ladi: spin-wheel max_turn kept", ladiSpin?.config?.["spinlucky_setting.max_turn"] === 1, ladiSpin?.config);
480
+ const onlyS2 = parseHtml(ladiHtml, "compact", { sections: ["SECTION2"] }).canvas;
481
+ check("ladi: sections filter → only SECTION2, no popups", onlyS2?.sections.length === 1 && onlyS2?.sections[0].id === "SECTION2" && onlyS2?.popups === undefined, onlyS2?.sections.map((s) => s.id));
482
+ const onlyPopup = parseHtml(ladiHtml, "compact", { sections: ["SECTION_POPUP"] }).canvas;
483
+ check("ladi: sections filter → SECTION_POPUP selects popups only", onlyPopup?.sections.length === 0 && onlyPopup?.popups?.[0]?.id === "POPUP70", { sections: onlyPopup?.sections.length, popups: onlyPopup?.popups?.map((p) => p.id) });
484
+ console.log("== ingest: mojibake repair (UTF-8 mis-read as Latin-1) ==");
485
+ const vietText = "TẨY LÔNG – Kem tẩy lông Huyền Phi sạch trơn sáng mịn an toàn hiệu quả nhanh chóng";
486
+ const garbled = Buffer.from(vietText, "utf8").toString("latin1");
487
+ const mojibakeAst = parseHtml(`<!DOCTYPE html><html><head><title>${garbled}</title></head><body><section><h1>${garbled}</h1><p>${garbled}. ${garbled}.</p></section><section><h2>${garbled}</h2><p>${garbled}</p></section></body></html>`);
488
+ check("mojibake: title repaired", mojibakeAst.title === vietText, mojibakeAst.title);
489
+ check("mojibake: repair warning emitted", (mojibakeAst.warnings ?? []).some((w) => w.includes("encoding repaired")), mojibakeAst.warnings);
490
+ check("mojibake: genuine Vietnamese untouched", parseHtml(`<html><head><title>${vietText}</title></head><body><p>${vietText} ${vietText} ${vietText}</p></body></html>`).title === vietText);
383
491
  console.log("== ingest: nested-grid block detection (depth > 1) ==");
384
492
  // section > .grid-wrapper > .card — blocks must be found even though cards are not direct children
385
493
  const nestedGridHtml = `<!DOCTYPE html><html lang="en"><head><title>T</title></head><body>
@@ -688,6 +796,27 @@ console.log("== draft-cache: page draft round-trip (create_page failure flow) ==
688
796
  deleteDraft(draftId);
689
797
  check("page draft: deleteDraft removes entry", getDraft(draftId) === null, getDraft(draftId));
690
798
  }
799
+ console.log("== draft-cache: sliding TTL (every touch refreshes the clock) ==");
800
+ {
801
+ const id = putDraft({ source: { page: [] } });
802
+ const entry = getDraft(id);
803
+ // Backdate the entry (getDraft returns the live object), then touch it again:
804
+ // the read must refresh `created` to ~now so an active workflow never expires.
805
+ entry.created = Date.now() - 10_000;
806
+ const touched = getDraft(id);
807
+ check("sliding TTL: getDraft refreshes created", touched != null && Date.now() - touched.created < 2_000, touched && Date.now() - touched.created);
808
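+ // updateDraft refreshes too. NOTE(review): the check below reads `created` through getDraft(id), which itself refreshes the clock (see the previous check), so this assertion passes even if updateDraft never refreshes — confirm against the live entry if updateDraft must be verified on its own.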
809
+ touched.created = Date.now() - 10_000;
810
+ updateDraft(id, { page: [] });
811
+ check("sliding TTL: updateDraft refreshes created", Date.now() - getDraft(id).created < 2_000);
812
+ deleteDraft(id);
813
+ // An UNTOUCHED draft must still expire (default TTL 2h; skip when overridden).
814
+ if (!process.env.WEBCAKE_DRAFT_TTL_MS) {
815
+ const stale = putDraft({ source: { page: [] } });
816
+ getDraft(stale).created = Date.now() - 3 * 60 * 60 * 1000;
817
+ check("sliding TTL: untouched draft still expires", getDraft(stale) === null);
818
+ }
819
+ }
691
820
  console.log("== draft-cache: sections draft round-trip (add_section dry_run / failure flow) ==");
692
821
  {
693
822
  // Build a minimal expandedShell as add_section would — just the sections array.
@@ -21,27 +21,33 @@
21
21
  import { z } from "zod";
22
22
  import { text } from "../mcp/response.js";
23
23
  import { parseHtml, fetchHtml } from "../persistence/html-ingest.js";
24
+ const sectionsParam = z
25
+ .array(z.string())
26
+ .optional()
27
+ .describe("Absolute-canvas mode only: return ONLY these canvas section ids (use the ids from a previous call's canvas.sections[].id; 'SECTION_POPUP' selects the popups). When a full-page call comes back canvas.truncated:true (styles pruned to fit the size cap), re-call per section with this filter to get each section's elements in FULL untrimmed detail — pairs naturally with building the page incrementally via add_section.");
24
28
  const detailParam = z
25
29
  .enum(["compact", "full"])
26
30
  .optional()
27
31
  .describe("Level of detail in the returned AST. 'compact' (default) — backward-compatible ~2-5 KB shape with top colors/fonts from inline styles. 'full' — richer AST: CSS custom-property palette (design tokens by name), background_images from stylesheets, gradients, per-section blocks (repeating card/tile/step structures with title/body/image/cta), li lists, extended paragraphs, images as { src, alt } objects, and per-section widgets = { hint, html, css? } — the cleaned source HTML + matching CSS of composite visuals (phone/device mockup, chat thread, dashboard, browser frame) to rebuild VERBATIM as ONE html-box (inline the css; don't re-imagine the markup). Use 'full' for clone-faithful rebuilds. Image URLs found in the result (images, background_images, og_image) are the user's assets: re-host them via upload_images and reuse them in the generated page for BOTH intents (never hotlink, never replace them with search_images stock photos).");
28
32
  export function registerIngestTools(server) {
29
- server.tool("ingest_html", "Parses an HTML string into a reference AST: title, description, og_image, language, and sections classified by role (header, hero, features, about, form, cta, gallery, testimonials, pricing, faq, footer, unknown) with headings, subheadings, paragraphs, images, ctas, links, form fields, and a size_hint (desktop section height in px — from the source CSS when explicit, else a content-volume estimate; set the rebuilt section's desktop height from it) — plus top colors, fonts, CSS custom-property palette, and background_images pulled from both inline styles and <style> blocks. Returns ~2-5KB (compact) or up to ~25KB (full). Use detail:'full' for clone-faithful rebuilds — it adds per-section blocks (cards/tiles/steps), li lists, gradients, images as { src, alt } objects, and widgets (the source HTML + CSS of composite mockups, to paste into ONE html-box). Image URLs in the result (images, background_images, og_image) are the user's assets — re-host them via upload_images and reuse them for BOTH intents; use search_images only for slots with no source image.", {
33
+ server.tool("ingest_html", "Parses an HTML string into a reference AST: title, description, og_image, language, and sections classified by role (header, hero, features, about, form, cta, gallery, testimonials, pricing, faq, footer, unknown) with headings, subheadings, paragraphs, images, ctas, links, form fields, and a size_hint (desktop section height in px — from the source CSS when explicit, else a content-volume estimate; set the rebuilt section's desktop height from it) — plus top colors, fonts, CSS custom-property palette, and background_images pulled from both inline styles and <style> blocks. Returns ~2-5KB (compact) or up to ~25KB (full). Use detail:'full' for clone-faithful rebuilds — it adds per-section blocks (cards/tiles/steps), li lists, gradients, images as { src, alt } objects, and widgets (the source HTML + CSS of composite mockups, to paste into ONE html-box). ABSOLUTE-CANVAS builder exports (LadiPage-family pages / Webcake-published HTML — bare positioned divs whose layout lives in per-id CSS rules) are AUTO-DETECTED and additionally return `canvas`: { builder, width (420 mobile / 960 desktop — same widths as the Webcake canvas, so geometry transfers 1:1), mobile_only, sections: [{ id, height, background, elements }], popups, element_count } where each element carries { type (headline/paragraph/image/box/button/form_item/group/line/shape/countdown/carousel/spin_wheel/popup…), box (px top/left/width/height; fixed:true = floating/pinned), text, src (full-size original — CDN size prefix stripped), crop (inner image window when offset/zoomed — emulate via background-position/size), style, animation ({name,duration,delay,iteration-count} from the builder's entrance/attention effects), input, events ({type:'popup'|'section'|'link'|'phone', action:target}), sticky, config (spin-wheel prizes decoded to [{label,chance}], countdown minutes, popup delay…), children }. 
When `canvas` is present, rebuild from IT element-by-element using the boxes as-is (ignore the coarse role sections) and keep popups in the top-level popup array. If canvas.truncated:true, follow canvas.hint: re-call with sections:[id] per section for full untrimmed detail. Garbled Vietnamese mojibake (UTF-8 mis-read as Latin-1) is auto-repaired with a warning. Image URLs in the result (images, background_images, og_image, canvas src/background) are the user's assets — re-host them via upload_images and reuse them for BOTH intents; use search_images only for slots with no source image.", {
30
34
  html: z.string().describe("Raw HTML of a page or a section."),
31
35
  intent: z
32
36
  .enum(["adapt", "clone"])
33
37
  .optional()
34
38
  .describe("How the caller intends to use the result. 'adapt' (default) — use as a layout reference and rewrite the TEXT for the user's brand (images from the reference are still re-hosted via upload_images and reused). 'clone' — keep text and images close to the original."),
35
39
  detail: detailParam,
36
- }, { title: "Ingest HTML Reference", readOnlyHint: true, openWorldHint: false }, async ({ html, intent, detail }) => text({ intent: intent ?? "adapt", ...parseHtml(html, detail ?? "compact") }));
37
- server.tool("ingest_url", "Fetches a public webpage (GET, 10s timeout, 2MB cap) and parses it into the same reference AST as ingest_html (including per-section size_hint desktop heights). Returns a warning when the page appears client-rendered (empty <body>) so the caller can fall back to a screenshot — Claude can analyze a screenshot natively without this tool. Does not execute JavaScript; sites built with React/Vue/Next.js may return little content. Use detail:'full' for clone-faithful rebuilds — adds CSS palette, background_images, per-section blocks, lists, images as { src, alt } objects, and widgets (source HTML + CSS of composite mockups for html-box rebuilds). Image URLs in the result are the user's assets — re-host them via upload_images and reuse them for BOTH intents; use search_images only for slots with no source image.", {
40
+ sections: sectionsParam,
41
+ }, { title: "Ingest HTML Reference", readOnlyHint: true, openWorldHint: false }, async ({ html, intent, detail, sections }) => text({ intent: intent ?? "adapt", ...parseHtml(html, detail ?? "compact", { sections }) }));
42
+ server.tool("ingest_url", "Fetches a public webpage (GET, 10s timeout, 2MB cap) and parses it into the same reference AST as ingest_html (including per-section size_hint desktop heights). Returns a warning when the page appears client-rendered (empty <body>) so the caller can fall back to a screenshot — Claude can analyze a screenshot natively without this tool. Does not execute JavaScript; sites built with React/Vue/Next.js may return little content. Use detail:'full' for clone-faithful rebuilds — adds CSS palette, background_images, per-section blocks, lists, images as { src, alt } objects, and widgets (source HTML + CSS of composite mockups for html-box rebuilds). Absolute-canvas builder exports (LadiPage-family / Webcake-published pages) are auto-detected the same way as ingest_html and return the extra `canvas` payload — per-element px geometry on the matching 420/960 canvas; when present, rebuild from it 1:1 and keep popups top-level. Image URLs in the result are the user's assets — re-host them via upload_images and reuse them for BOTH intents; use search_images only for slots with no source image.", {
38
43
  url: z.string().describe("Public HTTP(S) URL of the page to fetch."),
39
44
  intent: z
40
45
  .enum(["adapt", "clone"])
41
46
  .optional()
42
47
  .describe("How the caller intends to use the result. 'adapt' (default) — use as a layout reference and rewrite the TEXT for the user's brand (images from the reference are still re-hosted via upload_images and reused). 'clone' — keep text and images close to the original."),
43
48
  detail: detailParam,
44
- }, { title: "Ingest URL Reference", readOnlyHint: true, openWorldHint: true }, async ({ url, intent, detail }) => {
49
+ sections: sectionsParam,
50
+ }, { title: "Ingest URL Reference", readOnlyHint: true, openWorldHint: true }, async ({ url, intent, detail, sections }) => {
45
51
  const fetched = await fetchHtml(url);
46
52
  if (!fetched.ok) {
47
53
  return text({
@@ -57,7 +63,7 @@ export function registerIngestTools(server) {
57
63
  url,
58
64
  status: fetched.status,
59
65
  intent: intent ?? "adapt",
60
- ...parseHtml(fetched.html, detail ?? "compact"),
66
+ ...parseHtml(fetched.html, detail ?? "compact", { sections }),
61
67
  });
62
68
  });
63
69
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webcake-landing-mcp",
3
- "version": "1.0.67",
3
+ "version": "1.0.68",
4
4
  "description": "MCP server exposing Webcake landing-page element schemas + AI usage hints, and persisting LLM-generated page sources to a Webcake backend.",
5
5
  "mcpName": "io.github.vuluu2k/webcake-landing-mcp",
6
6
  "type": "module",