sitezen-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,692 @@
1
+ /**
2
+ * Output normalizer — runs AFTER Claude returns HTML, BEFORE we push to WordPress.
3
+ *
4
+ * Ported verbatim from src/lib/normalize.ts (the platform engine) so MCP-driven
5
+ * conversions get the same baseline quality the platform delivered. Same
6
+ * processing, same intent, same rules — just running inside the MCP now
7
+ * instead of inside the Vercel function.
8
+ *
9
+ * Even with a comprehensive system prompt, the LLM occasionally:
10
+ * • Uses Swiper.js / Bootstrap class names instead of our .sz-* conventions
11
+ * • Writes inline <script> blocks (WP entity-encoding breaks them)
12
+ * • Forgets .sz-prev/.sz-next on slider arrows
13
+ * • Embeds <img> placeholders instead of real <iframe>s for videos
14
+ * • Uses .active on tab panels instead of [hidden]
15
+ *
16
+ * The enforce* functions take Figma values as parameters and APPLY them
17
+ * onto Claude's HTML — they NEVER invent values. The point is to use the
18
+ * REAL Figma fontSize / color / fontWeight / bg, overriding any drift in
19
+ * Claude's output. Exactly how the platform did it.
20
+ */
21
+ import * as cheerio from "cheerio";
22
/** Class tokens that mark a tab panel as the initially visible one (Bootstrap-style). */
const SZ_ACTIVE_PANEL_TOKEN = /^(?:is-)?active$/;

/**
 * Legacy / third-party class names → SiteZen conventions.
 * Each entry is [legacyNames, replacement]; matching is on WHOLE class tokens.
 */
const SZ_LEGACY_CLASS_RENAMES = [
    // Prev/Next button common variations → .sz-prev / .sz-next
    [['slider-prev', 'prev-btn', 'carousel-prev', 'swiper-button-prev'], 'sz-prev'],
    [['slider-next', 'next-btn', 'carousel-next', 'swiper-button-next'], 'sz-next'],
    // Dots variants → .dots / .sz-dot
    [['slider-dots', 'pagination-dots', 'carousel-dots', 'swiper-pagination'], 'dots'],
    [['slider-dot', 'pagination-dot', 'carousel-dot', 'swiper-pagination-bullet'], 'sz-dot'],
    // Accordion
    [['accordion-item'], 'sz-accordion-item'],
    [['accordion-header', 'accordion-button', 'accordion-trigger'], 'sz-accordion-trigger'],
    [['accordion-body', 'accordion-content', 'accordion-collapse'], 'sz-accordion-body'],
    // Tabs
    [['tab-button', 'tab-link'], 'sz-tab-btn'],
    [['tab-panel', 'tab-content', 'tab-pane'], 'sz-tab-panel'],
];

/**
 * Rename whole class tokens inside every double-quoted class="…" attribute.
 *
 * Matching whole tokens (delimited by whitespace or the attribute edges) is
 * what keeps already-correct names intact: a `\b`-anchored pattern would match
 * "tab-panel" inside "sz-tab-panel" and produce "sz-sz-tab-panel".
 *
 * @param {string} html
 * @param {string[]} legacyNames - plain class names (letters, digits, hyphens)
 * @param {string} replacement
 * @returns {string}
 */
function renameClassTokens(html, legacyNames, replacement) {
    const token = new RegExp(`(^|\\s)(?:${legacyNames.join('|')})(?=\\s|$)`, 'g');
    return html.replace(/\bclass="([^"]*)"/g, (_m, value) => {
        const renamed = value.replace(token, (_t, lead) => lead + replacement);
        return `class="${renamed}"`;
    });
}

/** True when the tag's attribute text carries a real `hidden` attribute (not `aria-hidden`, not `overflow:hidden`). */
function hasHiddenAttribute(attrs) {
    // Blank out quoted values first so "hidden" inside style/class values is ignored.
    const withoutValues = attrs.replace(/"[^"]*"|'[^']*'/g, '""');
    return /(?:^|\s)hidden(?=[\s=/]|$)/i.test(withoutValues);
}

/** Remove the `hidden` attribute (with or without a value) from a tag's attribute text. */
function removeHiddenAttribute(attrs) {
    // First alternative consumes quoted values untouched, so only a real attribute is removed.
    return attrs.replace(/("[^"]*"|'[^']*')|\s+hidden(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]+))?(?=[\s/]|$)/gi, (_m, quoted) => quoted ?? '');
}

/**
 * Convert Bootstrap-style `.active` tab panels to the plugin's [hidden] pattern.
 *
 *  - A panel carrying an `active` / `is-active` class token stays visible and loses that token.
 *  - Every other panel gets the `hidden` attribute.
 *  - If NO panel is marked active, the first panel in the document is made visible.
 *
 * NOTE(review): panels are treated as one document-wide group; markup with
 * several independent tab groups, or a Bootstrap `.tab-content` wrapper around
 * `.tab-pane` children, is not distinguished here — confirm against the plugin.
 *
 * @param {string} html
 * @returns {string}
 */
function normaliseTabPanels(html) {
    const PANEL_TAG = /<(div|section)([^>]*\bclass="[^"]*(?<![\w-])sz-tab-panel(?![\w-])[^"]*"[^>]*)>/g;
    const classTokens = (attrs) => (/\bclass="([^"]*)"/.exec(attrs)?.[1] ?? '').split(/\s+/).filter(Boolean);
    const isActive = (attrs) => classTokens(attrs).some((t) => SZ_ACTIVE_PANEL_TOKEN.test(t));
    const anyActive = [...html.matchAll(PANEL_TAG)].some((m) => isActive(m[2]));
    let panelIndex = 0;
    return html.replace(PANEL_TAG, (match, tag, attrs) => {
        const index = panelIndex;
        panelIndex += 1;
        if (isActive(attrs)) {
            // Strip only the state token, and only inside the class attribute.
            const cleaned = attrs.replace(/\bclass="([^"]*)"/, (_m, value) => {
                const kept = value.split(/\s+/).filter((t) => t && !SZ_ACTIVE_PANEL_TOKEN.test(t));
                return `class="${kept.join(' ')}"`;
            });
            return `<${tag}${cleaned}>`;
        }
        if (!anyActive && index === 0) {
            // Nothing was marked active: the first panel must be the visible one.
            return `<${tag}${removeHiddenAttribute(attrs)}>`;
        }
        return hasHiddenAttribute(attrs) ? match : `<${tag}${attrs} hidden>`;
    });
}

/**
 * Normalise LLM-generated section HTML before it is pushed to WordPress.
 *
 * Steps, in order: strip markdown code fences, rename slider / accordion / tab
 * classes to the `.sz-*` conventions, convert `.active` tab panels to the
 * `hidden` attribute, tag YouTube links with `data-youtube-id`, remove
 * `<script>` blocks and inline `on*=` handlers, blank placeholder image URLs,
 * and ensure a leading `<section>` carries `.sz-fullwidth`.
 *
 * @param {string} html - raw HTML returned by the model
 * @returns {string} normalised HTML; non-string / empty input is returned unchanged
 */
export function normalizeHtml(html) {
    if (!html || typeof html !== 'string')
        return html;
    let out = html;
    // ── 0. STRIP MARKDOWN CODE FENCES ──────────────────────────────────
    // The model sometimes wraps output in ```html … ``` despite instructions;
    // the fences would render as literal text on the page.
    out = out.trim();
    out = out.replace(/^```[a-zA-Z]*\s*\n?/, ''); // leading ```html or ```
    out = out.replace(/\n?\s*```\s*$/, ''); // trailing ```
    // Also handle stray fences in the middle (rare but seen)
    out = out.replace(/^\s*```[a-zA-Z]*\s*$/gm, ''); // standalone ```html line
    out = out.replace(/^\s*```\s*$/gm, ''); // standalone ``` line
    out = out.trim();
    // ── 1. SLIDER class normalisation ──────────────────────────────────
    // The bullet rule must run BEFORE the generic swiper-pagination rule,
    // otherwise "swiper-pagination-bullet" is rewritten to "dots-bullet".
    out = out.replace(/\bswiper-pagination-bullet\b/g, 'sz-dot');
    // Swiper.js variants → SiteZen conventions
    out = out.replace(/\bswiper-slide\b/g, 'sz-slide');
    out = out.replace(/\bswiper-wrapper\b/g, 'sz-slider-track');
    out = out.replace(/\bswiper-container\b/g, 'sz-slider');
    out = out.replace(/\bswiper-pagination\b/g, 'dots');
    // Add .sz-slide if class="slide" (common LLM choice)
    out = out.replace(/class="slide(\s[^"]*)?"/g, 'class="sz-slide$1"');
    out = out.replace(/class="(\s*)slider(\s[^"]*)?"/g, 'class="$1sz-slider$2"');
    // ── 1b/2/3. Prev/next, dots, ACCORDION and TAB class normalisation ──
    for (const [legacyNames, replacement] of SZ_LEGACY_CLASS_RENAMES) {
        out = renameClassTokens(out, legacyNames, replacement);
    }
    // Tabs: .active on panels (Bootstrap pattern) → hidden attribute (plugin pattern).
    out = normaliseTabPanels(out);
    // ── 4. VIDEO normalisation ─────────────────────────────────────────
    // Tag YouTube watch links with their video id (best effort).
    out = out.replace(/href="https:\/\/(?:www\.)?youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11})"/g, 'href="https://www.youtube.com/watch?v=$1" data-youtube-id="$1"');
    // ── 5. SCRIPT removal ──────────────────────────────────────────────
    // The plugin owns ALL interactivity. Inline <script> blocks break in WP
    // (& entity-encoding mangles && operators). Strip them entirely.
    out = out.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, '');
    // Strip inline event handlers (onclick=, onload=, etc.)
    out = out.replace(/\s+on[a-z]+\s*=\s*"[^"]*"/gi, '');
    out = out.replace(/\s+on[a-z]+\s*=\s*'[^']*'/gi, '');
    // ── 6. POST LISTING guard ──────────────────────────────────────────
    // Blank placeholder-service URLs so the plugin's auto-create-from-card
    // flow doesn't import junk images.
    out = out.replace(/src="https:\/\/placehold\.co\/[^"]*"/g, 'src=""');
    out = out.replace(/src="https:\/\/via\.placeholder\.com\/[^"]*"/g, 'src=""');
    // ── 7. Ensure .sz-fullwidth on top-level section ───────────────────
    // Only applies when the output STARTS with a <section> lacking the class.
    out = out.replace(/^(\s*<section\b)((?![^>]*\bclass="[^"]*\bsz-fullwidth\b)[^>]*)/, (m, open, attrs) => {
        if (/\bclass="/.test(attrs)) {
            return open + attrs.replace(/\bclass="/, 'class="sz-fullwidth ');
        }
        return open + ' class="sz-fullwidth"' + attrs;
    });
    return out;
}
113
/**
 * Apply the Figma background colour to the first <section> tag as a fallback.
 *
 * The colour is appended as the `background-color` longhand (never the
 * `background:` shorthand, never `!important`) so any gradient / image layers
 * the model wrote on the section survive and the colour only shows through
 * where those layers are transparent.
 *
 * Behaviour:
 *  - inline style already declares `background-color` → tag left untouched
 *  - inline style exists (double- OR single-quoted)   → rule appended to it
 *  - no inline style                                  → `style="…"` attribute added
 *
 * @param {string} html
 * @param {string} sectionBg - CSS colour value taken from the Figma data
 * @returns {string} html with the first <section> updated; unchanged when either argument is empty
 */
export function enforceSectionBackground(html, sectionBg) {
    if (!html || !sectionBg)
        return html;
    const bgRule = `background-color:${sectionBg};`;
    // Requires whitespace (or start) before "style" so data-style="…" is not mistaken for it.
    const STYLE_ATTR = /(^|\s)style\s*=\s*(["'])([\s\S]*?)\2/i;
    // Only the first <section …> tag (the outermost container) is touched.
    return html.replace(/<section\b([^>]*)>/, (match, attrs) => {
        const found = STYLE_ATTR.exec(attrs);
        if (!found) {
            return `<section${attrs} style="${bgRule}">`;
        }
        const [whole, lead, quote, css] = found;
        // Already declared inline — don't double-apply.
        if (/\bbackground-color\s*:/.test(css))
            return match;
        // Terminate the existing declarations with exactly one ';' (none for an empty style).
        const base = css.trim() === '' ? '' : css.replace(/;?\s*$/, ';');
        const rebuilt = `${lead}style=${quote}${base}${bgRule}${quote}`;
        const nextAttrs = attrs.slice(0, found.index) + rebuilt + attrs.slice(found.index + whole.length);
        return `<section${nextAttrs}>`;
    });
}
153
/**
 * Prepend a defensive <style> block that stops a section from causing
 * horizontal page scroll (e.g. a fixed `width:1920px` carried over from Figma).
 *
 * The rules are scoped under the id of the first <section> whose id starts
 * with "sz-". They:
 *  - force the section to 100% width, cap it at 100vw, hide horizontal overflow
 *    and zero its vertical margins (all `!important`)
 *  - cap every descendant's max-width at 100%
 *  - let img / svg / video / iframe shrink (`max-width:100%;height:auto`)
 *  - lift the max-width cap for elements whose inline style sets `position:absolute`
 *
 * @param {string} html
 * @returns {string} html with the guard inserted before the first `<section`;
 *   unchanged when no `<section id="sz-…">` is present
 */
export function enforceNoHorizontalOverflow(html) {
    if (!html)
        return html;
    const idMatch = html.match(/<section\b[^>]*\bid="(sz-[^"]+)"/);
    if (!idMatch)
        return html;
    // Escape ASCII characters that are not valid in a CSS identifier
    // (".", ":", spaces…) so "#id" keeps selecting by id instead of turning
    // into e.g. an id + class selector.
    const id = idMatch[1].replace(/[^\w\-\u0080-\uffff]/g, '\\$&');
    const guard = `
<style>
#${id}{width:100%!important;max-width:100vw!important;overflow-x:hidden!important;box-sizing:border-box;margin-top:0!important;margin-bottom:0!important}
#${id} *{max-width:100%;box-sizing:border-box}
#${id} img,#${id} svg,#${id} video,#${id} iframe{max-width:100%;height:auto}
#${id} [style*="position:absolute"],#${id} [style*="position: absolute"]{max-width:none}
</style>`;
    // Function replacement: "$" sequences coming from the id are inserted literally.
    return html.replace(/<section\b/, () => `${guard}\n<section`);
}
189
/**
 * Minify the CSS inside every <style>…</style> block of the HTML.
 *
 * Removes comments, collapses whitespace runs, drops whitespace around
 * `{ } ; , > ~` and after `:`, and removes the last semicolon before `}`.
 *
 * Two spots are deliberately left alone because the whitespace is significant:
 *  - around `+` — `calc(100% + 10px)` is invalid CSS without the spaces
 *  - before `:` — `.a :hover` (descendant) differs from `.a:hover`
 *
 * NOTE: whitespace inside quoted CSS strings is collapsed too (no string-aware parsing).
 *
 * @param {string} html
 * @returns {string} html with each <style> body minified; markup outside <style> is untouched
 */
export function minifyInlineCss(html) {
    if (!html)
        return html;
    return html.replace(/<style\b([^>]*)>([\s\S]*?)<\/style>/gi, (_m, attrs, css) => {
        const minified = String(css)
            // Strip CSS comments
            .replace(/\/\*[\s\S]*?\*\//g, "")
            // Collapse all whitespace runs
            .replace(/\s+/g, " ")
            // Remove space around structural symbols ("+" and ":" handled separately)
            .replace(/\s*([{};,>~])\s*/g, "$1")
            // Remove space after ":" only — space BEFORE it can be a descendant combinator
            .replace(/:\s+/g, ":")
            // Remove trailing semicolons before closing brace
            .replace(/;}/g, "}")
            .trim();
        return `<style${attrs}>${minified}</style>`;
    });
}
216
/**
 * Add loading / decoding / priority hints and intrinsic dimensions to every <img>.
 *
 * Per image (an attribute already present on the tag is never overwritten):
 *  - `loading="eager"` on the first image, `loading="lazy"` on the rest
 *  - `decoding="async"` on all
 *  - `fetchpriority="high"` on the first image only
 *  - `width` / `height` attributes (only when NEITHER exists) derived from the
 *    inline style: `width:Npx` + `height:Mpx`, or `width:Npx` + `aspect-ratio:A/B`.
 *    Fractional pixel values are rounded to integers.
 *
 * Only the exact `width` / `height` properties are read — `max-width`,
 * `min-height`, `line-height` etc. are not mistaken for dimensions.
 *
 * @param {string} html
 * @returns {string} html with every <img> tag rewritten
 */
export function optimiseImagesForPageSpeed(html) {
    if (!html)
        return html;
    // (?<![\w-]) rejects property names that merely END in "width"/"height".
    const STYLE_WIDTH = /style="[^"]*(?<![\w-])width\s*:\s*(\d+(?:\.\d+)?)px/i;
    const STYLE_HEIGHT = /style="[^"]*(?<![\w-])height\s*:\s*(\d+(?:\.\d+)?)px/i;
    const STYLE_RATIO = /style="[^"]*\baspect-ratio\s*:\s*([\d.]+)\s*\/\s*([\d.]+)/i;
    // Attribute names must be preceded by whitespace so data-width= etc. don't count.
    const hasAttr = (attrs, name) => new RegExp(`(?:^|\\s)${name}\\s*=`, 'i').test(attrs);
    const toPx = (value) => Math.round(Number.parseFloat(value));
    let firstImageSeen = false;
    return html.replace(/<img\b([^>]*)>/gi, (_match, attrs) => {
        let out = String(attrs);
        const isFirst = !firstImageSeen;
        firstImageSeen = true;
        // 1. loading: explicit "eager" on the first image (LCP), "lazy" on rest.
        if (!hasAttr(out, 'loading')) {
            out = (isFirst ? ' loading="eager"' : ' loading="lazy"') + out;
        }
        // 2. decoding async on all.
        if (!hasAttr(out, 'decoding')) {
            out = ' decoding="async"' + out;
        }
        // 3. fetchpriority high on the first image only.
        if (isFirst && !hasAttr(out, 'fetchpriority')) {
            out = ' fetchpriority="high"' + out;
        }
        // 4. width/height from inline style (prevents CLS).
        if (!hasAttr(out, 'width') && !hasAttr(out, 'height')) {
            const wMatch = out.match(STYLE_WIDTH);
            const hMatch = out.match(STYLE_HEIGHT);
            const arMatch = out.match(STYLE_RATIO);
            if (wMatch && hMatch) {
                out = ` width="${toPx(wMatch[1])}" height="${toPx(hMatch[1])}"` + out;
            }
            else if (wMatch && arMatch) {
                const w = toPx(wMatch[1]);
                const ar = Number.parseFloat(arMatch[1]) / Number.parseFloat(arMatch[2]);
                // Guards against 0/0 (NaN) and x/0 (Infinity) ratios.
                if (Number.isFinite(ar) && ar > 0)
                    out = ` width="${w}" height="${Math.round(w / ar)}"` + out;
            }
        }
        return `<img${out}>`;
    });
}
269
/**
 * Normalise a URL for set comparison: trim surrounding whitespace, strip one
 * leading / trailing quote character, and decode `&amp;` so a URL copied out
 * of an HTML attribute compares equal to its raw form.
 * (Case is NOT changed — comparison stays case-sensitive.)
 */
function normaliseUrl(u) {
    return u.trim().replace(/^["']|["']$/g, "").replace(/&amp;/gi, "&");
}
/**
 * Flag CSS backgrounds that look like a baked screenshot of the section.
 *
 * Scans every `background-image: url(…)` AND `background: … url(…)` shorthand
 * in the HTML (inline styles and <style> blocks alike). `<img src>` is never
 * scanned. Each URL is judged in this order:
 *   1. equals ctx.section_render_url          → violation
 *   2. listed in ctx.legitimate_urls          → allowed
 *   3. data:image base64 URI ≥ 12000 chars    → violation
 *   4. looks like a Figma render / S3 URL     → violation
 *   5. http(s) URL with a ≥ 200-char query    → violation
 *   6. anything else                          → allowed
 *
 * @param {string} html
 * @param {{legitimate_urls?: Iterable<string>, section_render_url?: string}} [ctx]
 * @returns {string[]} one human-readable message per offending url(); empty when clean
 */
export function detectBakedBackgroundViolations(html, ctx) {
    if (!html)
        return [];
    const violations = [];
    // URLs the section is allowed to use as a background photo (known image assets).
    const legitimate = new Set();
    if (ctx?.legitimate_urls) {
        for (const u of ctx.legitimate_urls) {
            const n = normaliseUrl(u);
            if (n)
                legitimate.add(n);
        }
    }
    const sectionRender = ctx?.section_render_url ? normaliseUrl(ctx.section_render_url) : "";
    // Returns the violation message for a URL, or null when the URL is acceptable.
    const flag = (url) => {
        const n = normaliseUrl(url);
        if (!n)
            return null;
        if (sectionRender && n === sectionRender) {
            return `Section render URL used as background-image — this flattens text + nav + buttons into a static image. Reconstruct bg from color/gradient/SVG; render content as HTML.`;
        }
        if (legitimate.has(n))
            return null; // legitimate content asset, allow
        if (/^data:image\/[a-z]+;base64,[A-Za-z0-9+/=]{12000,}/.test(n)) {
            return `Huge data: URI in background-image (${Math.round(n.length / 1024)} KB) — almost certainly a baked screenshot. Use color/gradient/SVG.`;
        }
        if (/figma[^/?#]*\/(?:img|images)\b|s3-alpha-sig\.figma\.com\//.test(n)) {
            return `Figma render URL used as background-image but NOT in this section's known image_assets — almost certainly the full section render. Reconstruct bg from extracted color/gradient/SVG.`;
        }
        if (/^https?:\/\/[^?]+\?[^"')\s]{200,}/.test(n)) {
            return `Signed-URL-shaped URL used as background-image (very long query string suggests temporary render link). Use color/gradient/SVG.`;
        }
        return null;
    };
    // `background-image:` or the `background:` shorthand. The value tokens
    // allowed before url( exclude quotes / parens / ; { } so the match cannot
    // run past the declaration or into another attribute.
    const re = /background(?:-image)?\s*:\s*[^;{}"'()]*?url\(\s*(["']?)([^"')]+)\1\s*\)/gi;
    for (const m of html.matchAll(re)) {
        const v = flag(m[2]);
        if (v)
            violations.push(v);
    }
    return violations;
}
331
/**
 * Detect duplicated content blocks: the same text appearing more than once as
 * the inner text of an <h1>–<h6> or <p> element. That usually means the
 * section was written twice, or its text exists both as HTML and baked into a
 * background image.
 *
 * Inner markup is stripped and whitespace collapsed before comparison; texts
 * shorter than 15 characters are ignored. Comparison is exact (case-sensitive).
 *
 * @param {string} html
 * @returns {string[]} one message per repeated text (text truncated to 60 chars); empty when none
 */
export function detectDuplicateContent(html) {
    if (!html)
        return [];
    // \b after the tag name is essential: without it "<p" also opens on
    // <pre>, <path>, <picture>… and swallows the following real paragraph.
    const tagRe = /<(h[1-6]|p)\b[^>]*>([\s\S]*?)<\/\1>/gi;
    const counts = new Map();
    for (const m of html.matchAll(tagRe)) {
        const text = m[2].replace(/<[^>]+>/g, "").replace(/\s+/g, " ").trim();
        if (text.length < 15)
            continue;
        counts.set(text, (counts.get(text) || 0) + 1);
    }
    const violations = [];
    for (const [text, n] of counts) {
        if (n > 1) {
            violations.push(`Duplicate text "${text.slice(0, 60)}${text.length > 60 ? "…" : ""}" appears ${n}× in headings/paragraphs — usually means the section was written twice OR the bg image bakes in text that's also HTML.`);
        }
    }
    return violations;
}
361
/**
 * Detect navigation / header markup inside a page section.
 *
 * Flags every opening tag that is a <nav> or <header>, or that carries
 * role="navigation" or a class containing "navbar" / "nav-bar". Per
 * CONVERSION_RULES.md §0.3.K such markup belongs in the header template
 * (create_header_footer, template_type='header'); leaving it in a page section
 * yields a duplicate header on every page.
 *
 * Markup that IS a header template (a <header> whose class includes
 * `sz-template-header` or `sz-header-template`) is skipped entirely.
 * Scanning stops once more than 3 violations have been collected.
 *
 * @param {string} html
 * @returns {string[]} one message per offending tag; empty when clean
 */
export function detectNavInHeroViolation(html) {
    if (!html)
        return [];
    // Only checks "page" sections — skip if the markup IS a header template.
    if (/<header\b[^>]*\bclass=["'][^"']*\b(?:sz-template-header|sz-header-template)\b/i.test(html)) {
        return [];
    }
    const REMEDY = `navigation/header markup must be pushed via create_header_footer(template_type='header'), not as part of a page section. Per §0.3.K: split the design into (1) header template push and (2) page section starting with <div class='sz-nav-spacer'>. Otherwise the user gets a duplicate header on every page (global template + baked-in nav).`;
    const ROLE_NAV = /(?:^|\s)role\s*=\s*(["']?)navigation\1(?=[\s/]|$)/i;
    const CLASS_NAV = /(?:^|\s)class\s*=\s*(["'])[^"']*nav-?bar[^"']*\1/i;
    const violations = [];
    // Every opening tag: group 1 = tag name, group 2 = raw attribute text.
    const openTagRe = /<([a-z][a-z0-9]*)\b([^>]*)>/gi;
    for (const m of html.matchAll(openTagRe)) {
        const tag = m[1].toLowerCase();
        const attrs = m[2];
        if (tag === 'nav' || tag === 'header') {
            violations.push(`<${tag}> element found inside a page section — ${REMEDY}`);
        }
        else if (ROLE_NAV.test(attrs) || CLASS_NAV.test(attrs)) {
            violations.push(`<${tag}> element marked as navigation (role="navigation" or navbar class) found inside a page section — ${REMEDY}`);
        }
        if (violations.length > 3)
            break;
    }
    return violations;
}
391
/** Per-asset-type instructions shown to the user for each unfilled placeholder. */
const WHAT_TO_DO_BY_TYPE = Object.freeze({
    image: "Open the page in the editor → click the highlighted image → upload an image or paste an image URL.",
    video: "Open the page in the editor → click the highlighted video block → paste the video URL (YouTube / Vimeo / MP4). The cover image and play button stay the same.",
    map: "Open the page in the editor → click the highlighted map → enter the address or coordinates.",
    form: "Open the page in the editor → click the highlighted form → paste your form embed code (Mailchimp / Fluent Forms / etc.) or build it with SiteZen Forms.",
    audio: "Open the page in the editor → click the highlighted audio block → upload or paste an audio URL.",
    embed: "Open the page in the editor → click the highlighted embed → paste the embed URL (Calendly / Spotify / etc.).",
    download: "Open the page in the editor → click the highlighted download button → upload the file or paste a download URL.",
    icon: "Open the page in the editor → click the highlighted icon → upload your SVG.",
    lottie: "Open the page in the editor → click the highlighted animation block → upload your Lottie JSON file.",
    font: "Open SiteZen → Custom Fonts → upload your font files → assign them. The highlighted text will switch over automatically.",
});
/**
 * Collect the graceful-degradation placeholders present in pushed HTML so the
 * push response can list the spots that still need user input.
 *
 * A placeholder is any opening tag whose (double-quoted) class attribute
 * contains `sz-asset-needed`. From each tag it reads:
 *   data-sz-asset-type → `type`     (defaults to "asset")
 *   data-sz-spot       → `label`    (defaults to the type)
 *   data-sz-original   → `original` (undefined when absent)
 * Unknown types get a generic `what_to_do`, so new asset types need no code change.
 *
 * @param {string} html
 * @returns {{type: string, label: string, original: (string|undefined), what_to_do: string}[]}
 */
export function extractPendingAssets(html) {
    if (!html)
        return [];
    const GENERIC_HINT = "Open the page in the editor — the highlighted spot will let you fill it in.";
    const readAttr = (tag, name) => tag.match(new RegExp(`${name}="([^"]+)"`))?.[1];
    const tags = html.match(/<[^>]*\bclass="[^"]*\bsz-asset-needed\b[^"]*"[^>]*>/g) || [];
    return tags.map((tag) => {
        const type = readAttr(tag, 'data-sz-asset-type') ?? "asset";
        const label = readAttr(tag, 'data-sz-spot') ?? type;
        const original = readAttr(tag, 'data-sz-original');
        // `type` comes from the HTML: an own-property check keeps values such as
        // "constructor" / "toString" from resolving to inherited Object members.
        const known = Object.prototype.hasOwnProperty.call(WHAT_TO_DO_BY_TYPE, type);
        return {
            type,
            label,
            original,
            what_to_do: known ? WHAT_TO_DO_BY_TYPE[type] : GENERIC_HINT,
        };
    });
}
443
/**
 * Report which Figma text nodes do not appear in the generated HTML.
 *
 * The HTML is reduced to plain text (tags removed, common entities decoded,
 * whitespace collapsed, lower-cased). For each node, the first 80 characters
 * of its trimmed text are looked up in that plain text, case-insensitively.
 * Nodes with fewer than 4 characters are skipped. The HTML is never modified —
 * lost text is reported, not patched.
 *
 * @param {string} html
 * @param {{text?: string}[]} textNodes
 * @returns {string[]} the first 80 characters of every text that was not found
 */
export function findMissingTexts(html, textNodes) {
    if (!html || !textNodes || textNodes.length === 0)
        return [];
    // Strip tags + decode common HTML entities for comparison.
    // &amp; is decoded LAST so "&amp;lt;" ends up as the literal text "&lt;".
    const plain = html
        .replace(/<[^>]+>/g, ' ')
        .replace(/&nbsp;/gi, ' ')
        .replace(/&quot;/gi, '"')
        .replace(/&#0*39;|&#x0*27;|&apos;/gi, "'")
        .replace(/&lt;/gi, '<')
        .replace(/&gt;/gi, '>')
        .replace(/&amp;/gi, '&')
        .replace(/\s+/g, ' ')
        .toLowerCase();
    const missing = [];
    for (const t of textNodes) {
        // Tolerate null entries / non-string text instead of throwing mid-audit.
        const txt = (typeof t?.text === 'string' ? t.text : '').trim();
        if (txt.length < 4)
            continue; // skip very short text (likely incidental)
        const key = txt.toLowerCase().slice(0, 80).replace(/\s+/g, ' ');
        if (!plain.includes(key)) {
            missing.push(txt.slice(0, 80));
        }
    }
    return missing;
}
472
/**
 * Cross-language keyword shortcuts: a heading matching `kw` maps to the
 * canonical post type, so synonyms in different languages share one CPT.
 */
const SZ_POST_TYPE_SHORTCUTS = [
    { kw: /\b(service|services|layanan|solusi|spesialisasi|servicio|servicios|servi[çc]os|prestaci[óo]n)\b/i, type: 'sz_service', label: 'Services' },
    { kw: /\b(project|projects|case|cases|portfolio|proyek|proyecto|projet|projetos?)\b/i, type: 'sz_project', label: 'Projects' },
    { kw: /\b(team|teams|people|tim|equipo|équipe|equipe|nosotros)\b/i, type: 'sz_team_member', label: 'Team Members' },
    { kw: /\b(event|events|webinar|acara|evento|évènement|evenement)\b/i, type: 'sz_event', label: 'Events' },
    { kw: /\b(testimonial|testimonials|review|reviews|opini[óo]n)\b/i, type: 'sz_testimonial', label: 'Testimonials' },
    { kw: /\b(blog|article|articles|artikel|news|berita|resource|resources|noticias?)\b/i, type: 'post', label: 'Posts' },
];
/** Words skipped when deriving a post-type slug from a heading (articles, possessives…). */
const SZ_SLUG_STOPWORDS = /^(the|our|my|your|us|kami|kita|nuestra|nuestro|notre|nos|los|las|el|la|de|del|para|en|y|i)$/i;
/**
 * Infer the post type + label for a card grid from the text of the nearest
 * preceding SIBLING heading (h1–h4) of the container.
 *
 *  1. A keyword shortcut match wins (e.g. "Our Services" → sz_service).
 *  2. Otherwise the first two non-stopword words of the heading (accents
 *     stripped, letters only) become the slug: "sz_" + up to 24 characters.
 *  3. With no usable heading the generic sz_item / "Items" is returned.
 *
 * @param {*} $container - cheerio selection of the grid container
 * @returns {{type: string, label: string}}
 */
function inferCardGridPostType($container) {
    const headingText = ($container.prevAll('h1, h2, h3, h4').first().text() || '').trim();
    for (const s of SZ_POST_TYPE_SHORTCUTS) {
        if (s.kw.test(headingText))
            return { type: s.type, label: s.label };
    }
    if (headingText) {
        const words = headingText
            // Decompose, then drop combining diacritical marks (U+0300–U+036F).
            .normalize('NFD').replace(/[\u0300-\u036f]/g, '')
            .replace(/[^\p{Letter}\s]/gu, ' ') // letters + spaces only
            .split(/\s+/)
            .filter((w) => w && !SZ_SLUG_STOPWORDS.test(w))
            .slice(0, 2);
        const slugRaw = words.join('_').toLowerCase();
        if (slugRaw && slugRaw.length >= 3) {
            const slug = 'sz_' + slugRaw.slice(0, 24); // cap at 24 chars
            const label = words.map((w) => w.charAt(0).toUpperCase() + w.slice(1).toLowerCase()).join(' ');
            return { type: slug, label };
        }
    }
    // No heading at all — last resort
    return { type: 'sz_item', label: 'Items' };
}
/**
 * Auto-detect static card grids and rewrite them as Dynamic Post Listing markup.
 *
 * Detection: an element qualifies when it is not already (inside) a
 * `[data-sz-post-listing]`, has 3+ direct children that all share one tag
 * name, and every child contains at least one <img>, one heading (h1–h6) and
 * one <p>. When qualifying elements are nested, only the outermost is kept.
 *
 * Rewrite, per grid:
 *  - container gets data-sz-post-listing / -post-type / -post-label / -post-count
 *  - each child gets data-sz-card
 *  - first <img>  → data-sz-post-field="image"
 *  - first heading → its content is wrapped as
 *      <a data-sz-post-field="url"><span data-sz-post-field="title">…</span></a>
 *    (an existing <a> inside the heading is reused)
 *  - first <p>    → data-sz-post-field="excerpt"
 *
 * @param {string} html
 * @returns {string} the re-serialised HTML without the document wrapper tags cheerio adds
 */
export function autoConvertCardGrids(html) {
    if (!html)
        return html;
    const $ = cheerio.load(html, { xmlMode: false });
    const HEADINGS = 'h1, h2, h3, h4, h5, h6';
    // A child is "card-like" when it holds an image, a heading and a paragraph.
    const isCardLike = (c) => {
        const $c = $(c);
        return $c.find('img').length >= 1
            && $c.find(HEADINGS).length >= 1
            && $c.find('p').length >= 1;
    };
    const candidates = [];
    $('*').each((_, el) => {
        const $el = $(el);
        // Skip if already a post listing, or nested in one
        if ($el.is('[data-sz-post-listing]'))
            return;
        if ($el.parents('[data-sz-post-listing]').length > 0)
            return;
        const children = $el.children().toArray();
        if (children.length < 3)
            return;
        // All children must share one tag name…
        const tagSet = new Set(children.map((c) => c.tagName?.toLowerCase()));
        if (tagSet.size !== 1)
            return;
        // …and each must look like a card.
        if (!children.every(isCardLike))
            return;
        candidates.push(el);
    });
    // De-dupe: if a candidate is nested inside another candidate, keep only the OUTER one
    const outerCandidates = candidates.filter((el) => !candidates.some((other) => other !== el && $(other).find(el).length > 0));
    for (const containerEl of outerCandidates) {
        const $container = $(containerEl);
        const { type, label } = inferCardGridPostType($container);
        const count = $container.children().length;
        // Mark the container as a post listing
        $container.attr('data-sz-post-listing', '');
        $container.attr('data-sz-post-type', type);
        $container.attr('data-sz-post-label', label);
        $container.attr('data-sz-post-count', String(count));
        // Mark each child as a card + tag its img / heading / paragraph
        $container.children().each((_, cardEl) => {
            const $card = $(cardEl);
            $card.attr('data-sz-card', '');
            const $img = $card.find('img').first();
            if ($img.length && !$img.attr('data-sz-post-field')) {
                $img.attr('data-sz-post-field', 'image');
            }
            const $heading = $card.find(HEADINGS).first();
            if ($heading.length && !$heading.find('[data-sz-post-field="title"]').length) {
                const headingHtml = $heading.html() || '';
                const $existingA = $heading.find('a').first();
                if ($existingA.length) {
                    // Heading already wraps a link: reuse it, put the title span inside.
                    $existingA.attr('data-sz-post-field', 'url');
                    const innerHtml = $existingA.html() || '';
                    if (!/data-sz-post-field="title"/.test(innerHtml)) {
                        $existingA.html('<span data-sz-post-field="title">' + innerHtml + '</span>');
                    }
                }
                else {
                    $heading.html('<a data-sz-post-field="url" href="#" style="color:inherit;text-decoration:none"><span data-sz-post-field="title">' + headingHtml + '</span></a>');
                }
            }
            const $p = $card.find('p').first();
            if ($p.length && !$p.attr('data-sz-post-field')) {
                $p.attr('data-sz-post-field', 'excerpt');
            }
        });
    }
    // Strip the document wrapper tags cheerio adds. The \b after the tag name
    // matters: without it "<head" also matches <header …> and deletes real
    // <header> elements from the section.
    let result = $.root().html() || '';
    result = result.replace(/^<!DOCTYPE[^>]*>/i, '').replace(/<\/?(?:html|head|body)\b[^>]*>/gi, '');
    return result;
}
627
// Named entities that are decoded (rather than blanked) when comparing HTML text to Figma text.
const FIGMA_TEXT_ENTITIES = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
]);
// Locates a real `style` attribute (double-quoted, single-quoted or unquoted) inside a tag's
// attribute string. Requiring start-of-string or whitespace before it keeps `data-style` out.
const FIGMA_STYLE_ATTR_RE = /(^|\s)style\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/i;

// Decode the HTML entities we know; any other entity becomes a space.
function figmaDecodeEntities(text) {
    return text.replace(/&([a-z#0-9]+);/gi, (_, body) => {
        let code = NaN;
        if (/^#x[0-9a-f]+$/i.test(body))
            code = Number.parseInt(body.slice(2), 16);
        else if (/^#[0-9]+$/.test(body))
            code = Number.parseInt(body.slice(1), 10);
        if (code > 0 && code <= 0x10ffff)
            return String.fromCodePoint(code);
        const named = FIGMA_TEXT_ENTITIES.get(body.toLowerCase());
        return named === undefined ? ' ' : named;
    });
}

// Canonical comparison key: whitespace runs collapsed, trimmed, lower-cased.
function figmaTextKey(raw) {
    return String(raw == null ? '' : raw).replace(/\s+/g, ' ').trim().toLowerCase();
}

// Strip characters that would terminate the style attribute or the current CSS declaration.
function figmaCssValue(value, unsafe) {
    return String(value).replace(unsafe, '').trim();
}

// Merge override declarations into a tag's attribute string, reusing an existing style attribute.
function figmaMergeStyle(attrs, overrideCss) {
    let merged = false;
    const rewritten = attrs.replace(FIGMA_STYLE_ATTR_RE, (whole, lead, dq, sq, bare) => {
        merged = true;
        let existing = dq !== undefined ? dq : (sq !== undefined ? sq : bare);
        // The attribute is re-emitted double-quoted, so a literal " (only possible in the
        // single-quoted form) has to be entity-escaped.
        existing = existing.replace(/"/g, '&quot;');
        const base = existing.trim() === '' ? '' : existing.replace(/;?\s*$/, ';') + ' ';
        return `${lead}style="${base}${overrideCss}"`;
    });
    return merged ? rewritten : `${attrs} style="${overrideCss}"`;
}

/**
 * Enforce exact Figma text properties on rendered HTML.
 *
 * Scans the HTML for h1-h6 / p / span / blockquote / li elements whose content is plain
 * text (no nested tags). When that text matches a Figma TEXT node, the element's inline
 * font-size, font-weight, color and font-family are overridden with the Figma values,
 * each marked `!important` so they win over whatever CSS the LLM wrote.
 *
 * Matching rules:
 *   - Both sides are compared case-insensitively with whitespace runs collapsed; HTML
 *     entities in the element text are decoded first (`&amp;` -> `&`, `&#39;` -> `'`).
 *   - Texts shorter than 3 characters are ignored on both sides.
 *   - An exact match is tried first. Otherwise the FIRST Figma node (in `textNodes`
 *     order) whose text contains, or is contained in, the element text is used. This
 *     fallback is deliberately loose, so very short element texts can pick up the
 *     style of a longer Figma string that happens to contain them.
 *   - When several Figma nodes share the same text, the first one wins.
 *
 * font-size is emitted as `clamp(min, vw, px)`: the max is the Figma px value, the
 * preferred value scales that px against a 1440px-wide viewport, and the floor is
 * 55% of the Figma size but never below 11px.
 *
 * Why this is necessary: even with the strongest prompt rules, LLM output drifts.
 * Deterministic post-processing makes the styling reliable instead of probabilistic.
 *
 * @param {string} html - HTML produced by the LLM. Non-string or empty input is returned as-is.
 * @param {Array<{text?: string, fontSize?: number|string, fontWeight?: number|string,
 *   color?: string, fontFamily?: string}>} textNodes - Figma TEXT nodes to enforce.
 * @returns {string} The HTML with inline style overrides applied to matching elements.
 */
export function enforceFigmaTextStyles(html, textNodes) {
    if (typeof html !== 'string' || !html || !textNodes || textNodes.length === 0)
        return html;
    // Lookup by normalized text -> first Figma node carrying that text.
    const byText = new Map();
    for (const n of textNodes) {
        const key = figmaTextKey(n && n.text);
        if (key.length >= 3 && !byText.has(key))
            byText.set(key, n);
    }
    if (byText.size === 0)
        return html;
    // `(\s[^>]*)?` forces a boundary after the tag name so `<pre>` is never read as `<p` + "re".
    // `[^<]+` restricts the rewrite to elements that contain text only.
    return html.replace(/<(h[1-6]|p|span|blockquote|li)(\s[^>]*)?>([^<]+)<\/\1>/g, (match, tag, rawAttrs, inner) => {
        const attrs = rawAttrs || '';
        const cleanText = figmaTextKey(figmaDecodeEntities(inner));
        if (cleanText.length < 3)
            return match;
        // Exact match first, then the loose containment fallback (first inserted key wins).
        let figmaNode = byText.get(cleanText);
        if (!figmaNode) {
            for (const [key, node] of byText) {
                if (cleanText.includes(key) || key.includes(cleanText)) {
                    figmaNode = node;
                    break;
                }
            }
        }
        if (!figmaNode)
            return match;
        // Build the enforced declarations from the Figma values only.
        const overrides = [];
        const px = Number.parseFloat(figmaNode.fontSize);
        if (Number.isFinite(px) && px > 0) {
            // clamp() keeps the text responsive while still maxing out at the Figma px.
            const minPx = Math.max(11, Math.round(px * 0.55));
            const vw = (px / 1440 * 100).toFixed(2);
            overrides.push(`font-size:clamp(${minPx}px,${vw}vw,${px}px)`);
        }
        if (figmaNode.fontWeight) {
            const weight = figmaCssValue(figmaNode.fontWeight, /["<>;]/g);
            if (weight)
                overrides.push(`font-weight:${weight}`);
        }
        if (figmaNode.color) {
            const color = figmaCssValue(figmaNode.color, /["<>;]/g);
            if (color)
                overrides.push(`color:${color}`);
        }
        if (figmaNode.fontFamily) {
            // The family is wrapped in single quotes below, so those are stripped as well.
            const family = figmaCssValue(figmaNode.fontFamily, /["'<>;]/g);
            if (family)
                overrides.push(`font-family:'${family}',system-ui,sans-serif`);
        }
        if (overrides.length === 0)
            return match;
        // !important wins over any conflicting CSS already present on the element.
        const overrideCss = overrides.map((o) => o + ' !important').join('; ') + ';';
        return `<${tag}${figmaMergeStyle(attrs, overrideCss)}>${inner}</${tag}>`;
    });
}