npm - quicklook-pptx-renderer - Versions diffs - 0.1.0 - Mend

quicklook-pptx-renderer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

package/LICENSE +21 -0
package/README.md +266 -0
package/dist/cli.d.ts +2 -0
package/dist/cli.js +175 -0
package/dist/diff/compare.d.ts +17 -0
package/dist/diff/compare.js +71 -0
package/dist/index.d.ts +29 -0
package/dist/index.js +72 -0
package/dist/lint.d.ts +27 -0
package/dist/lint.js +328 -0
package/dist/mapper/bleed-map.d.ts +6 -0
package/dist/mapper/bleed-map.js +1 -0
package/dist/mapper/constants.d.ts +2 -0
package/dist/mapper/constants.js +4 -0
package/dist/mapper/drawable-mapper.d.ts +16 -0
package/dist/mapper/drawable-mapper.js +1464 -0
package/dist/mapper/html-generator.d.ts +13 -0
package/dist/mapper/html-generator.js +539 -0
package/dist/mapper/image-mapper.d.ts +14 -0
package/dist/mapper/image-mapper.js +70 -0
package/dist/mapper/nano-malloc.d.ts +130 -0
package/dist/mapper/nano-malloc.js +197 -0
package/dist/mapper/ql-bleed.d.ts +35 -0
package/dist/mapper/ql-bleed.js +254 -0
package/dist/mapper/shape-mapper.d.ts +41 -0
package/dist/mapper/shape-mapper.js +2384 -0
package/dist/mapper/slide-mapper.d.ts +4 -0
package/dist/mapper/slide-mapper.js +112 -0
package/dist/mapper/style-builder.d.ts +12 -0
package/dist/mapper/style-builder.js +30 -0
package/dist/mapper/text-mapper.d.ts +14 -0
package/dist/mapper/text-mapper.js +302 -0
package/dist/model/enums.d.ts +25 -0
package/dist/model/enums.js +2 -0
package/dist/model/types.d.ts +482 -0
package/dist/model/types.js +7 -0
package/dist/package/content-types.d.ts +1 -0
package/dist/package/content-types.js +4 -0
package/dist/package/package.d.ts +10 -0
package/dist/package/package.js +52 -0
package/dist/package/relationships.d.ts +6 -0
package/dist/package/relationships.js +25 -0
package/dist/package/zip.d.ts +6 -0
package/dist/package/zip.js +17 -0
package/dist/reader/color.d.ts +3 -0
package/dist/reader/color.js +79 -0
package/dist/reader/drawing.d.ts +17 -0
package/dist/reader/drawing.js +403 -0
package/dist/reader/effects.d.ts +2 -0
package/dist/reader/effects.js +83 -0
package/dist/reader/fill.d.ts +2 -0
package/dist/reader/fill.js +94 -0
package/dist/reader/presentation.d.ts +5 -0
package/dist/reader/presentation.js +127 -0
package/dist/reader/slide-layout.d.ts +2 -0
package/dist/reader/slide-layout.js +28 -0
package/dist/reader/slide-master.d.ts +4 -0
package/dist/reader/slide-master.js +49 -0
package/dist/reader/slide.d.ts +2 -0
package/dist/reader/slide.js +26 -0
package/dist/reader/text-list-style.d.ts +2 -0
package/dist/reader/text-list-style.js +9 -0
package/dist/reader/text.d.ts +5 -0
package/dist/reader/text.js +295 -0
package/dist/reader/theme.d.ts +2 -0
package/dist/reader/theme.js +109 -0
package/dist/reader/transform.d.ts +2 -0
package/dist/reader/transform.js +21 -0
package/dist/render/image-renderer.d.ts +3 -0
package/dist/render/image-renderer.js +33 -0
package/dist/render/renderer.d.ts +9 -0
package/dist/render/renderer.js +178 -0
package/dist/render/shape-renderer.d.ts +3 -0
package/dist/render/shape-renderer.js +175 -0
package/dist/render/text-renderer.d.ts +3 -0
package/dist/render/text-renderer.js +152 -0
package/dist/resolve/color-resolver.d.ts +18 -0
package/dist/resolve/color-resolver.js +321 -0
package/dist/resolve/font-map.d.ts +2 -0
package/dist/resolve/font-map.js +66 -0
package/dist/resolve/inheritance.d.ts +5 -0
package/dist/resolve/inheritance.js +106 -0
package/package.json +74 -0

package/dist/mapper/nano-malloc.d.ts ADDED Viewed

@@ -0,0 +1,130 @@
+/**
+ * Simulates Apple's nanov2 allocator for the 80-byte size class.
+ *
+ * OfficeImport uses [NSValue valueWithNonretainedObject:] to cache rendered
+ * elements by OADDrawable pointer.  When a slide is freed and the next slide's
+ * drawables land at the same addresses (malloc reuse), the stale cache entry
+ * produces "bleed" — ghost PDF images from earlier slides.
+ *
+ * nanov2 blocks are 16 KB, each holding 204 fixed-size 80-byte slots.
+ * The free list is a per-block LIFO singly-linked stack (from libmalloc
+ * nanov2_malloc.c, confirmed via disassembly + malloc_logger tracing).
+ *
+ * Source: apple-oss-distributions/libmalloc, tag libmalloc-792.80.2
+ *   - nanov2_malloc.c: nanov2_allocate_from_block_inline (line 2513)
+ *   - nanov2_malloc.c: nanov2_free_to_block_inline (line 3101)
+ *
+ * RE findings on OfficeImport's PX parser allocation patterns:
+ *
+ * Size classes (from ObjC header ivar analysis):
+ *   - OADShape: 72-80 bytes → 80-byte nanov2 class ✓
+ *   - OADImage: 80 bytes → 80-byte class ✓
+ *   - OADConnector: 72-80 bytes (subclass of OADShape) → 80-byte class ✓
+ *   - OADShapeProperties: ~137 bytes → 144-byte class (NOT 80-byte)
+ *   - OADGraphicProperties: ~136 bytes → NOT 80-byte
+ *   - OADDrawableProperties (base): 80 bytes, but shapes use OADShapeProperties
+ *   - OADOrientedBounds: ~60 bytes → 64-byte class
+ *   - OADTextBody: ~32 bytes → 32/48-byte class
+ *   - OADTextBodyProperties: ~157 bytes → NOT 80-byte
+ *   - OADOuterShadowEffect: ~64 bytes → 64-byte class (NOT 80-byte)
+ *   - OADPresetShapeGeometry: ~28 bytes → 32-byte class
+ *   - OADGroup: ~88 bytes → 96-byte class (NOT 80-byte)
+ *   - OADTable: 64 bytes → 64-byte class
+ *
+ * Conclusion: ONLY OADShape/OADImage/OADConnector objects reside in the
+ * 80-byte nanov2 pool. Each drawable = exactly 1 allocation in this pool.
+ * No sub-objects share the same size class.
+ *
+ * Progressive reading pipeline (from PMTop / OIProgressiveReaderDelegate):
+ *   1. PXReader reads slide N's XML → allocates OADDrawable objects
+ *   2. PMTop.readerDidReadElement: → maps slide N (cache check + store)
+ *   3. PDSlide.doneWithContent → frees slide N's drawables (mDrawables = nil)
+ *   4. Freed addresses go back to nanov2 LIFO pool
+ *   5. Repeat for slide N+1: new drawables reuse freed addresses
+ *
+ * LIFO correspondence verified empirically:
+ *   For immediate-predecessor bleeds, targetAllocPos maps to
+ *   srcAllocPos = prevSlideSize - 1 - targetAllocPos (pure LIFO).
+ *   This was confirmed for slides 5←4, 6←5, 7←6, 9←8, 17←16.
+ *
+ * Cache mechanism (CMArchiveManager.mDrawableCache):
+ *   - NSMutableDictionary keyed by [NSValue valueWithNonretainedObject:drawable]
+ *   - cachedPathForDrawable: checked for EVERY drawable before rendering
+ *   - addResourceForDrawable:withType:drawable: stores entry for PDF attachments
+ *   - Only PDF-rendered shapes create cache entries (sources)
+ *   - ALL drawable types can be cache hit targets (CSS rects included)
+ *
+ * Known limitations of this simulation:
+ *   - Predicts ~167 bleeds vs 89 ground truth for the test file
+ *   - False positives occur because we can't model every object in the 80-byte
+ *     pool (system objects like NSMutableArray internals may interleave)
+ *   - Cross-slide bleeds from older slides sometimes have wrong addresses
+ *   - Use ql-bleed.ts extraction as verification/fallback
+ */
+/**
+ * Simulates the nanov2 80-byte allocator across an entire presentation.
+ *
+ * Each OADDrawable (OADShape/OADImage/OADConnector) = 1 allocation.
+ * Properties, geometry, effects, text body etc. are in different size classes.
+ *
+ * Addresses handed out are virtual numbers, not real pointers; they exist so
+ * that reuse of the same slot can be recognised later.
+ */
+export declare class NanoAllocator {
+    /** Blocks created so far, oldest first; a new block is appended when the newest one is full. */
+    private blocks;
+    /** Virtual base address that the next newly created block will receive. */
+    private nextBlockBase;
+    /**
+     * Allocate one 80-byte slot, returns virtual address.
+     *
+     * Only the most recently created block is consulted; when it is full a
+     * fresh block is created and the slot is taken from there.
+     */
+    alloc(): number;
+    /**
+     * Free one 80-byte slot.
+     *
+     * The slot becomes the next one its block hands out (LIFO). An address
+     * that belongs to no known block is ignored.
+     *
+     * @param addr - Virtual address previously returned by `alloc()`.
+     */
+    free(addr: number): void;
+}
+/** Tracks one drawable's allocation and metadata. */
+export interface DrawableSlot {
+    /**
+     * Virtual address (from NanoAllocator).
+     * `-1` marks a drawable that was not placed in the 80-byte pool;
+     * computeBleedMap uses this for `"graphicFrame"` drawables.
+     */
+    addr: number;
+    /** Whether this drawable was rendered as PDF (non-rect geometry). */
+    isPdf: boolean;
+    /** Index within the slide's drawable array. */
+    index: number;
+}
+/**
+ * Cache entry: the drawableElementCache stores rendered HTML keyed by
+ * [NSValue valueWithNonretainedObject:oadDrawable] — i.e. the raw pointer.
+ * After the drawable is freed, the address becomes stale but stays in the cache.
+ *
+ * The same shape is used for the bleed records returned by computeBleedMap:
+ * those are copies of the cached entry with `targetIndex` filled in.
+ */
+export interface CacheEntry {
+    /** The stale virtual address. */
+    addr: number;
+    /** Source slide index (0-based). */
+    srcSlide: number;
+    /** Source drawable index within that slide. */
+    srcIndex: number;
+    /**
+     * Target drawable index on the slide receiving bleed (set during collision check).
+     * Absent on entries as they sit in the cache.
+     */
+    targetIndex?: number;
+    /**
+     * The cached HTML string — populated during rendering.
+     * computeBleedMap itself always stores an empty string here.
+     */
+    html: string;
+}
+/**
+ * Per-drawable allocation info extracted from the PPTX.
+ *
+ * Only OADShape/OADImage/OADConnector objects are in the 80-byte nanov2 pool.
+ * Each drawable = exactly 1 allocation. Properties, geometry, effects, text body
+ * etc. are all in different size classes and do NOT interfere with the 80-byte pool.
+ *
+ * OADTable (graphicFrame) = 64 bytes → different nano size class, excluded.
+ */
+export interface DrawableAllocInfo {
+    /** Whether the drawable is rendered as a PDF; only such drawables are entered into the cache as bleed sources. */
+    isPdf: boolean;
+    /** Caller-supplied drawable index; reported back as `srcIndex` / `targetIndex` in the bleed map. */
+    index: number;
+    /**
+     * Drawable type for size class routing.
+     * `"graphicFrame"` bypasses the 80-byte pool; every other value takes one slot.
+     */
+    type: "sp" | "pic" | "cxnSp" | "graphicFrame";
+}
+/**
+ * Simulates the full OfficeImport drawable lifecycle for bleed prediction.
+ *
+ * Progressive pipeline per slide:
+ *   1. Free previous slide's drawables (NSArray releases in forward index order;
+ *      each OADDrawable.dealloc frees self → pushed to LIFO).
+ *   2. Allocate new slide's drawables (1 alloc each from 80-byte pool).
+ *   3. Check the element cache for address collisions (= bleed).
+ *      ALL drawable types are checked (CSS rects included).
+ *   4. Cache PDF-rendered drawables' addresses.
+ *
+ * @param slides - One array of drawables per slide, in presentation order.
+ * @returns Map from 0-based slide index to the bleeds predicted for that slide.
+ *   Slides without any predicted bleed have no key in the map.
+ */
+export declare function computeBleedMap(slides: Array<Array<DrawableAllocInfo>>): Map<number, CacheEntry[]>;

package/dist/mapper/nano-malloc.js ADDED Viewed

@@ -0,0 +1,197 @@
+/**
+ * Simulates Apple's nanov2 allocator for the 80-byte size class.
+ *
+ * OfficeImport uses [NSValue valueWithNonretainedObject:] to cache rendered
+ * elements by OADDrawable pointer.  When a slide is freed and the next slide's
+ * drawables land at the same addresses (malloc reuse), the stale cache entry
+ * produces "bleed" — ghost PDF images from earlier slides.
+ *
+ * nanov2 blocks are 16 KB, each holding 204 fixed-size 80-byte slots.
+ * The free list is a per-block LIFO singly-linked stack (from libmalloc
+ * nanov2_malloc.c, confirmed via disassembly + malloc_logger tracing).
+ *
+ * Source: apple-oss-distributions/libmalloc, tag libmalloc-792.80.2
+ *   - nanov2_malloc.c: nanov2_allocate_from_block_inline (line 2513)
+ *   - nanov2_malloc.c: nanov2_free_to_block_inline (line 3101)
+ *
+ * RE findings on OfficeImport's PX parser allocation patterns:
+ *
+ * Size classes (from ObjC header ivar analysis):
+ *   - OADShape: 72-80 bytes → 80-byte nanov2 class ✓
+ *   - OADImage: 80 bytes → 80-byte class ✓
+ *   - OADConnector: 72-80 bytes (subclass of OADShape) → 80-byte class ✓
+ *   - OADShapeProperties: ~137 bytes → 144-byte class (NOT 80-byte)
+ *   - OADGraphicProperties: ~136 bytes → NOT 80-byte
+ *   - OADDrawableProperties (base): 80 bytes, but shapes use OADShapeProperties
+ *   - OADOrientedBounds: ~60 bytes → 64-byte class
+ *   - OADTextBody: ~32 bytes → 32/48-byte class
+ *   - OADTextBodyProperties: ~157 bytes → NOT 80-byte
+ *   - OADOuterShadowEffect: ~64 bytes → 64-byte class (NOT 80-byte)
+ *   - OADPresetShapeGeometry: ~28 bytes → 32-byte class
+ *   - OADGroup: ~88 bytes → 96-byte class (NOT 80-byte)
+ *   - OADTable: 64 bytes → 64-byte class
+ *
+ * Conclusion: ONLY OADShape/OADImage/OADConnector objects reside in the
+ * 80-byte nanov2 pool. Each drawable = exactly 1 allocation in this pool.
+ * No sub-objects share the same size class.
+ *
+ * Progressive reading pipeline (from PMTop / OIProgressiveReaderDelegate):
+ *   1. PXReader reads slide N's XML → allocates OADDrawable objects
+ *   2. PMTop.readerDidReadElement: → maps slide N (cache check + store)
+ *   3. PDSlide.doneWithContent → frees slide N's drawables (mDrawables = nil)
+ *   4. Freed addresses go back to nanov2 LIFO pool
+ *   5. Repeat for slide N+1: new drawables reuse freed addresses
+ *
+ * LIFO correspondence verified empirically:
+ *   For immediate-predecessor bleeds, targetAllocPos maps to
+ *   srcAllocPos = prevSlideSize - 1 - targetAllocPos (pure LIFO).
+ *   This was confirmed for slides 5←4, 6←5, 7←6, 9←8, 17←16.
+ *
+ * Cache mechanism (CMArchiveManager.mDrawableCache):
+ *   - NSMutableDictionary keyed by [NSValue valueWithNonretainedObject:drawable]
+ *   - cachedPathForDrawable: checked for EVERY drawable before rendering
+ *   - addResourceForDrawable:withType:drawable: stores entry for PDF attachments
+ *   - Only PDF-rendered shapes create cache entries (sources)
+ *   - ALL drawable types can be cache hit targets (CSS rects included)
+ *
+ * Known limitations of this simulation:
+ *   - Predicts ~167 bleeds vs 89 ground truth for the test file
+ *   - False positives occur because we can't model every object in the 80-byte
+ *     pool (system objects like NSMutableArray internals may interleave)
+ *   - Cross-slide bleeds from older slides sometimes have wrong addresses
+ *   - Use ql-bleed.ts extraction as verification/fallback
+ */
+const BLOCK_SLOTS = 204; // 16384 / 80
+/** A single nanov2 block for the 80-byte size class. */
+class NanoBlock {
+    /** LIFO free list: indices of free slots (top-of-stack = end of array). */
+    freeList = [];
+    /** Bump pointer for virgin space (next unallocated slot index). */
+    bump = 0;
+    /** Base "address" for this block (monotonically increasing virtual address). */
+    base;
+    constructor(base) {
+        this.base = base;
+    }
+    isFull() {
+        return this.freeList.length === 0 && this.bump >= BLOCK_SLOTS;
+    }
+    /** Allocate → returns virtual address. */
+    alloc() {
+        // Pop from LIFO free list if non-empty
+        if (this.freeList.length > 0) {
+            return this.base + this.freeList.pop() * 80;
+        }
+        // Bump-allocate from virgin space
+        if (this.bump < BLOCK_SLOTS) {
+            return this.base + this.bump++ * 80;
+        }
+        return -1; // block full
+    }
+    /** Free → push slot index onto LIFO stack. */
+    free(addr) {
+        const slot = (addr - this.base) / 80;
+        this.freeList.push(slot);
+    }
+    owns(addr) {
+        return addr >= this.base && addr < this.base + BLOCK_SLOTS * 80;
+    }
+}
+/**
+ * Simulates the nanov2 80-byte allocator across an entire presentation.
+ *
+ * Each OADDrawable (OADShape/OADImage/OADConnector) = 1 allocation.
+ * Properties, geometry, effects, text body etc. are in different size classes.
+ */
+export class NanoAllocator {
+    blocks = [];
+    nextBlockBase = 0x1000_0000; // arbitrary starting virtual address
+    /** Allocate one 80-byte slot, returns virtual address. */
+    alloc() {
+        // Try the last block first (hot path)
+        const last = this.blocks[this.blocks.length - 1];
+        if (last && !last.isFull()) {
+            return last.alloc();
+        }
+        // Need a new block
+        const block = new NanoBlock(this.nextBlockBase);
+        this.nextBlockBase += BLOCK_SLOTS * 80;
+        this.blocks.push(block);
+        return block.alloc();
+    }
+    /** Free one 80-byte slot. */
+    free(addr) {
+        for (let i = this.blocks.length - 1; i >= 0; i--) {
+            if (this.blocks[i].owns(addr)) {
+                this.blocks[i].free(addr);
+                return;
+            }
+        }
+    }
+}
+/**
+ * Simulates the full OfficeImport drawable lifecycle for bleed prediction.
+ *
+ * Progressive pipeline per slide:
+ *   1. Free previous slide's drawables (NSArray releases in forward index order;
+ *      each OADDrawable.dealloc frees self → pushed to LIFO).
+ *   2. Allocate new slide's drawables (1 alloc each from 80-byte pool).
+ *   3. Check the element cache for address collisions (= bleed).
+ *      ALL drawable types are checked (CSS rects included).
+ *   4. Cache PDF-rendered drawables' addresses.
+ */
+export function computeBleedMap(slides) {
+    const allocator = new NanoAllocator();
+    const cache = new Map(); // addr → CacheEntry
+    const bleedMap = new Map(); // slideIndex → bleeds
+    let prevSlots = [];
+    for (let s = 0; s < slides.length; s++) {
+        // 1. Free previous slide's drawables
+        // NSArray releases in forward order. Each OADDrawable.dealloc frees
+        // sub-objects first (different size classes), then free(self) in 80-byte pool.
+        for (const slot of prevSlots) {
+            if (slot.addr !== -1)
+                allocator.free(slot.addr);
+        }
+        // 2. Allocate new slide's drawables (1 alloc each)
+        const drawables = slides[s];
+        const currentSlots = [];
+        for (const d of drawables) {
+            if (d.type === "graphicFrame") {
+                // OADTable = 64 bytes, different nano size class — skip 80-byte pool
+                currentSlots.push({ addr: -1, isPdf: false, index: d.index });
+                continue;
+            }
+            // Alloc drawable itself (80 bytes): OADShape/OADImage/OADConnector
+            const addr = allocator.alloc();
+            currentSlots.push({ addr, isPdf: d.isPdf, index: d.index });
+        }
+        // 3. Check cache for bleed.
+        // In OfficeImport, ALL drawables are checked (CSS rects included).
+        const bleeds = [];
+        for (const slot of currentSlots) {
+            if (slot.addr === -1)
+                continue;
+            const cached = cache.get(slot.addr);
+            if (cached) {
+                bleeds.push({ ...cached, targetIndex: slot.index });
+            }
+        }
+        if (bleeds.length > 0) {
+            bleedMap.set(s, bleeds);
+        }
+        // 4. Cache PDF-rendered drawables (only non-rect shapes produce PDF attachments)
+        for (const slot of currentSlots) {
+            if (slot.isPdf && slot.addr !== -1) {
+                cache.set(slot.addr, {
+                    addr: slot.addr,
+                    srcSlide: s,
+                    srcIndex: slot.index,
+                    html: "",
+                });
+            }
+        }
+        prevSlots = currentSlots;
+    }
+    return bleedMap;
+}

package/dist/mapper/ql-bleed.d.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * General bleed extraction via qlmanage diffing.
+ *
+ * Bleed = ghost PDF images from CMArchiveManager's use-after-free cache bug.
+ * OfficeImport's cache replaces invisible drawables with stale PDFs, ADDING
+ * extra <img> elements to slides.
+ *
+ * We extract these by diffing our clean HTML against qlmanage's HTML:
+ * any <img src="*.pdf"> in QL that has no position-match in ours is bleed.
+ *
+ * Works for ANY PPTX file — no hardcoding needed.
+ */
+import type { BleedEntry, BleedRemoval, QLBleedData } from "../model/types.js";
+export type { BleedEntry, BleedRemoval, QLBleedData };
+/**
+ * Extract bleed entries by diffing QL HTML against our HTML.
+ *
+ * Detects BOTH:
+ * - Additions: QL has <img> elements our output doesn't (bleed PDFs to inject)
+ * - Removals: Our output has elements QL doesn't (own shapes replaced by bleed)
+ *
+ * Uses set-based position matching with 2px tolerance for rounding differences.
+ *
+ * Only slides present in both documents are compared. The `pdfs` map of the
+ * result is returned empty; it is filled by the caller (see extractQLBleed).
+ *
+ * @param qlHtml - Full HTML produced by qlmanage.
+ * @param ourHtml - Full HTML produced by this renderer.
+ * @returns Additions and removals keyed by 0-based slide index.
+ */
+export declare function extractBleedEntries(qlHtml: string, ourHtml: string): QLBleedData;
+/**
+ * Run qlmanage and return the .qlpreview directory path.
+ *
+ * Output goes to the fixed directory `/tmp/ql-bleed-extract`, which is deleted
+ * and recreated on every call, so a returned path is only valid until the
+ * next call.
+ *
+ * @param pptxPath - Path of the presentation handed to `qlmanage -p`.
+ * @returns Path of the `.qlpreview` entry found in the output directory.
+ * @throws Error when no `.qlpreview` entry was produced.
+ */
+export declare function runQLManage(pptxPath: string): string;
+/**
+ * Full pipeline: extract bleed for any PPTX file.
+ *
+ * 1. Runs qlmanage to get ground-truth HTML
+ * 2. Diffs against our clean HTML
+ * 3. Reads bleed PDF attachments from QL output
+ * 4. Returns structured bleed data ready for injection
+ *
+ * PDF buffers are stored in the result's `pdfs` map keyed by the bleed
+ * element's style string. A PDF that cannot be read is skipped silently.
+ *
+ * @param ourHtml - Full HTML produced by this renderer for the same file.
+ * @param pptxPath - Path of the presentation to run through qlmanage.
+ */
+export declare function extractQLBleed(ourHtml: string, pptxPath: string): QLBleedData;

package/dist/mapper/ql-bleed.js ADDED Viewed

@@ -0,0 +1,254 @@
+/**
+ * General bleed extraction via qlmanage diffing.
+ *
+ * Bleed = ghost PDF images from CMArchiveManager's use-after-free cache bug.
+ * OfficeImport's cache replaces invisible drawables with stale PDFs, ADDING
+ * extra <img> elements to slides.
+ *
+ * We extract these by diffing our clean HTML against qlmanage's HTML:
+ * any <img src="*.pdf"> in QL that has no position-match in ours is bleed.
+ *
+ * Works for ANY PPTX file — no hardcoding needed.
+ */
+import { execFileSync, execSync } from "node:child_process";
+import { readFileSync, readdirSync, existsSync, rmSync, mkdirSync } from "node:fs";
+import { join } from "node:path";
+/** Parse position values from a style string. */
+function parsePos(style) {
+    const t = style.match(/top:\s*(-?\d+)/);
+    const l = style.match(/left:\s*(-?\d+)/);
+    const w = style.match(/width:\s*(-?\d+)/);
+    const h = style.match(/height:\s*(-?\d+)/);
+    if (!t || !l || !w || !h)
+        return null;
+    return { top: +t[1], left: +l[1], width: +w[1], height: +h[1] };
+}
+/** Check if two positions match within a tolerance (handles 1px rounding diffs). */
+function posMatch(a, b, tol = 2) {
+    return Math.abs(a.top - b.top) <= tol &&
+        Math.abs(a.left - b.left) <= tol &&
+        Math.abs(a.width - b.width) <= tol &&
+        Math.abs(a.height - b.height) <= tol;
+}
+/**
+ * Parse top-level elements from slide inner HTML.
+ * Tracks nesting depth to only capture direct children of <div class="slide">.
+ *
+ * This is a lightweight tag scanner, not an HTML parser:
+ *   - a tag ends at the first `>` after its `<`, so a `>` inside an attribute
+ *     value cuts the tag short;
+ *   - anything starting with `<` that is not a closing tag and not recognised
+ *     as self-closing (including `<!-- ... -->`) counts as an opening tag.
+ *
+ * @param slideHtml - Inner HTML of one slide (as returned by splitSlides).
+ * @returns One record per top-level element, in document order, with:
+ *   `type` ('img' | 'table' | 'div' | 'other'), `html` (full element source),
+ *   `pos` (parsed position or null), `src` and `style` (attribute values or
+ *   undefined).
+ */
+function parseTopLevelElements(slideHtml) {
+    const elements = [];
+    // Current nesting level; 0 means "between top-level elements".
+    let depth = 0;
+    let i = 0;
+    // Start offset of the top-level element currently open, or -1 if none.
+    let elemStart = -1;
+    while (i < slideHtml.length) {
+        // Skip text content: only '<' is interesting.
+        if (slideHtml[i] !== '<') {
+            i++;
+            continue;
+        }
+        // Closing tag
+        if (slideHtml[i + 1] === '/') {
+            const end = slideHtml.indexOf('>', i);
+            if (end === -1)
+                break;
+            depth--;
+            // Back at level 0: the open top-level element is complete.
+            if (depth === 0 && elemStart >= 0) {
+                pushElement(slideHtml.substring(elemStart, end + 1));
+                elemStart = -1;
+            }
+            i = end + 1;
+            continue;
+        }
+        const tagEnd = slideHtml.indexOf('>', i);
+        if (tagEnd === -1)
+            break;
+        const tag = slideHtml.substring(i, tagEnd + 1);
+        // Tags that never get a closing tag and therefore must not change depth.
+        const selfClose = tag.startsWith('<img ') || tag.startsWith('<col ') ||
+            tag.startsWith('<br') || tag.endsWith('/>');
+        if (depth === 0) {
+            if (selfClose) {
+                // A childless top-level element is complete as soon as its tag ends.
+                pushElement(tag);
+                i = tagEnd + 1;
+                continue;
+            }
+            elemStart = i;
+        }
+        if (!selfClose)
+            depth++;
+        i = tagEnd + 1;
+    }
+    // Records one element; only the first style="..." / src="..." found anywhere
+    // in the element's source (descendants included) is used.
+    function pushElement(html) {
+        const styleMatch = html.match(/style="([^"]+)"/);
+        const srcMatch = html.match(/src="([^"]+)"/);
+        const pos = styleMatch ? parsePos(styleMatch[1]) : null;
+        const type = html.startsWith('<img') ? 'img' :
+            html.startsWith('<table') ? 'table' :
+                html.startsWith('<div') ? 'div' : 'other';
+        elements.push({ type, html, pos, src: srcMatch?.[1], style: styleMatch?.[1] });
+    }
+    return elements;
+}
+/** Split full HTML into per-slide inner content strings. */
+function splitSlides(html) {
+    const slides = [];
+    const marker = '<div class="slide" style="top:0; left:0;">';
+    let pos = 0;
+    while (true) {
+        const start = html.indexOf(marker, pos);
+        if (start === -1)
+            break;
+        const contentStart = start + marker.length;
+        // Find matching closing </div> — it's always the outermost one
+        // before the next <style> or </body>
+        let depth = 1;
+        let j = contentStart;
+        while (j < html.length && depth > 0) {
+            if (html[j] === '<') {
+                if (html.substring(j, j + 6) === '</div>') {
+                    depth--;
+                    if (depth === 0) {
+                        slides.push(html.substring(contentStart, j));
+                        break;
+                    }
+                    j += 6;
+                    continue;
+                }
+                // Opening div
+                if (html.substring(j, j + 4) === '<div') {
+                    depth++;
+                }
+            }
+            j++;
+        }
+        pos = j;
+    }
+    return slides;
+}
+/**
+ * Extract bleed entries by diffing QL HTML against our HTML.
+ *
+ * Detects BOTH:
+ * - Additions: QL has <img> elements our output doesn't (bleed PDFs to inject)
+ * - Removals: Our output has elements QL doesn't (own shapes replaced by bleed)
+ *
+ * Uses set-based position matching with 2px tolerance for rounding differences.
+ *
+ * Matching is a single forward pass over both element lists with a lookahead
+ * of one element on our side; it is not a general alignment. Only slides
+ * present in both inputs are compared.
+ *
+ * @param qlHtml - Full HTML produced by qlmanage.
+ * @param ourHtml - Full HTML produced by this renderer.
+ * @returns `{ entries, removals, pdfs }`: `entries` and `removals` are Maps
+ *   keyed by 0-based slide index and only contain slides with at least one
+ *   item; `pdfs` is always an empty Map here.
+ */
+export function extractBleedEntries(qlHtml, ourHtml) {
+    const qlSlides = splitSlides(qlHtml);
+    const ourSlides = splitSlides(ourHtml);
+    const entries = new Map();
+    const removals = new Map();
+    const pdfs = new Map(); // filled by caller
+    // Slides beyond the shorter document are not compared.
+    const count = Math.min(qlSlides.length, ourSlides.length);
+    for (let s = 0; s < count; s++) {
+        const qlElems = parseTopLevelElements(qlSlides[s]);
+        const ourElems = parseTopLevelElements(ourSlides[s]);
+        // Find matched pairs (QL element ↔ our element) by fuzzy position matching.
+        // Walk both lists sequentially since elements appear in similar order.
+        const qlMatched = new Set();
+        const ourMatched = new Set();
+        let ourPtr = 0;
+        for (let qi = 0; qi < qlElems.length; qi++) {
+            // Try to match against our[ourPtr]
+            while (ourPtr < ourElems.length && ourMatched.has(ourPtr))
+                ourPtr++;
+            if (ourPtr < ourElems.length && elementsMatch(qlElems[qi], ourElems[ourPtr])) {
+                qlMatched.add(qi);
+                ourMatched.add(ourPtr);
+                ourPtr++;
+                continue;
+            }
+            // Try a lookahead: maybe our[ourPtr] was removed and next one matches
+            // (the skipped our[ourPtr] stays unmatched and may be reported as a removal).
+            if (ourPtr + 1 < ourElems.length && !ourMatched.has(ourPtr + 1) &&
+                elementsMatch(qlElems[qi], ourElems[ourPtr + 1])) {
+                qlMatched.add(qi);
+                ourMatched.add(ourPtr + 1);
+                ourPtr = ourPtr + 2;
+                continue;
+            }
+            // No match: this QL element is extra; ourPtr is left where it is.
+        }
+        // Bleed ADDITIONS: QL elements not matched → extra elements (bleed PDFs)
+        const bleeds = [];
+        // Number of matched (own) QL elements seen so far; tells where to inject.
+        let ownCount = 0;
+        for (let qi = 0; qi < qlElems.length; qi++) {
+            if (qlMatched.has(qi)) {
+                ownCount++;
+                continue;
+            }
+            const el = qlElems[qi];
+            // Only unmatched <img> elements pointing at a .pdf count as bleed;
+            // other unmatched QL elements are ignored.
+            if (el.type === 'img' && el.src?.endsWith('.pdf') && el.style) {
+                bleeds.push({ style: el.style, afterOwnElement: ownCount });
+            }
+        }
+        if (bleeds.length > 0)
+            entries.set(s, bleeds);
+        // Bleed REMOVALS: our elements not matched → replaced by bleed
+        // (elements without a parsed position are not reported).
+        const slideRemovals = [];
+        for (let oi = 0; oi < ourElems.length; oi++) {
+            if (!ourMatched.has(oi) && ourElems[oi].pos) {
+                slideRemovals.push({ pos: ourElems[oi].pos, type: ourElems[oi].type });
+            }
+        }
+        if (slideRemovals.length > 0)
+            removals.set(s, slideRemovals);
+    }
+    return { entries, removals, pdfs };
+}
+/**
+ * Check if two elements from QL and our output represent the same drawable.
+ * Matches by type and position (with 2px tolerance for rounding differences).
+ */
+function elementsMatch(ql, ours) {
+    if (ql.type !== ours.type)
+        return false;
+    if (ql.pos && ours.pos)
+        return posMatch(ql.pos, ours.pos);
+    if (!ql.pos && !ours.pos)
+        return true;
+    return false;
+}
+/** Run qlmanage and return the .qlpreview directory path. */
+export function runQLManage(pptxPath) {
+    const tmpDir = "/tmp/ql-bleed-extract";
+    if (existsSync(tmpDir))
+        rmSync(tmpDir, { recursive: true });
+    mkdirSync(tmpDir, { recursive: true });
+    execSync(`qlmanage -p -o "${tmpDir}" "${pptxPath}"`, {
+        timeout: 30000,
+        stdio: ['pipe', 'pipe', 'pipe'],
+    });
+    const entries = readdirSync(tmpDir);
+    const qlpreview = entries.find(e => e.endsWith('.qlpreview'));
+    if (!qlpreview)
+        throw new Error('qlmanage did not produce .qlpreview output');
+    return join(tmpDir, qlpreview);
+}
+/**
+ * Full pipeline: extract bleed for any PPTX file.
+ *
+ * 1. Runs qlmanage to get ground-truth HTML
+ * 2. Diffs against our clean HTML
+ * 3. Reads bleed PDF attachments from QL output
+ * 4. Returns structured bleed data ready for injection
+ */
+export function extractQLBleed(ourHtml, pptxPath) {
+    const qlDir = runQLManage(pptxPath);
+    const qlHtml = readFileSync(join(qlDir, "Preview.html"), "utf8");
+    const data = extractBleedEntries(qlHtml, ourHtml);
+    // Read PDF buffers for bleed entries from QL output
+    // Map by style string since QL attachment names won't be reused
+    const qlSlides = splitSlides(qlHtml);
+    for (const [slideIdx, slideEntries] of data.entries) {
+        const qlElems = parseTopLevelElements(qlSlides[slideIdx]);
+        // Find the QL attachment name for each bleed entry by matching style
+        for (const entry of slideEntries) {
+            const qlElem = qlElems.find(e => e.type === 'img' && e.style === entry.style && e.src?.endsWith('.pdf'));
+            if (qlElem?.src) {
+                const pdfPath = join(qlDir, qlElem.src);
+                try {
+                    data.pdfs.set(entry.style, readFileSync(pdfPath));
+                }
+                catch { /* missing PDF */ }
+            }
+        }
+    }
+    return data;
+}

package/dist/mapper/shape-mapper.d.ts ADDED Viewed

@@ -0,0 +1,41 @@
+import type { Shape, Connector, Fill, ColorMap, ColorScheme, FontScheme, StyleMatrix, Slide } from "../model/types.js";
+import type { StyleBuilder } from "./style-builder.js";
+/**
+ * Padding used around standalone shapes when drawing them into a PDF
+ * (getShapeDrawCmd documents `pad=PDF_PADDING, totalH=h+2*pad` for that case).
+ * NOTE(review): the unit is not stated in this declaration file — confirm.
+ */
+export declare const PDF_PADDING = 20;
+/**
+ * Names of the geometries the shape mapper supports.
+ * NOTE(review): presumably preset-geometry identifiers as passed to
+ * getShapeDrawCmd's `preset` — confirm against the implementation.
+ */
+export declare const SUPPORTED_GEOMETRIES: Set<string>;
+/**
+ * Context object passed to the shape-mapping functions of this module
+ * (mapShape, mapConnector, fillToColor).
+ * NOTE(review): field semantics below are inferred from the type names only —
+ * confirm against the implementation.
+ */
+export interface ShapeMapperContext {
+    /** Colour map in effect (required). */
+    colorMap: ColorMap;
+    /** Colour scheme in effect (required). */
+    colorScheme: ColorScheme;
+    /** Font scheme in effect (required). */
+    fontScheme: FontScheme;
+    /** Optional style matrix. */
+    styleMatrix?: StyleMatrix;
+    /** Optional slide the shape belongs to. */
+    slide?: Slide;
+}
+/**
+ * Map one shape to its string output.
+ * NOTE(review): presumably returns HTML markup and registers generated files
+ * (e.g. PDFs) in `attachments` — confirm against the implementation.
+ *
+ * @param shape - Shape to map.
+ * @param styles - Style builder shared by the caller.
+ * @param attachments - Map of attachment name to file contents.
+ * @param ctx - Colour/font/style context.
+ */
+export declare function mapShape(shape: Shape, styles: StyleBuilder, attachments: Map<string, Buffer>, ctx: ShapeMapperContext): string;
+/**
+ * Generate PDF path drawing commands for a shape.
+ * pad = uniform offset from (0,0) where the shape starts in screen coords.
+ * totalH = total height of the coordinate space (needed for Y-flipping).
+ * For standalone shapes: pad=PDF_PADDING, totalH=h+2*pad.
+ * For group children: pad=0, totalH=h (shape draws at origin, caller uses cm to translate).
+ *
+ * @param pad - Uniform offset of the shape from the origin.
+ * @param w - Shape width.
+ * @param h - Shape height.
+ * @param totalH - Height of the whole coordinate space, used to flip Y.
+ * @param preset - Geometry preset name.
+ * @param adjustValues - Optional adjust values for the preset, keyed by name.
+ * @returns The drawing commands as a string.
+ */
+export declare function getShapeDrawCmd(pad: number, w: number, h: number, totalH: number, preset: string, adjustValues?: Record<string, string>): string;
+/**
+ * Wrap a PDF content stream into a minimal PDF 1.4 document.
+ *
+ * @param totalW - Total width of the document's coordinate space.
+ * @param totalH - Total height of the document's coordinate space.
+ * @param stream - PDF content stream to embed.
+ * @returns The complete PDF document as a Buffer.
+ */
+export declare function buildPdf(totalW: number, totalH: number, stream: string): Buffer;
+/**
+ * Map one connector to its string output.
+ * NOTE(review): presumably the connector counterpart of mapShape (HTML out,
+ * generated files added to `attachments`) — confirm against the implementation.
+ *
+ * @param conn - Connector to map.
+ * @param attachments - Map of attachment name to file contents.
+ * @param ctx - Colour/font/style context.
+ */
+export declare function mapConnector(conn: Connector, attachments: Map<string, Buffer>, ctx: ShapeMapperContext): string;
+/**
+ * Compute the inscribed text frame for a geometry preset (shapeTextBoxRect in OfficeImport).
+ * OfficeImport computes inset using float px values for radius, then truncates dimensions.
+ * Input/output in EMU, but computation happens in pixel space to match QL's rounding.
+ *
+ * @param bounds - Shape bounds in EMU: origin (`x`, `y`) and extent (`cx`, `cy`).
+ * @param preset - Geometry preset name.
+ * @param adjustValues - Optional adjust values for the preset, keyed by name.
+ * @returns The text frame in EMU, in the same `{ x, y, cx, cy }` form as `bounds`.
+ */
+export declare function shapeTextBox(bounds: {
+    x: number;
+    y: number;
+    cx: number;
+    cy: number;
+}, preset: string, adjustValues?: Record<string, string>): {
+    x: number;
+    y: number;
+    cx: number;
+    cy: number;
+};
+/**
+ * Returns just the color string (for PDF fill, which only needs a single color)
+ *
+ * @param fill - Fill to reduce to one colour; may be undefined.
+ * @param ctx - Colour/font/style context.
+ * @returns A colour string, or `null`.
+ *   NOTE(review): presumably `null` means "no usable fill" — confirm.
+ */
+export declare function fillToColor(fill: Fill | undefined, ctx: ShapeMapperContext): string | null;