@imgly/pdf-importer 0.1.0-rc.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/node.d.ts CHANGED
@@ -1,22 +1,10 @@
1
1
  // Generated by dts-bundle-generator v9.5.1
2
2
 
3
3
  import CreativeEngine from '@cesdk/engine';
4
- import { Font, Typeface } from '@cesdk/engine';
4
+ import { AssetAPI, Font, FontStyle, FontWeight, Typeface } from '@cesdk/engine';
5
5
 
6
- export interface TypefaceParams {
7
- family: string;
8
- style: Font["style"];
9
- weight: Font["weight"];
10
- }
11
- export type TypefaceResolver = (fontParameters: TypefaceParams, engine: CreativeEngine) => Promise<FontResolverResult | null>;
12
- export declare function addGfontsAssetLibrary(engine: CreativeEngine): Promise<void>;
13
- export interface FontResolverResult {
14
- typeface: Typeface;
15
- font: Font;
16
- substitutedFrom?: string;
17
- }
18
- export type WarningSeverity = "error" | "warning" | "info";
19
- export interface WarningDefinition {
6
+ type WarningSeverity = "error" | "warning" | "info";
7
+ interface WarningDefinition {
20
8
  /** Default + sole severity. Call sites do not override. */
21
9
  severity: WarningSeverity;
22
10
  /**
@@ -25,7 +13,7 @@ export interface WarningDefinition {
25
13
  */
26
14
  template: string;
27
15
  }
28
- export interface LogMessage<TCode extends string = string> {
16
+ interface LogMessage<TCode extends string = string> {
29
17
  /** Stable machine-readable identifier — never rename once shipped. */
30
18
  code: TCode;
31
19
  /** Inherited from the code definition. */
@@ -47,6 +35,50 @@ export declare class Logger<R extends Record<string, WarningDefinition>> {
47
35
  emit<K extends keyof R & string>(code: K, params?: Record<string, unknown>): void;
48
36
  getMessages(): LogMessage<keyof R & string>[];
49
37
  }
38
+ type AssetQueryAPI = Pick<AssetAPI, "findAllSources" | "addLocalAssetSourceFromJSONURI" | "findAssets">;
39
+ type AssetEngine = {
40
+ asset: AssetQueryAPI;
41
+ };
42
+ interface TypefaceParams {
43
+ family: string;
44
+ style?: Font["style"];
45
+ weight?: Font["weight"];
46
+ }
47
+ interface FontResolverOptions {
48
+ /**
49
+ * When the requested weight is not available in the matched typeface,
50
+ * pick the closest available weight using the CSS Font Matching algorithm.
51
+ * Defaults to false — return null instead so the caller can decide what
52
+ * to do (e.g. log a warning, use a different font).
53
+ *
54
+ * Only enable when the typeface name is known to be a good match. The
55
+ * typeface query uses fuzzy matching, so applying weight fallback to a
56
+ * poor name match can silently produce wildly wrong results.
57
+ */
58
+ closestWeightMatch?: boolean;
59
+ }
60
+ interface FontResolverResult {
61
+ typeface: Typeface;
62
+ font: Font;
63
+ /**
64
+ * Set to the originally requested family name when the typeface came from
65
+ * the proprietary-fallbacks source (e.g. requested "Helvetica", got Roboto).
66
+ * Undefined when the family matched the main Google Fonts catalog directly.
67
+ * Always populated when applicable; consumers may ignore it.
68
+ */
69
+ substitutedFrom?: string;
70
+ }
71
+ type TypefaceResolver = (params: TypefaceParams, engine: AssetEngine, options?: FontResolverOptions) => Promise<FontResolverResult | null>;
72
+ /**
73
+ * Register the @imgly/gfonts asset sources (Google Fonts catalog + proprietary
74
+ * font fallbacks) with the CE.SDK engine.
75
+ *
76
+ * Adds two sources:
77
+ * - `ly.img.gfonts` — 1,394 Google Fonts typefaces (fuzzy-matched).
78
+ * - `ly.img.gfonts-fallbacks` — 16 proprietary-font alias entries
79
+ * (e.g. Helvetica → Roboto), strict-matched.
80
+ */
81
+ export declare function addGfontsAssetLibrary(engine: AssetEngine): Promise<void>;
50
82
  declare const WARNING_CODES: {
51
83
  readonly DOC_PAGE_COUNT: {
52
84
  readonly severity: "info";
@@ -64,6 +96,10 @@ declare const WARNING_CODES: {
64
96
  readonly severity: "error";
65
97
  readonly template: "An unknown block kind \"{kind}\" was encountered and was skipped.";
66
98
  };
99
+ readonly PAGE_INDEX_OUT_OF_RANGE: {
100
+ readonly severity: "warning";
101
+ readonly template: "Requested page index {requested} is out of range; PDF has {available} page(s). No blocks were emitted.";
102
+ };
67
103
  readonly FONT_OPENTYPE_PARSE_FAILED: {
68
104
  readonly severity: "warning";
69
105
  readonly template: "{prefix}font \"{fontName}\" could not be parsed as OpenType{reason}. Glyph outlines will fall back to pdf.js raster paths \u2014 some shapes may render without stroke/anti-aliasing detail.";
@@ -92,6 +128,22 @@ declare const WARNING_CODES: {
92
128
  readonly severity: "error";
93
129
  readonly template: "Text run references font \"{fontRef}\" which was not extracted. The text was skipped.";
94
130
  };
131
+ readonly FONT_SUBSTITUTIONS: {
132
+ readonly severity: "warning";
133
+ readonly template: "Substituted {count} font famil{plural} during import: {pairs}.";
134
+ };
135
+ readonly FONT_SUBSET_FALLBACK: {
136
+ readonly severity: "warning";
137
+ readonly template: string;
138
+ };
139
+ readonly TEXT_OUTLINE_NO_FONT_FAMILY: {
140
+ readonly severity: "warning";
141
+ readonly template: "Text outline has no declared font family. The run was skipped.";
142
+ };
143
+ readonly TEXT_RUN_SKIPPED_NO_RESOLUTION: {
144
+ readonly severity: "warning";
145
+ readonly template: "No typeface or glyph outlines for \"{fontFamily}\"; run skipped.";
146
+ };
95
147
  readonly TEXT_RENDER_MODE_SKIPPED: {
96
148
  readonly severity: "warning";
97
149
  readonly template: "Text item{pageTag}: skipped \"{text}\" \u2014 PDF text rendering mode {modeLabel}. This text paints no ink; for mode 7 it acts as a clipping mask for following images, an effect the importer does not currently reproduce.";
@@ -110,7 +162,7 @@ declare const WARNING_CODES: {
110
162
  };
111
163
  readonly IMAGE_PDFJS_UNRESOLVED: {
112
164
  readonly severity: "error";
113
- readonly template: "{prefix}image placement {idLabel} at ({bboxX}, {bboxY}) {bboxW}\u00D7{bboxH} pt could not be resolved by pdf.js and was dropped.";
165
+ readonly template: "{prefix}image placement {idLabel} at ({bboxX}, {bboxY}) {bboxW}\u00D7{bboxH} pt could not be resolved by pdf.js (reason={reason}) and was dropped.";
114
166
  };
115
167
  readonly IMAGE_NO_PIXELS: {
116
168
  readonly severity: "warning";
@@ -196,6 +248,10 @@ declare const WARNING_CODES: {
196
248
  readonly severity: "warning";
197
249
  readonly template: "Failed to register spot color \"{name}\": {error}";
198
250
  };
251
+ readonly VECTOR_PATH_FILL_RULE_UNSUPPORTED: {
252
+ readonly severity: "warning";
253
+ readonly template: "CE.SDK does not expose `{property}` on `vector_path` shapes (added in CE.SDK {sinceVersion}); the source PDF used non-zero winding fill, but imported paths render with the engine default (even-odd) winding. Self-overlapping or compound paths may render with holes inverted. Upgrade `@cesdk/engine` to {sinceVersion} or newer to preserve the source winding rule.";
254
+ };
199
255
  readonly BLEND_MODE_UNSUPPORTED: {
200
256
  readonly severity: "warning";
201
257
  readonly template: "Blend mode \"{mode}\" is not supported. Using \"Normal\" as the default.";
@@ -236,15 +292,468 @@ declare const WARNING_CODES: {
236
292
  readonly severity: "error";
237
293
  readonly template: "{prefix}gradient shading was emitted without a pattern reference and was dropped.";
238
294
  };
295
+ readonly TILING_PATTERN_AS_IMAGE: {
296
+ readonly severity: "info";
297
+ readonly template: "{prefix}tiling pattern \"{patternId}\" was imported as an image (single-image tiling pattern, e.g. Skia/Chrome PDF export of an embedded bitmap).";
298
+ };
299
+ readonly TILING_PATTERN_UNSUPPORTED: {
300
+ readonly severity: "warning";
301
+ readonly template: "{prefix}tiling pattern \"{patternId}\" contains {imageCount} image XObject(s) and {otherOpCount} other paint op(s); only single-image tiling patterns are imported. The pattern fill was dropped.";
302
+ };
303
+ readonly TILING_PATTERN_UNRESOLVED: {
304
+ readonly severity: "error";
305
+ readonly template: "{prefix}tiling pattern \"{patternId}\" could not be resolved by pdf.js and was dropped.";
306
+ };
307
+ readonly TILING_PATTERN_IMAGE_DECODE_FAILED: {
308
+ readonly severity: "warning";
309
+ readonly template: "{prefix}tiling pattern \"{patternId}\" referenced image \"{imageObjId}\" but the image bytes could not be decoded. The pattern fill was dropped.";
310
+ };
311
+ };
312
+ interface Color {
313
+ space: "rgb" | "cmyk" | "gray";
314
+ /** in [0..1]. rgb:[r,g,b] cmyk:[c,m,y,k] gray:[g] */
315
+ values: number[];
316
+ /** in [0..1]; defaults to 1 */
317
+ alpha?: number;
318
+ }
319
+ type CesdkFontWeight = "thin" | "extraLight" | "light" | "normal" | "medium" | "semiBold" | "bold" | "extraBold" | "heavy";
320
+ interface TextOutline {
321
+ kind: "text-outline";
322
+ /** pt, run bbox top-left */
323
+ x: number;
324
+ y: number;
325
+ width: number;
326
+ height: number;
327
+ /** SVG path data (all glyphs concatenated) */
328
+ d: string;
329
+ /** Original text (for diffing / accessibility) */
330
+ text: string;
331
+ /**
332
+ * Family base parsed from the PDF PostScript name (subset prefix and
333
+ * recognized weight/style/width suffix stripped — e.g.
334
+ * `WTNEYF+Poppins-ExtraBold` → `Poppins`). Falls back to the full
335
+ * subset-stripped name when no recognized suffix is present, so families
336
+ * with internal hyphens like `SVN-BlogScript` survive intact. Suitable
337
+ * as a font-resolver query.
338
+ */
339
+ fontFamily: string;
340
+ /** pt */
341
+ fontSize: number;
342
+ /** Derived from fontObj.italic + name suffix. */
343
+ fontStyle?: "normal" | "italic";
344
+ /**
345
+ * CE.SDK weight string. Together with `fontStyle` and `fontFamily` lets
346
+ * the font-resolver substitute the correct typeface variant for an
347
+ * editable text block, or — on resolver miss — be preserved as block
348
+ * metadata.
349
+ */
350
+ fontWeight?: CesdkFontWeight;
351
+ fill: Color | null;
352
+ /**
353
+ * pt, pen x of the first glyph in device coords. Used by the
354
+ * resolver-substitution path to place the editable text block at the
355
+ * generator's setPositionX rather than the ink-bbox left edge (which is
356
+ * shifted by the first glyph's LSB, typically ~1–2pt for caps).
357
+ */
358
+ textOriginX?: number;
359
+ /**
360
+ * Radians in CE.SDK screen (y-down) frame, 0 for horizontal. Optional;
361
+ * absent on truly horizontal runs. Mirrors the `rotation` field on
362
+ * TextRun so both emit paths can apply `setRotation` from the same
363
+ * source. Sign convention: PDF's y-up Tm rotation atan2(b,a) negated to
364
+ * match CE.SDK's y-down screen frame.
365
+ */
366
+ rotation?: number;
367
+ /**
368
+ * pt, ink-bbox width in the un-rotated text frame. Equals `width` for
369
+ * horizontal runs; differs for ±90°/180° runs where `width`/`height`
370
+ * describe the rotated AABB.
371
+ */
372
+ unrotWidth?: number;
373
+ /**
374
+ * pt, ink-bbox height in the un-rotated text frame. Pair with
375
+ * `unrotWidth` to size the editable text block at its natural
376
+ * pre-rotation dimensions; CE.SDK then auto-fits to its own typeface
377
+ * metrics.
378
+ */
379
+ unrotHeight?: number;
380
+ /**
381
+ * [0..1], default 1. Like `VectorPath.opacity` and `ImageBox.opacity`,
382
+ * lets the emitter apply `setOpacity` to text-outline blocks (the
383
+ * walker uses this for runs that inherited a reduced fill alpha).
384
+ */
385
+ opacity?: number;
386
+ /**
387
+ * pt, baseline y in device (top-down) coords. Pen origin's y, used by
388
+ * the resolver-substitution path to position the editable text block
389
+ * at the baseline rather than the ink-bbox top.
390
+ */
391
+ textOriginY?: number;
392
+ }
393
+ interface EmbeddedFont {
394
+ /**
395
+ * Unique per subset. When a PDF embeds multiple subsets that share a PS
396
+ * family name (common: one subset per chapter/page), this field is
397
+ * suffixed with pdf.js's `loadedName` (e.g.
398
+ * `PublicSans-Light#g_d0_f3`) so each subset keeps its own entry in
399
+ * `Document.fonts`. Use `family` for display.
400
+ */
401
+ postScriptName: string;
402
+ /**
403
+ * Human typeface name from the font's opentype name table (falls back to
404
+ * the PS family name).
405
+ */
406
+ family: string;
407
+ /** 400 | 700 | 900 (pdf.js bold/black flags) */
408
+ weight?: number;
409
+ style?: "normal" | "italic";
410
+ /**
411
+ * Width-axis label (OS/2 `usWidthClass` mapped to a normalized string,
412
+ * or parsed from the PostScript suffix's width token: `cond`,
413
+ * `extended`, `narrow`, …). Distinguishes condensed-width subsets
414
+ * from normal-width subsets so the merger does not collapse them.
415
+ * Absent when neither source carries width information.
416
+ */
417
+ width?: string;
418
+ /** Raw TTF/OTF bytes */
419
+ data: Uint8Array;
420
+ /** 'font/otf' | 'font/ttf' */
421
+ mimeType: string;
422
+ }
423
+ interface ColorSpan {
424
+ from: number;
425
+ to: number;
426
+ fill: Color | null;
427
+ }
428
+ interface StyleSpan {
429
+ from: number;
430
+ to: number;
431
+ /** PostScript name from the originating run. */
432
+ fontRef: string;
433
+ /** CSS 100..900; matches `EmbeddedFont.weight`. */
434
+ fontWeight?: number;
435
+ /** Matches `EmbeddedFont.style`. */
436
+ fontStyle?: "normal" | "italic";
437
+ }
438
+ interface TextRun {
439
+ kind: "text-run";
440
+ /** pt, unrotated bbox top-left */
441
+ x: number;
442
+ /** pt */
443
+ y: number;
444
+ /** pt (with a small advance padding) */
445
+ width: number;
446
+ /** pt (ascender - descender, scaled by fontSize) */
447
+ height: number;
448
+ /** Rendered string (Unicode, not glyph codes) */
449
+ text: string;
450
+ /** postScriptName; resolves via Document.fonts */
451
+ fontRef: string;
452
+ /**
453
+ * Logical font family (subset prefix + weight/style suffix stripped),
454
+ * mirroring `EmbeddedFont.family`. Populated by `text-items.ts` from
455
+ * the resolved EmbeddedFont so `postprocess-text.ts::canGroup` can
456
+ * decide whether two distinct `fontRef`s (e.g. `Helvetica` +
457
+ * `Helvetica-Bold`) belong to the same logical typeface and should
458
+ * merge into one block with `styleSpans`. Absent on legacy inputs
459
+ * (synthetic IR, older callers) — `canGroup` then falls back to the
460
+ * stricter `fontRef === fontRef` test and keeps the runs split.
461
+ */
462
+ family?: string;
463
+ /**
464
+ * CSS 100..900, mirroring `EmbeddedFont.weight`. Used as the per-run
465
+ * weight contribution when the merger bridges variants of the same
466
+ * `family`. Absent on legacy inputs (merger then refuses to merge
467
+ * across `fontRef` mismatches — see `family`).
468
+ */
469
+ fontWeight?: number;
470
+ /** Mirrors `EmbeddedFont.style`. Pairs with `fontWeight`. */
471
+ fontStyle?: "normal" | "italic";
472
+ /**
473
+ * Width-axis label, mirroring `EmbeddedFont.width`. Distinguishes
474
+ * `Helvetica-Condensed` from `Helvetica-Regular` so the merger does
475
+ * not collapse them — CE.SDK's per-range setters (`setTextFontWeight`,
476
+ * `setTextFontStyle`) have no width parameter and would silently
477
+ * render condensed glyphs at normal width.
478
+ *
479
+ * Values follow OS/2 `usWidthClass` semantics, normalized to a small
480
+ * string set: `'ultraCondensed' | 'extraCondensed' | 'condensed' |
481
+ * 'semiCondensed' | 'normal' | 'semiExpanded' | 'expanded' |
482
+ * 'extraExpanded' | 'ultraExpanded'`. Absent on legacy inputs.
483
+ */
484
+ fontWidth?: string;
485
+ /** pt */
486
+ fontSize: number;
487
+ /**
488
+ * Radians, 0 for horizontal text. x/y place the block so rotation around
489
+ * its center matches the PDF.
490
+ */
491
+ rotation: number;
492
+ fill: Color | null;
493
+ /**
494
+ * pt, pen origin x in PDF view-frame (top-left origin), pre-pivot-comp.
495
+ * Populated by `text-items.ts`; used by `groupLineRuns` to test
496
+ * pen-origin continuity between runs that pdf.js split mid-line for
497
+ * kerning.
498
+ */
499
+ baselineXPt?: number;
500
+ /** pt, pen origin y, same frame. */
501
+ baselineYPt?: number;
502
+ /**
503
+ * pt, raw along-baseline advance from pdf.js (`item.width`). Distinct
504
+ * from `width`, which carries a small rendering padding that is
505
+ * intentionally invisible to merger logic.
506
+ */
507
+ advanceWidthPt?: number;
508
+ /**
509
+ * Per-character (UTF-16) color ranges within `text`. Set by `mergeRuns`
510
+ * only when constituent runs differ in fill; absent for single-run
511
+ * blocks and for merged groups with uniform fill (back-compat). Range
512
+ * indices match `engine.block.setTextColor(id, color, from, to)`'s
513
+ * contract.
514
+ */
515
+ colorSpans?: ColorSpan[];
516
+ /**
517
+ * Per-character (UTF-16) weight/style ranges within `text`. Set by
518
+ * `mergeRuns` only when the merger bridged adjacent same-family runs
519
+ * that use different weight/style variants (e.g. inline bold inside a
520
+ * paragraph). Absent for single-run blocks and for merged groups with
521
+ * uniform variant. Range indices match `engine.block.setTextFontWeight
522
+ * (id, weight, from, to)` / `setTextFontStyle(id, style, from, to)`.
523
+ */
524
+ styleSpans?: StyleSpan[];
525
+ /**
526
+ * Lazy builder for the run's glyph outline as a vector path. Captures
527
+ * the walker's pending run + glyph + font references; invoked by
528
+ * `emit/cesdk.ts::emitGlyphsAsVectorPath` only when the font-strategy
529
+ * cascade returns null and emission needs a fallback. Returns the SVG
530
+ * `d` plus bbox geometry, or null when path extraction is unavailable
531
+ * (Type 3 glyph charproc missing, opentype parse failure).
532
+ *
533
+ * Populated by `text-items.ts::extractTextRuns` from a per-BT/ET queue
534
+ * stashed by the walker (`placeholder._lazyGlyphOutlines`). Holds
535
+ * references — discarded when the IR is GC'd after emit. Builder is
536
+ * called at most once per run (cascade-null is rare for asset-library
537
+ * coverage).
538
+ */
539
+ _buildGlyphOutline?: () => GlyphOutlineData | null;
540
+ }
541
+ interface GlyphOutlineData {
542
+ /** SVG path data in local bbox coordinates (top-left origin). */
543
+ d: string;
544
+ /** pt, bbox top-left in page coords. */
545
+ x: number;
546
+ /** pt */
547
+ y: number;
548
+ /** pt */
549
+ width: number;
550
+ /** pt */
551
+ height: number;
552
+ }
553
+ /**
554
+ * Per-run input to the cascade and emission layers. Built by
555
+ * `lib/make-request.ts` from the source IR block (either `TextRun` or
556
+ * `TextOutline`).
557
+ *
558
+ * Vector-outline data has two shapes depending on source kind:
559
+ * - `TextOutline` sources carry `glyphPaths` eagerly (the walker built
560
+ * the path because the font wasn't embeddable — Type 3 / restricted /
561
+ * PUA).
562
+ * - `TextRun` sources carry `buildGlyphOutline`, a lazy builder set up
563
+ * by the walker + text-items pairing. Emission only invokes it when
564
+ * the cascade returns null and a vector fallback is needed — most
565
+ * documents have asset-library coverage and never call it.
566
+ */
567
+ export interface FontRequest {
568
+ /** Display family (subset prefix stripped). */
569
+ family: string;
570
+ style: "normal" | "italic";
571
+ weight: Font["weight"];
572
+ /** Original text content; preserved for metadata + recovery. */
573
+ text: string;
574
+ /** Embedded subset bytes — present when the source was a TextRun. */
575
+ embeddedFont?: EmbeddedFont;
576
+ /** Concatenated glyph paths in local coords — present when the source was a TextOutline. */
577
+ glyphPaths?: string;
578
+ /** Lazy glyph-outline builder (TextRun sources). Built by walker;
579
+ * invoked at most once by emission when the cascade returns null. */
580
+ buildGlyphOutline?: () => GlyphOutlineData | null;
581
+ /** Original IR block — used by emission for geometry, fills, rotation, etc. */
582
+ source: TextRun | TextOutline;
583
+ }
584
+ export type CascadeResolution = {
585
+ kind: "typeface";
586
+ typeface: Typeface;
587
+ font?: Font;
588
+ /** Identifies which stage produced this resolution. */
589
+ provenance: string;
590
+ /** Forwarded from `FontResolverResult.substitutedFrom`. When set, the
591
+ * emission layer treats the resolution as substituted and recomputes
592
+ * layout. */
593
+ substitutedFrom?: string;
594
+ } | {
595
+ kind: "subset";
596
+ embeddedFont: EmbeddedFont;
597
+ provenance: "embedded-subset";
239
598
  };
240
- export type PageBounds = "trim" | "media";
241
- export type PdfLogger = Logger<typeof WARNING_CODES>;
599
+ /**
600
+ * Stage outcome. Distinguishing "pass with reason" from `null` lets debug
601
+ * logs explain *why* the cascade reached a given stage's neighbour. Cheap
602
+ * to carry now; expensive to retrofit.
603
+ */
604
+ export type StageOutcome = CascadeResolution | {
605
+ kind: "pass";
606
+ reason: string;
607
+ };
608
+ interface CascadeLogger {
609
+ emit?: (code: string, params: Record<string, unknown>) => void;
610
+ log?: (message: string, level?: string) => void;
611
+ }
612
+ export interface FontStageContext {
613
+ engine: CreativeEngine;
614
+ resolver: TypefaceResolver | null;
615
+ /**
616
+ * Per-import-call cache. Same `family|style|weight` resolves once per
617
+ * `PDFParser.fromFile` invocation; reopening the PDF produces a fresh
618
+ * cache. Created in the parser entry, discarded on completion.
619
+ */
620
+ resolverCache: Map<string, FontResolverResult | null>;
621
+ logger: CascadeLogger | null;
622
+ }
623
+ export interface FontStage {
624
+ /** Stable identifier for logs and tests. */
625
+ readonly name: string;
626
+ resolve(req: FontRequest, ctx: FontStageContext): Promise<StageOutcome>;
627
+ }
628
+ export interface FontCascade {
629
+ readonly stages: readonly FontStage[];
630
+ resolve(req: FontRequest, ctx: FontStageContext): Promise<CascadeResolution | null>;
631
+ }
632
+ /**
633
+ * Result of a single emission call. The emission layer may produce zero,
634
+ * one, or more blocks; the `blocks` array is in z-order (back to front).
635
+ * `substituted` is true when the cascade's answer triggered a
636
+ * substitution that the dispatcher should account against the
637
+ * per-import substitution-count log.
638
+ */
639
+ export interface EmissionResult {
640
+ /** CE.SDK block ids created. Empty array means the run was skipped. */
641
+ blocks: number[];
642
+ /** True when the resolution involved a font substitution. */
643
+ substituted: boolean;
644
+ /** Display name of the typeface the run resolved to, if any. */
645
+ resolvedFamily?: string;
646
+ }
647
+ /**
648
+ * Block-creation primitives that emission delegates to. The dispatcher
649
+ * fills these in so emission stays decoupled from `engine.block.*` and
650
+ * from the geometry helpers that already live in `emit/cesdk.ts`. Keeps
651
+ * the emission policy testable in isolation: each callback can be
652
+ * stubbed.
653
+ */
654
+ export interface EmissionHelpers {
655
+ /**
656
+ * Create an editable text block with the request's geometry and the
657
+ * caller-supplied `applyFont` callback (called between layout and
658
+ * properties — order matters for CE.SDK's `replaceText` wrap-marker).
659
+ * `layoutIr` may be the original block or a typeface-aware substitute
660
+ * (see `resolveLayoutForSubstitution` below).
661
+ */
662
+ createEditableText(req: FontRequest, layoutIr: TextOutline | TextRun, applyFont: (engine: CreativeEngine, block: number) => void): number;
663
+ /**
664
+ * Emit the request's `glyphPaths` as a `vector_path` graphic block at
665
+ * the source bbox. Returns null when the request carries no
666
+ * `glyphPaths` (TextRun source).
667
+ */
668
+ emitGlyphsAsVectorPath(req: FontRequest): number | null;
669
+ /**
670
+ * Compute the layout-aware IR for a substituted typeface. For TextRun
671
+ * sources this is a no-op (returns `req.source`); for TextOutline
672
+ * sources it derives the unrotated bbox CE.SDK needs using the
673
+ * substitute's font metrics.
674
+ */
675
+ resolveLayoutForSubstitution(req: FontRequest, resolution: CascadeResolution): Promise<TextOutline | TextRun>;
676
+ }
677
+ export interface EmissionContext {
678
+ engine: CreativeEngine;
679
+ logger: CascadeLogger | null;
680
+ /** Page index (1-based) for diagnostic context. */
681
+ pageIndex?: number;
682
+ helpers: EmissionHelpers;
683
+ }
684
+ export interface FontEmission {
685
+ readonly name: string;
686
+ emit(req: FontRequest, resolution: CascadeResolution | null, ctx: EmissionContext): Promise<EmissionResult>;
687
+ }
688
+ export interface FontStrategy {
689
+ readonly cascade: FontCascade;
690
+ readonly emission: FontEmission;
691
+ }
692
+ export declare const perfectMatchResolverStage: FontStage;
693
+ export declare const embeddedSubsetStage: FontStage;
694
+ export declare const anyMatchResolverStage: FontStage;
695
+ export declare const defaultEmission: FontEmission;
696
+ /**
697
+ * Build a cascade from an ordered list of stages. The cascade walks the
698
+ * list and returns the first non-pass outcome. When every stage passes,
699
+ * the cascade returns `null` (emission decides what to do).
700
+ */
701
+ export declare function createFontCascade(stages: FontStage[]): FontCascade;
702
+ interface FontStrategyDef {
703
+ cascade: FontCascade;
704
+ emission: FontEmission;
705
+ }
706
+ export declare function createFontStrategy(def: FontStrategyDef): FontStrategy;
707
+ /**
708
+ * Default for `PDFParser.fromFile({ fontStrategy })`. Embedded subset
709
+ * bytes are used as an intermediate fallback to preserve the source's
710
+ * exact rendering when the family isn't in the asset library — vector
711
+ * outline extraction can produce incorrect bboxes or partial glyph
712
+ * coverage for some fonts, while the engine renders subset bytes
713
+ * faithfully.
714
+ *
715
+ * - Run's family resolves perfectly in the asset library →
716
+ * perfect-match typeface (HTTPS URI).
717
+ * - Embedded subset present (TextRun source) → register the bytes and
718
+ * emit as editable text with `setFont`. The resulting scene depends
719
+ * on the bytes living in a `buffer://` URI; this is the offline-safe
720
+ * path.
721
+ * - Otherwise → any-match resolver (possibly substituted via the
722
+ * fallbacks source; a per-import warning summarizes substitutions).
723
+ * - No stage hits → cascade returns null → emission falls through to a
724
+ * vector outline (TextOutline-source runs always have glyph paths;
725
+ * TextRun-source runs use the lazy `buildGlyphOutline` closure built
726
+ * by the walker).
727
+ *
728
+ * For tools that explicitly want vectorization over subset bytes
729
+ * (e.g. brand-locked editors), use `assetLibraryStrategy`.
730
+ */
731
+ export declare const editableFirstStrategy: FontStrategy;
732
+ /**
733
+ * For print finalization, PDF-viewer-style integrations, and anywhere a
734
+ * silent Helvetica → Roboto substitution is unacceptable. Skips
735
+ * `anyMatchResolverStage`; non-embeddable runs without a perfect match
736
+ * fall through to vector outline.
737
+ */
738
+ export declare const exactFidelityStrategy: FontStrategy;
739
+ /**
740
+ * For brand-locked tools: every editable text must come from the asset
741
+ * library (or the resolver's alias fallbacks). Drops the subset stage so
742
+ * PDF-embedded subset bytes never leak through; runs whose family isn't
743
+ * in the library go straight to the any-match resolver.
744
+ */
745
+ export declare const assetLibraryStrategy: FontStrategy;
746
+ export declare const defaultStrategy: FontStrategy;
747
+ type PageBounds = "trim" | "media";
748
+ type PdfLogger = Logger<typeof WARNING_CODES>;
242
749
  export declare class PDFParser {
243
750
  private engine;
244
751
  private pdfBytes;
245
752
  private fontResolver;
753
+ private fontStrategy;
246
754
  private pageBounds;
247
755
  private logger;
756
+ private ir?;
248
757
  private constructor();
249
758
  /**
250
759
  * Create a PDFParser instance from a PDF file.
@@ -252,17 +761,24 @@ export declare class PDFParser {
252
761
  * @param engine - CE.SDK engine instance
253
762
  * @param file - PDF file as Blob, File, or ArrayBuffer
254
763
  * @param options - Optional configuration
255
- * @param options.fontResolver - Custom font resolver. Invoked per-font
256
- * for any text run whose font bytes are not embeddable (Type3 fonts,
257
- * restricted-embedding fonts). On hit the importer emits an editable
258
- * text block with the resolved typeface; on miss it falls back to a
259
- * vector-path outline with preserved text metadata
260
- * (`ly.img.pdf-importer.text.*`). Requires `addGfontsAssetLibrary`
261
- * to be called on the engine before parse.
764
+ * @param options.fontResolver - Custom font resolver. Invoked by the
765
+ * cascade's resolver-using stages (`perfectMatchResolverStage`,
766
+ * `anyMatchResolverStage`). Requires `addGfontsAssetLibrary` to be
767
+ * called on the engine before parse.
768
+ * @param options.fontStrategy - Strategy controlling how text runs
769
+ * become blocks. Default: `editableFirstStrategy` — prefers a perfect
770
+ * asset-library match, falls back to the PDF-embedded subset, then to
771
+ * resolver substitution (any-match), and finally to vector outline
772
+ * when nothing else hits. Other shipped presets:
773
+ * `exactFidelityStrategy` (never substitute), `assetLibraryStrategy`
774
+ * (drop subset). Build custom strategies with `createFontStrategy({
775
+ * cascade, emission })`. See `packages/pdf-importer/CLAUDE.md` →
776
+ * "Font resolution" for the mental model and offline-safe recipe.
262
777
  * @returns A new PDFParser instance
263
778
  */
264
779
  static fromFile(engine: CreativeEngine, file: Blob | File | ArrayBuffer, options?: {
265
780
  fontResolver?: TypefaceResolver;
781
+ fontStrategy?: FontStrategy;
266
782
  /**
267
783
  * Which PDF page box the emitted CE.SDK pages are sized to. Defaults
268
784
  * to `"trim"` (designed page + bleed margin, the end-user view).
@@ -285,16 +801,54 @@ export declare class PDFParser {
285
801
  * lines that share font/size and horizontal overlap merged into a single
286
802
  * multi-line block.
287
803
  *
804
+ * Mirrors `engine.scene.loadFromString`: replaces the engine's current
805
+ * scene with the imported PDF.
806
+ *
288
807
  * @returns Parse result containing the scene ID and logger
289
808
  */
290
809
  parse(): Promise<{
291
810
  scene: number;
292
811
  logger: PdfLogger;
293
812
  }>;
813
+ /**
814
+ * Parse the PDF and return the imported pages as detached CE.SDK blocks.
815
+ *
816
+ * Mirrors `engine.block.loadFromString`: returned blocks are NOT attached
817
+ * to a scene. Caller appends and positions them however they want.
818
+ *
819
+ * Each PDF page yields one returned `page` block, sized to the PDF's
820
+ * MediaBox in inches with the page's content as children. CE.SDK pages
821
+ * are the right wrapper here — `setWidth`/`setHeight` scales children
822
+ * proportionally (so callers can fit a PDF page into an arbitrary host
823
+ * frame), and pages clip to their bounds (matching IDML's frame
824
+ * semantics when content overflows).
825
+ *
826
+ * @param options
827
+ * @param options.pageIndex 0-based page to load. Omit to load all pages
828
+ * (returns one block per page, in document order).
829
+ * @returns Detached page block IDs (length 1 for single-page mode,
830
+ * `pdf.numPages` for all-pages mode), plus the logger so callers can
831
+ * forward parser warnings into their own diagnostic stream.
832
+ */
833
+ loadAsBlocks(options?: {
834
+ pageIndex?: number;
835
+ }): Promise<{
836
+ blocks: number[];
837
+ logger: PdfLogger;
838
+ }>;
839
+ /**
840
+ * Number of pages in the PDF document.
841
+ *
842
+ * Triggers a full IR extraction on first call (the same work `parse` /
843
+ * `loadAsBlocks` does), then caches it. Subsequent calls — including
844
+ * `parse` and `loadAsBlocks` themselves — reuse the cached IR.
845
+ */
846
+ getPageCount(): Promise<number>;
294
847
  /**
295
848
  * Get the logger for this parser instance.
296
849
  */
297
850
  getLogger(): PdfLogger;
851
+ private ensureIR;
298
852
  }
299
853
 
300
854
  export {};