@embedpdf/models 2.6.0 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/pdf.d.ts CHANGED
@@ -2174,6 +2174,56 @@ export interface PdfPageGeometry {
2174
2174
  */
2175
2175
  runs: PdfRun[];
2176
2176
  }
2177
+ /**
2178
+ * Font metadata extracted from a PDF text object: names, weight, style flags, and embedding status.
2179
+ *
2180
+ * @public
2181
+ */
2182
+ export interface PdfFontInfo {
2183
+ /** PostScript name; as the example shows, it can carry a prefix and a style suffix around the family name (e.g. "HOEPNL+Arial,Bold"). */
2184
+ name: string;
2185
+ /** Font family name (e.g. "Arial"). */
2186
+ familyName: string;
2187
+ /** Numeric weight in the range 100-900 (400 = normal, 700 = bold). */
2188
+ weight: number;
2189
+ /** True when the font is italic. */
2190
+ italic: boolean;
2191
+ /** True when the font is monospaced (fixed-pitch). */
2192
+ monospaced: boolean;
2193
+ /** True when the font data is embedded in the PDF. */
2194
+ embedded: boolean;
2195
+ }
2196
+ /**
2197
+ * A rich text run: consecutive characters sharing the same text object,
2198
+ * font, size, and color, so each run carries a single uniform style.
2199
+ *
2200
+ * @public
2201
+ */
2202
+ export interface PdfTextRun {
2203
+ /** The text content of the run. NOTE(review): documented as UTF-8, presumably the decoding used to build this string - confirm. */
2204
+ text: string;
2205
+ /** Bounding box of the run, in PDF page coordinates (points). */
2206
+ rect: Rect;
2207
+ /** Font metadata shared by every character in the run. */
2208
+ font: PdfFontInfo;
2209
+ /** Font size in points. */
2210
+ fontSize: number;
2211
+ /** Fill color of the run, including alpha (RGBA). */
2212
+ color: PdfAlphaColor;
2213
+ /** Index of the first character of the run within the text page; together with charCount it delimits the run. */
2214
+ charIndex: number;
2215
+ /** Number of characters in this run, counted from charIndex. */
2216
+ charCount: number;
2217
+ }
2218
+ /**
2219
+ * Container for the rich text runs of a single page.
2220
+ *
2221
+ * @public
2222
+ */
2223
+ export interface PdfPageTextRuns {
2224
+ /** The text runs of the page, in reading order. */
2225
+ runs: PdfTextRun[];
2226
+ }
2177
2227
  /**
2178
2228
  * form field value
2179
2229
  * @public
@@ -2645,6 +2695,26 @@ export interface PdfEngine<T = Blob> {
2645
2695
  * @returns task contains the rendered image or error
2646
2696
  */
2647
2697
  renderPageRect: (doc: PdfDocumentObject, page: PdfPageObject, rect: Rect, options?: PdfRenderPageOptions) => PdfTask<T>;
2698
+ /**
2699
+ * Render the specified pdf page and return raw pixel data (ImageDataLike)
2700
+ * without encoding to the output format T. Useful for AI/ML pipelines
2701
+ * that need direct pixel access.
2702
+ * @param doc - pdf document
2703
+ * @param page - pdf page to render
2704
+ * @param options - optional render options; imageType/imageQuality are ignored because the result is not encoded
2705
+ * @returns task containing the raw ImageDataLike, or an error
2706
+ */
2707
+ renderPageRaw: (doc: PdfDocumentObject, page: PdfPageObject, options?: PdfRenderPageOptions) => PdfTask<ImageDataLike>;
2708
+ /**
2709
+ * Render the specified rect of a pdf page and return raw pixel data
2710
+ * (ImageDataLike) without encoding to the output format T.
2711
+ * @param doc - pdf document
2712
+ * @param page - pdf page to render
2713
+ * @param rect - region of the page to render, in PDF coordinate space
2714
+ * @param options - optional render options; imageType/imageQuality are ignored because the result is not encoded
2715
+ * @returns task containing the raw ImageDataLike, or an error
2716
+ */
2717
+ renderPageRectRaw: (doc: PdfDocumentObject, page: PdfPageObject, rect: Rect, options?: PdfRenderPageOptions) => PdfTask<ImageDataLike>;
2648
2718
  /**
2649
2719
  * Render the thumbnail of specified pdf page
2650
2720
  * @param doc - pdf document
@@ -2829,6 +2899,13 @@ export interface PdfEngine<T = Blob> {
2829
2899
  * @returns task contains the geometry
2830
2900
  */
2831
2901
  getPageGeometry: (doc: PdfDocumentObject, page: PdfPageObject) => PdfTask<PdfPageGeometry>;
2902
+ /**
2903
+ * Get the rich text runs of a page: text grouped by text object, each run carrying font and color info
2904
+ * @param doc - pdf document
2905
+ * @param page - pdf page to read the text runs from
2906
+ * @returns task containing the text runs of the page (PdfPageTextRuns)
2907
+ */
2908
+ getPageTextRuns: (doc: PdfDocumentObject, page: PdfPageObject) => PdfTask<PdfPageTextRuns>;
2832
2909
  /**
2833
2910
  * Merge multiple pdf documents
2834
2911
  * @param files - all the pdf files
@@ -2974,6 +3051,7 @@ export interface IPdfiumExecutor {
2974
3051
  getTextSlices(doc: PdfDocumentObject, slices: PageTextSlice[]): PdfTask<string[]>;
2975
3052
  getPageGlyphs(doc: PdfDocumentObject, page: PdfPageObject): PdfTask<PdfGlyphObject[]>;
2976
3053
  getPageGeometry(doc: PdfDocumentObject, page: PdfPageObject): PdfTask<PdfPageGeometry>;
3054
+ getPageTextRuns(doc: PdfDocumentObject, page: PdfPageObject): PdfTask<PdfPageTextRuns>; // rich text runs for a page; same parameters and result type as PdfEngine.getPageTextRuns
2977
3055
  merge(files: PdfFile[]): PdfTask<PdfFile>;
2978
3056
  mergePages(mergeConfigs: Array<{
2979
3057
  docId: string;
@@ -0,0 +1,69 @@
1
+ import { Task } from './task';
2
+ /**
3
+ * Utility for composing sequential Task operations within an async function,
4
+ * while preserving abort propagation and optional progress forwarding.
5
+ *
6
+ * Bridges the gap between the Task "push" model (callbacks, abort, progress)
7
+ * and the async/await "pull" model, without losing Task benefits.
8
+ *
9
+ * @example
10
+ * ```ts
11
+ * function doWork(): Task<Result, MyError, MyProgress> {
12
+ * const task = new Task<Result, MyError, MyProgress>();
13
+ * const seq = new TaskSequence(task);
14
+ *
15
+ * seq.execute(
16
+ * async () => {
17
+ * const data = await seq.run(() => fetchDataAsTask());
18
+ * const result = await seq.runWithProgress(
19
+ * () => processAsTask(data),
20
+ * (childProgress) => ({ stage: 'processing', ...childProgress }),
21
+ * );
22
+ * task.resolve(result);
23
+ * },
24
+ * (err) => ({ type: 'failed', message: String(err) }),
25
+ * );
26
+ *
27
+ * return task;
28
+ * }
29
+ * ```
30
+ *
31
+ * @public
32
+ */
33
+ export declare class TaskSequence<TError, TProgress> {
34
+ private parentTask; // presumably the task passed to the constructor, which receives forwarded progress and rejections - confirm
35
+ private activeChild; // presumably the child task currently running, kept so a parent abort can reach it - confirm
36
+ private disposed; // NOTE(review): purpose is not visible in this declaration - confirm against the implementation
37
+ constructor(parentTask: Task<any, TError, TProgress>); // parentTask: the task whose abort, progress, and rejection this sequence coordinates
38
+ /**
39
+ * Execute the child Task produced by `factory` and return its result as a Promise.
40
+ *
41
+ * If the parent task has been aborted, throws `TaskAbortedError` immediately.
42
+ * If the parent task is aborted while the child is running, the child is aborted too. A child rejection surfaces as a thrown `TaskRejectedError` (see `execute`).
43
+ */
44
+ run<R>(factory: () => Task<R, any, any>): Promise<R>;
45
+ /**
46
+ * Execute the child Task produced by `factory` and return its result as a Promise,
47
+ * forwarding the child's progress events to the parent task
48
+ * through `mapProgress`, which converts child progress into the parent's progress type.
49
+ *
50
+ * If the parent task has been aborted, throws `TaskAbortedError` immediately.
51
+ * If the parent task is aborted while the child is running, the child is aborted too. A child rejection surfaces as a thrown `TaskRejectedError` (see `execute`).
52
+ */
53
+ runWithProgress<R, CP>(factory: () => Task<R, any, CP>, mapProgress: (childProgress: CP) => TProgress): Promise<R>;
54
+ /**
55
+ * Execute an async function body that uses `run()` / `runWithProgress()`,
56
+ * automatically handling abort and error routing to the parent task.
57
+ *
58
+ * - If the body throws `TaskAbortedError`, it is silently ignored
59
+ * (the parent task was already aborted via the abort override).
60
+ * - If the body throws `TaskRejectedError` (from a child task rejection
61
+ * via `run()` / `runWithProgress()`), its `.reason` is forwarded directly
62
+ * to the parent task, bypassing `mapError`.
63
+ * - Any other thrown error is mapped through `mapError` and used to
64
+ * reject the parent task. This handles unexpected runtime exceptions
65
+ * in the async body itself.
66
+ * - On success, the body is responsible for calling `parentTask.resolve()` (as the class example does with `task.resolve(result)`).
67
+ */
68
+ execute(fn: () => Promise<void>, mapError: (err: unknown) => TError): void;
69
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@embedpdf/models",
3
- "version": "2.6.0",
3
+ "version": "2.6.1",
4
4
  "private": false,
5
5
  "description": "Shared type definitions, data models, and utility helpers (geometry, tasks, logging, PDF primitives) that underpin every package in the EmbedPDF ecosystem.",
6
6
  "type": "module",