@embedpdf/models 2.6.0 → 2.6.2

This diff shows the changes between publicly available package versions as they were released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/pdf.d.ts CHANGED
@@ -2090,19 +2090,34 @@ export interface SearchAllPagesResult {
2090
2090
  */
2091
2091
  export interface PdfGlyphObject {
2092
2092
  /**
2093
- * Origin of the glyph
2093
+ * Origin of the glyph (loose bounds from FPDFText_GetLooseCharBox)
2094
2094
  */
2095
2095
  origin: {
2096
2096
  x: number;
2097
2097
  y: number;
2098
2098
  };
2099
2099
  /**
2100
- * Size of the glyph
2100
+ * Size of the glyph (loose bounds from FPDFText_GetLooseCharBox)
2101
2101
  */
2102
2102
  size: {
2103
2103
  width: number;
2104
2104
  height: number;
2105
2105
  };
2106
+ /**
2107
+ * Tight bounds origin (from FPDFText_GetCharBox, closely surrounds the actual glyph shape).
2108
+ * Used for hit-testing to match Chrome's FPDFText_GetCharIndexAtPos behaviour.
2109
+ */
2110
+ tightOrigin?: {
2111
+ x: number;
2112
+ y: number;
2113
+ };
2114
+ /**
2115
+ * Tight bounds size (from FPDFText_GetCharBox)
2116
+ */
2117
+ tightSize?: {
2118
+ width: number;
2119
+ height: number;
2120
+ };
2106
2121
  /**
2107
2122
  * Whether the glyph is a space
2108
2123
  */
@@ -2119,25 +2134,42 @@ export interface PdfGlyphObject {
2119
2134
  */
2120
2135
  export interface PdfGlyphSlim {
2121
2136
  /**
2122
- * X coordinate of the glyph
2137
+ * X coordinate of the glyph (loose bounds from FPDFText_GetLooseCharBox)
2123
2138
  */
2124
2139
  x: number;
2125
2140
  /**
2126
- * Y coordinate of the glyph
2141
+ * Y coordinate of the glyph (loose bounds from FPDFText_GetLooseCharBox)
2127
2142
  */
2128
2143
  y: number;
2129
2144
  /**
2130
- * Width of the glyph
2145
+ * Width of the glyph (loose bounds from FPDFText_GetLooseCharBox)
2131
2146
  */
2132
2147
  width: number;
2133
2148
  /**
2134
- * Height of the glyph
2149
+ * Height of the glyph (loose bounds from FPDFText_GetLooseCharBox)
2135
2150
  */
2136
2151
  height: number;
2137
2152
  /**
2138
2153
  * Flags of the glyph
2139
2154
  */
2140
2155
  flags: number;
2156
+ /**
2157
+ * Tight X coordinate (from FPDFText_GetCharBox).
2158
+ * Used for hit-testing to match Chrome's FPDFText_GetCharIndexAtPos behaviour.
2159
+ */
2160
+ tightX?: number;
2161
+ /**
2162
+ * Tight Y coordinate (from FPDFText_GetCharBox)
2163
+ */
2164
+ tightY?: number;
2165
+ /**
2166
+ * Tight width (from FPDFText_GetCharBox)
2167
+ */
2168
+ tightWidth?: number;
2169
+ /**
2170
+ * Tight height (from FPDFText_GetCharBox)
2171
+ */
2172
+ tightHeight?: number;
2141
2173
  }
2142
2174
  /**
2143
2175
  * Run object
@@ -2162,6 +2194,10 @@ export interface PdfRun {
2162
2194
  * Glyphs of the run
2163
2195
  */
2164
2196
  glyphs: PdfGlyphSlim[];
2197
+ /**
2198
+ * Font size of the run (all glyphs in a run share the same font size)
2199
+ */
2200
+ fontSize?: number;
2165
2201
  }
2166
2202
  /**
2167
2203
  * Page geometry
@@ -2174,6 +2210,56 @@ export interface PdfPageGeometry {
2174
2210
  */
2175
2211
  runs: PdfRun[];
2176
2212
  }
2213
+ /**
2214
+ * Font information extracted from a PDF text object.
2215
+ *
2216
+ * @public
2217
+ */
2218
+ export interface PdfFontInfo {
2219
+ /** PostScript name (e.g. "HOEPNL+Arial,Bold"). */
2220
+ name: string;
2221
+ /** Font family name (e.g. "Arial"). */
2222
+ familyName: string;
2223
+ /** Weight 100-900 (400 = normal, 700 = bold). */
2224
+ weight: number;
2225
+ /** Whether the font is italic. */
2226
+ italic: boolean;
2227
+ /** Whether the font is monospaced (fixed-pitch). */
2228
+ monospaced: boolean;
2229
+ /** Whether the font data is embedded in the PDF. */
2230
+ embedded: boolean;
2231
+ }
2232
+ /**
2233
+ * A rich text run: consecutive characters sharing the same text object,
2234
+ * font, size, and color.
2235
+ *
2236
+ * @public
2237
+ */
2238
+ export interface PdfTextRun {
2239
+ /** The text content (UTF-8). */
2240
+ text: string;
2241
+ /** Bounding box in PDF page coordinates (points). */
2242
+ rect: Rect;
2243
+ /** Font metadata (uniform within the run). */
2244
+ font: PdfFontInfo;
2245
+ /** Font size in points. */
2246
+ fontSize: number;
2247
+ /** Fill color (RGBA). */
2248
+ color: PdfAlphaColor;
2249
+ /** Start character index in the text page. */
2250
+ charIndex: number;
2251
+ /** Number of characters in this run. */
2252
+ charCount: number;
2253
+ }
2254
+ /**
2255
+ * Rich text runs for a single page.
2256
+ *
2257
+ * @public
2258
+ */
2259
+ export interface PdfPageTextRuns {
2260
+ /** Text runs ordered by reading order. */
2261
+ runs: PdfTextRun[];
2262
+ }
2177
2263
  /**
2178
2264
  * form field value
2179
2265
  * @public
@@ -2645,6 +2731,26 @@ export interface PdfEngine<T = Blob> {
2645
2731
  * @returns task contains the rendered image or error
2646
2732
  */
2647
2733
  renderPageRect: (doc: PdfDocumentObject, page: PdfPageObject, rect: Rect, options?: PdfRenderPageOptions) => PdfTask<T>;
2734
+ /**
2735
+ * Render the specified pdf page and return raw pixel data (ImageDataLike)
2736
+ * without encoding to the output format T. Useful for AI/ML pipelines
2737
+ * that need direct pixel access.
2738
+ * @param doc - pdf document
2739
+ * @param page - pdf page
2740
+ * @param options - render options (imageType/imageQuality are ignored)
2741
+ * @returns task contains raw ImageDataLike or error
2742
+ */
2743
+ renderPageRaw: (doc: PdfDocumentObject, page: PdfPageObject, options?: PdfRenderPageOptions) => PdfTask<ImageDataLike>;
2744
+ /**
2745
+ * Render the specified rect of a pdf page and return raw pixel data
2746
+ * (ImageDataLike) without encoding to the output format T.
2747
+ * @param doc - pdf document
2748
+ * @param page - pdf page
2749
+ * @param rect - target rect in PDF coordinate space
2750
+ * @param options - render options (imageType/imageQuality are ignored)
2751
+ * @returns task contains raw ImageDataLike or error
2752
+ */
2753
+ renderPageRectRaw: (doc: PdfDocumentObject, page: PdfPageObject, rect: Rect, options?: PdfRenderPageOptions) => PdfTask<ImageDataLike>;
2648
2754
  /**
2649
2755
  * Render the thumbnail of specified pdf page
2650
2756
  * @param doc - pdf document
@@ -2829,6 +2935,13 @@ export interface PdfEngine<T = Blob> {
2829
2935
  * @returns task contains the geometry
2830
2936
  */
2831
2937
  getPageGeometry: (doc: PdfDocumentObject, page: PdfPageObject) => PdfTask<PdfPageGeometry>;
2938
+ /**
2939
+ * Get rich text runs for a page, grouped by text object with font and color info
2940
+ * @param doc - pdf document
2941
+ * @param page - pdf page
2942
+ * @returns task contains the text runs
2943
+ */
2944
+ getPageTextRuns: (doc: PdfDocumentObject, page: PdfPageObject) => PdfTask<PdfPageTextRuns>;
2832
2945
  /**
2833
2946
  * Merge multiple pdf documents
2834
2947
  * @param files - all the pdf files
@@ -2974,6 +3087,7 @@ export interface IPdfiumExecutor {
2974
3087
  getTextSlices(doc: PdfDocumentObject, slices: PageTextSlice[]): PdfTask<string[]>;
2975
3088
  getPageGlyphs(doc: PdfDocumentObject, page: PdfPageObject): PdfTask<PdfGlyphObject[]>;
2976
3089
  getPageGeometry(doc: PdfDocumentObject, page: PdfPageObject): PdfTask<PdfPageGeometry>;
3090
+ getPageTextRuns(doc: PdfDocumentObject, page: PdfPageObject): PdfTask<PdfPageTextRuns>;
2977
3091
  merge(files: PdfFile[]): PdfTask<PdfFile>;
2978
3092
  mergePages(mergeConfigs: Array<{
2979
3093
  docId: string;
@@ -0,0 +1,69 @@
1
+ import { Task } from './task';
2
+ /**
3
+ * Utility for composing sequential Task operations within an async function,
4
+ * while preserving abort propagation and optional progress forwarding.
5
+ *
6
+ * Bridges the gap between the Task "push" model (callbacks, abort, progress)
7
+ * and the async/await "pull" model, without losing Task benefits.
8
+ *
9
+ * @example
10
+ * ```ts
11
+ * function doWork(): Task<Result, MyError, MyProgress> {
12
+ * const task = new Task<Result, MyError, MyProgress>();
13
+ * const seq = new TaskSequence(task);
14
+ *
15
+ * seq.execute(
16
+ * async () => {
17
+ * const data = await seq.run(() => fetchDataAsTask());
18
+ * const result = await seq.runWithProgress(
19
+ * () => processAsTask(data),
20
+ * (childProgress) => ({ stage: 'processing', ...childProgress }),
21
+ * );
22
+ * task.resolve(result);
23
+ * },
24
+ * (err) => ({ type: 'failed', message: String(err) }),
25
+ * );
26
+ *
27
+ * return task;
28
+ * }
29
+ * ```
30
+ *
31
+ * @public
32
+ */
33
+ export declare class TaskSequence<TError, TProgress> {
34
+ private parentTask;
35
+ private activeChild;
36
+ private disposed;
37
+ constructor(parentTask: Task<any, TError, TProgress>);
38
+ /**
39
+ * Execute a child Task and return its result as a Promise.
40
+ *
41
+ * If the parent task has been aborted, throws `TaskAbortedError` immediately.
42
+ * If the parent task is aborted while the child is running, the child is aborted too.
43
+ */
44
+ run<R>(factory: () => Task<R, any, any>): Promise<R>;
45
+ /**
46
+ * Execute a child Task and return its result as a Promise,
47
+ * forwarding the child's progress events to the parent task
48
+ * through the provided mapper function.
49
+ *
50
+ * If the parent task has been aborted, throws `TaskAbortedError` immediately.
51
+ * If the parent task is aborted while the child is running, the child is aborted too.
52
+ */
53
+ runWithProgress<R, CP>(factory: () => Task<R, any, CP>, mapProgress: (childProgress: CP) => TProgress): Promise<R>;
54
+ /**
55
+ * Execute an async function body that uses `run()` / `runWithProgress()`,
56
+ * automatically handling abort and error routing to the parent task.
57
+ *
58
+ * - If the body throws `TaskAbortedError`, it is silently ignored
59
+ * (the parent task was already aborted via the abort override).
60
+ * - If the body throws `TaskRejectedError` (from a child task rejection
61
+ * via `run()` / `runWithProgress()`), its `.reason` is forwarded directly
62
+ * to the parent task, bypassing `mapError`.
63
+ * - Any other thrown error is mapped through `mapError` and used to
64
+ * reject the parent task. This handles unexpected runtime exceptions
65
+ * in the async body itself.
66
+ * - On success, the body is responsible for calling `parentTask.resolve()`.
67
+ */
68
+ execute(fn: () => Promise<void>, mapError: (err: unknown) => TError): void;
69
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@embedpdf/models",
3
- "version": "2.6.0",
3
+ "version": "2.6.2",
4
4
  "private": false,
5
5
  "description": "Shared type definitions, data models, and utility helpers (geometry, tasks, logging, PDF primitives) that underpin every package in the EmbedPDF ecosystem.",
6
6
  "type": "module",