@cj-tech-master/excelts 8.1.2 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +2 -2
  2. package/README_zh.md +2 -2
  3. package/dist/browser/modules/excel/cell.js +11 -7
  4. package/dist/browser/modules/excel/column.js +7 -6
  5. package/dist/browser/modules/excel/row.js +5 -1
  6. package/dist/browser/modules/excel/stream/worksheet-reader.js +3 -2
  7. package/dist/browser/modules/excel/utils/cell-format.js +64 -2
  8. package/dist/browser/modules/pdf/excel-bridge.d.ts +4 -3
  9. package/dist/browser/modules/pdf/excel-bridge.js +18 -5
  10. package/dist/browser/modules/pdf/index.d.ts +3 -3
  11. package/dist/browser/modules/pdf/index.js +3 -3
  12. package/dist/browser/modules/pdf/pdf.d.ts +7 -6
  13. package/dist/browser/modules/pdf/pdf.js +7 -6
  14. package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +8 -7
  15. package/dist/browser/modules/pdf/reader/pdf-reader.js +81 -74
  16. package/dist/browser/modules/pdf/render/constants.d.ts +30 -0
  17. package/dist/browser/modules/pdf/render/constants.js +30 -0
  18. package/dist/browser/modules/pdf/render/layout-engine.d.ts +2 -1
  19. package/dist/browser/modules/pdf/render/layout-engine.js +359 -156
  20. package/dist/browser/modules/pdf/render/page-renderer.d.ts +2 -2
  21. package/dist/browser/modules/pdf/render/page-renderer.js +245 -107
  22. package/dist/browser/modules/pdf/render/pdf-exporter.d.ts +3 -2
  23. package/dist/browser/modules/pdf/render/pdf-exporter.js +145 -105
  24. package/dist/browser/modules/pdf/render/style-converter.js +27 -26
  25. package/dist/browser/modules/pdf/types.d.ts +8 -0
  26. package/dist/browser/utils/utils.base.d.ts +5 -0
  27. package/dist/browser/utils/utils.base.js +10 -0
  28. package/dist/cjs/modules/excel/cell.js +11 -7
  29. package/dist/cjs/modules/excel/column.js +7 -6
  30. package/dist/cjs/modules/excel/row.js +5 -1
  31. package/dist/cjs/modules/excel/stream/worksheet-reader.js +3 -2
  32. package/dist/cjs/modules/excel/utils/cell-format.js +64 -2
  33. package/dist/cjs/modules/pdf/excel-bridge.js +18 -5
  34. package/dist/cjs/modules/pdf/index.js +3 -3
  35. package/dist/cjs/modules/pdf/pdf.js +7 -6
  36. package/dist/cjs/modules/pdf/reader/pdf-reader.js +81 -74
  37. package/dist/cjs/modules/pdf/render/constants.js +33 -0
  38. package/dist/cjs/modules/pdf/render/layout-engine.js +359 -156
  39. package/dist/cjs/modules/pdf/render/page-renderer.js +245 -107
  40. package/dist/cjs/modules/pdf/render/pdf-exporter.js +145 -105
  41. package/dist/cjs/modules/pdf/render/style-converter.js +27 -26
  42. package/dist/cjs/utils/utils.base.js +11 -0
  43. package/dist/esm/modules/excel/cell.js +11 -7
  44. package/dist/esm/modules/excel/column.js +7 -6
  45. package/dist/esm/modules/excel/row.js +5 -1
  46. package/dist/esm/modules/excel/stream/worksheet-reader.js +3 -2
  47. package/dist/esm/modules/excel/utils/cell-format.js +64 -2
  48. package/dist/esm/modules/pdf/excel-bridge.js +18 -5
  49. package/dist/esm/modules/pdf/index.js +3 -3
  50. package/dist/esm/modules/pdf/pdf.js +7 -6
  51. package/dist/esm/modules/pdf/reader/pdf-reader.js +81 -74
  52. package/dist/esm/modules/pdf/render/constants.js +30 -0
  53. package/dist/esm/modules/pdf/render/layout-engine.js +359 -156
  54. package/dist/esm/modules/pdf/render/page-renderer.js +245 -107
  55. package/dist/esm/modules/pdf/render/pdf-exporter.js +145 -105
  56. package/dist/esm/modules/pdf/render/style-converter.js +27 -26
  57. package/dist/esm/utils/utils.base.js +10 -0
  58. package/dist/iife/excelts.iife.js +1022 -677
  59. package/dist/iife/excelts.iife.js.map +1 -1
  60. package/dist/iife/excelts.iife.min.js +48 -48
  61. package/dist/types/modules/pdf/excel-bridge.d.ts +4 -3
  62. package/dist/types/modules/pdf/index.d.ts +3 -3
  63. package/dist/types/modules/pdf/pdf.d.ts +7 -6
  64. package/dist/types/modules/pdf/reader/pdf-reader.d.ts +8 -7
  65. package/dist/types/modules/pdf/render/constants.d.ts +30 -0
  66. package/dist/types/modules/pdf/render/layout-engine.d.ts +2 -1
  67. package/dist/types/modules/pdf/render/page-renderer.d.ts +2 -2
  68. package/dist/types/modules/pdf/render/pdf-exporter.d.ts +3 -2
  69. package/dist/types/modules/pdf/types.d.ts +8 -0
  70. package/dist/types/utils/utils.base.d.ts +5 -0
  71. package/package.json +1 -1
@@ -522,6 +522,35 @@ function formatNumberPattern(val, fmt) {
522
522
  const decimalPlaces = decFmt.replace(/[^0#?]/g, "").length;
523
523
  // Round the value
524
524
  const roundedVal = roundTo(scaledVal, decimalPlaces);
525
+ // When value is zero and the format has no required '0' digit placeholders,
526
+ // '?' placeholders become spaces and '#' placeholders produce nothing.
527
+ // This handles accounting format zero sections like "-"?? → "- " (dash + spaces).
528
+ if (roundedVal === 0 && !intFmt.includes("0") && !decFmt.includes("0")) {
529
+ let result = "";
530
+ for (const ch of intFmt) {
531
+ if (ch === "?") {
532
+ result += " ";
533
+ }
534
+ else if (ch !== "#" && ch !== ",") {
535
+ // Preserve literal characters (already unquoted at this point)
536
+ result += ch;
537
+ }
538
+ }
539
+ if (decimalPlaces > 0) {
540
+ // Only emit the decimal point if the decimal format has '?' or '0' placeholders.
541
+ // Pure '#' decimal digits produce nothing for zero values.
542
+ const hasDecContent = /[0?]/.test(decFmt);
543
+ if (hasDecContent) {
544
+ result += ".";
545
+ for (const ch of decFmt) {
546
+ if (ch === "?") {
547
+ result += " ";
548
+ }
549
+ }
550
+ }
551
+ }
552
+ return sign + result;
553
+ }
525
554
  // Split into integer and decimal parts
526
555
  const [intPart, decPart = ""] = roundedVal.toString().split(".");
527
556
  // Check if format has literal characters mixed with digit placeholders (like "0-0", "000-0000")
@@ -561,16 +590,49 @@ function formatNumberPattern(val, fmt) {
561
590
  if (intFmt.includes(",")) {
562
591
  formattedInt = commaify(intPart);
563
592
  }
564
- // Pad integer with leading zeros if needed
593
+ // Pad integer with leading zeros/spaces if needed
594
+ // '0' placeholder → pad with "0", '?' placeholder → pad with " "
565
595
  const minIntDigits = (intFmt.match(/0/g) ?? []).length;
596
+ const totalIntSlots = (intFmt.match(/[0?]/g) ?? []).length;
566
597
  if (formattedInt.length < minIntDigits) {
567
598
  formattedInt = "0".repeat(minIntDigits - formattedInt.length) + formattedInt;
568
599
  }
600
+ if (formattedInt.length < totalIntSlots) {
601
+ formattedInt = " ".repeat(totalIntSlots - formattedInt.length) + formattedInt;
602
+ }
603
+ // '#' integer placeholder: suppress "0" when there are no required '0' or '?' digits
604
+ // and the integer value is zero (e.g. "#" format with value 0 → empty)
605
+ if (formattedInt === "0" && minIntDigits === 0 && totalIntSlots === 0) {
606
+ formattedInt = "";
607
+ }
569
608
  }
570
609
  // Format decimal part
571
610
  let formattedDec = "";
572
611
  if (decimalPlaces > 0) {
573
- formattedDec = "." + (decPart + "0".repeat(decimalPlaces)).substring(0, decimalPlaces);
612
+ const rawDec = (decPart + "0".repeat(decimalPlaces)).substring(0, decimalPlaces);
613
+ // Process each decimal digit position according to its placeholder:
614
+ // '0' → always show digit, '?' → show digit or space, '#' → show digit or nothing (trim trailing)
615
+ const decChars = rawDec.split("");
616
+ // Walk from the end: '#' trailing zeros are removed, '?' trailing zeros become spaces
617
+ for (let i = decFmt.length - 1; i >= 0; i--) {
618
+ if (i >= decChars.length) {
619
+ continue;
620
+ }
621
+ if (decFmt[i] === "#" && decChars[i] === "0") {
622
+ decChars[i] = "";
623
+ }
624
+ else if (decFmt[i] === "?" && decChars[i] === "0") {
625
+ decChars[i] = " ";
626
+ }
627
+ else {
628
+ break; // stop at first non-zero or '0' placeholder
629
+ }
630
+ }
631
+ const decStr = decChars.join("");
632
+ // Only emit decimal point if there is content after it
633
+ if (decStr.length > 0) {
634
+ formattedDec = "." + decStr;
635
+ }
574
636
  }
575
637
  return sign + formattedInt + formattedDec;
576
638
  }
@@ -12,7 +12,7 @@
12
12
  *
13
13
  * const workbook = new Workbook();
14
14
  * // ... build workbook ...
15
- * const pdf = excelToPdf(workbook);
15
+ * const pdf = await excelToPdf(workbook);
16
16
  * ```
17
17
  */
18
18
  Object.defineProperty(exports, "__esModule", { value: true });
@@ -30,12 +30,13 @@ const types_1 = require("./types");
30
30
  *
31
31
  * This is a convenience function that converts the Workbook to the PDF module's
32
32
  * data model and then generates the PDF.
33
+ * Yields to the event loop between each output page during layout and rendering.
33
34
  *
34
35
  * @param workbook - An Excel Workbook instance
35
36
  * @param options - PDF export options
36
- * @returns PDF file as a Uint8Array
37
+ * @returns Promise of PDF file as a Uint8Array
37
38
  */
38
- function excelToPdf(workbook, options) {
39
+ async function excelToPdf(workbook, options) {
39
40
  const pdfWorkbook = excelWorkbookToPdf(workbook);
40
41
  return (0, pdf_exporter_1.exportPdf)(pdfWorkbook, options);
41
42
  }
@@ -84,12 +85,24 @@ function convertSheet(ws, workbook) {
84
85
  continue;
85
86
  }
86
87
  const cells = new Map();
87
- row.eachCell({ includeEmpty: false }, cell => {
88
- cells.set(cell.col, convertCell(cell));
88
+ row.eachCell({ includeEmpty: true }, cell => {
89
+ const hasValue = cell.type !== enums_1.ValueType.Null && cell.type !== enums_1.ValueType.Merge;
90
+ const hasStyle = cell.style &&
91
+ ((cell.style.border &&
92
+ (cell.style.border.top ||
93
+ cell.style.border.right ||
94
+ cell.style.border.bottom ||
95
+ cell.style.border.left)) ||
96
+ cell.style.fill ||
97
+ cell.style.font);
98
+ if (hasValue || hasStyle) {
99
+ cells.set(cell.col, convertCell(cell));
100
+ }
89
101
  });
90
102
  rows.set(r, {
91
103
  hidden: row.hidden || undefined,
92
104
  height: row.height ?? undefined,
105
+ customHeight: row.customHeight || undefined,
93
106
  cells
94
107
  });
95
108
  }
@@ -8,7 +8,7 @@
8
8
  * ```typescript
9
9
  * import { pdf } from "excelts/pdf";
10
10
  *
11
- * const bytes = pdf([
11
+ * const bytes = await pdf([
12
12
  * ["Product", "Revenue"],
13
13
  * ["Widget", 1000],
14
14
  * ["Gadget", 2500]
@@ -23,14 +23,14 @@
23
23
  * const workbook = new Workbook();
24
24
  * const sheet = workbook.addWorksheet("Sales");
25
25
  * sheet.addRow(["Product", "Revenue"]);
26
- * const bytes = excelToPdf(workbook);
26
+ * const bytes = await excelToPdf(workbook);
27
27
  * ```
28
28
  *
29
29
  * @example Read PDF — extract text, images, and metadata:
30
30
  * ```typescript
31
31
  * import { readPdf } from "excelts/pdf";
32
32
  *
33
- * const result = readPdf(pdfBytes);
33
+ * const result = await readPdf(pdfBytes);
34
34
  * console.log(result.text); // All text
35
35
  * console.log(result.pages[0].text); // Page 1 text
36
36
  * console.log(result.pages[0].images); // Page 1 images
@@ -9,7 +9,7 @@
9
9
  * ```typescript
10
10
  * import { pdf } from "@cj-tech-master/excelts/pdf";
11
11
  *
12
- * const bytes = pdf([
12
+ * const bytes = await pdf([
13
13
  * ["Product", "Revenue"],
14
14
  * ["Widget", 1000],
15
15
  * ["Gadget", 2500]
@@ -18,7 +18,7 @@
18
18
  *
19
19
  * @example With options:
20
20
  * ```typescript
21
- * const bytes = pdf([
21
+ * const bytes = await pdf([
22
22
  * ["Name", "Score"],
23
23
  * ["Alice", 95],
24
24
  * ["Bob", 87]
@@ -27,7 +27,7 @@
27
27
  *
28
28
  * @example Multiple sheets:
29
29
  * ```typescript
30
- * const bytes = pdf({
30
+ * const bytes = await pdf({
31
31
  * sheets: [
32
32
  * { name: "Sales", data: [["Product", "Revenue"], ["Widget", 1000]] },
33
33
  * { name: "Costs", data: [["Item", "Amount"], ["Rent", 500]] }
@@ -37,7 +37,7 @@
37
37
  *
38
38
  * @example With column widths and styles:
39
39
  * ```typescript
40
- * const bytes = pdf({
40
+ * const bytes = await pdf({
41
41
  * name: "Report",
42
42
  * columns: [{ width: 25 }, { width: 15 }],
43
43
  * data: [
@@ -58,12 +58,13 @@ const pdf_exporter_1 = require("./render/pdf-exporter");
58
58
  * Generate a PDF.
59
59
  *
60
60
  * Accepts anything from a plain 2D array to a multi-sheet workbook.
61
+ * Yields to the event loop between each output page during layout and rendering.
61
62
  *
62
63
  * @param input - 2D array, sheet object, or workbook object
63
64
  * @param options - PDF export options (page size, margins, etc.)
64
- * @returns PDF file as Uint8Array
65
+ * @returns Promise of PDF file as Uint8Array
65
66
  */
66
- function pdf(input, options) {
67
+ async function pdf(input, options) {
67
68
  const workbook = normalizeInput(input);
68
69
  return (0, pdf_exporter_1.exportPdf)(workbook, options);
69
70
  }
@@ -17,18 +17,18 @@
17
17
  * - Cross-reference tables and streams (PDF 1.5+)
18
18
  * - Incremental updates and xref recovery
19
19
  *
20
- * @example Basic text extraction:
20
+ * @example Text extraction:
21
21
  * ```typescript
22
22
  * import { readPdf } from "excelts/pdf";
23
23
  *
24
- * const pdf = readPdf(pdfBytes);
24
+ * const pdf = await readPdf(pdfBytes);
25
25
  * console.log(pdf.text); // All text from all pages
26
26
  * console.log(pdf.pages[0].text); // Text from page 1
27
27
  * ```
28
28
  *
29
29
  * @example Image extraction:
30
30
  * ```typescript
31
- * const pdf = readPdf(pdfBytes);
31
+ * const pdf = await readPdf(pdfBytes);
32
32
  * for (const image of pdf.pages[0].images) {
33
33
  * console.log(image.format, image.width, image.height);
34
34
  * fs.writeFileSync(`image.${image.format}`, image.data);
@@ -37,7 +37,7 @@
37
37
  *
38
38
  * @example Metadata:
39
39
  * ```typescript
40
- * const pdf = readPdf(pdfBytes);
40
+ * const pdf = await readPdf(pdfBytes);
41
41
  * console.log(pdf.metadata.title);
42
42
  * console.log(pdf.metadata.author);
43
43
  * console.log(pdf.metadata.pageCount);
@@ -45,7 +45,7 @@
45
45
  *
46
46
  * @example Encrypted PDF:
47
47
  * ```typescript
48
- * const pdf = readPdf(pdfBytes, { password: "secret" });
48
+ * const pdf = await readPdf(pdfBytes, { password: "secret" });
49
49
  * ```
50
50
  */
51
51
  Object.defineProperty(exports, "__esModule", { value: true });
@@ -59,19 +59,36 @@ const annotation_extractor_1 = require("./annotation-extractor");
59
59
  const form_extractor_1 = require("./form-extractor");
60
60
  const metadata_reader_1 = require("./metadata-reader");
61
61
  const errors_1 = require("../errors");
62
+ const utils_base_1 = require("../../../utils/utils.base.js");
62
63
  // =============================================================================
63
64
  // Public API
64
65
  // =============================================================================
65
66
  /**
66
67
  * Read a PDF file and extract text, images, and metadata.
68
+ * Yields to the event loop between pages to avoid blocking.
67
69
  *
68
70
  * @param data - Raw PDF file bytes
69
71
  * @param options - Extraction options
70
- * @returns Extracted content
72
+ * @returns Promise of extracted content
71
73
  * @throws {PdfStructureError} If the PDF structure is invalid
72
74
  * @throws {PdfError} If decryption fails (wrong password)
73
75
  */
74
- function readPdf(data, options) {
76
+ async function readPdf(data, options) {
77
+ const { doc, opts, metadata, pagesInfo, pageIndicesToProcess } = prepareRead(data, options);
78
+ const pages = [];
79
+ for (let i = 0; i < pageIndicesToProcess.length; i++) {
80
+ const pageIdx = pageIndicesToProcess[i];
81
+ pages.push(processPage(pagesInfo[pageIdx].dict, pageIdx, doc, opts));
82
+ if (i < pageIndicesToProcess.length - 1) {
83
+ await (0, utils_base_1.yieldToEventLoop)();
84
+ }
85
+ }
86
+ return finalizeRead(pages, pagesInfo.length, metadata, opts, doc);
87
+ }
88
+ /**
89
+ * Shared setup: parse document, handle encryption, extract metadata, resolve pages.
90
+ */
91
+ function prepareRead(data, options) {
75
92
  const opts = {
76
93
  password: options?.password ?? "",
77
94
  pages: options?.pages,
@@ -81,86 +98,81 @@ function readPdf(data, options) {
81
98
  extractAnnotations: options?.extractAnnotations ?? true,
82
99
  extractFormFields: options?.extractFormFields ?? true
83
100
  };
84
- // Parse document structure
85
101
  const doc = new pdf_document_1.PdfDocument(data);
86
- // Handle encryption
87
102
  if ((0, pdf_decrypt_1.isEncrypted)(doc)) {
88
103
  const success = (0, pdf_decrypt_1.initDecryption)(doc, opts.password);
89
104
  if (!success) {
90
105
  throw new errors_1.PdfStructureError("Failed to decrypt PDF: incorrect password");
91
106
  }
92
107
  }
93
- // Extract metadata
94
108
  const metadata = opts.extractMetadata ? (0, metadata_reader_1.extractMetadata)(doc) : createEmptyMetadata();
95
- // Get pages (with object identity for correct decryption)
96
109
  const pagesInfo = doc.getPagesWithObjInfo();
97
110
  const pageIndicesToProcess = opts.pages
98
111
  ? opts.pages.map(p => p - 1).filter(p => p >= 0 && p < pagesInfo.length)
99
112
  : Array.from({ length: pagesInfo.length }, (_, i) => i);
100
- // Process each page
101
- const pages = [];
102
- for (const pageIdx of pageIndicesToProcess) {
103
- const { dict: pageDict } = pagesInfo[pageIdx];
104
- const pageNumber = pageIdx + 1;
105
- const warnings = [];
106
- // Extract text
107
- let text = "";
108
- let textLines = [];
109
- let textFragments = [];
110
- if (opts.extractText) {
111
- try {
112
- textFragments = (0, content_interpreter_1.extractTextFromPage)(pageDict, doc);
113
- text = (0, text_reconstruction_1.reconstructText)(textFragments);
114
- textLines = (0, text_reconstruction_1.reconstructTextLines)(textFragments);
115
- }
116
- catch (err) {
117
- const msg = err instanceof Error ? err.message : String(err);
118
- warnings.push(`Text extraction failed on page ${pageNumber}: ${msg}`);
119
- }
113
+ return { doc, opts, metadata, pagesInfo, pageIndicesToProcess };
114
+ }
115
+ /**
116
+ * Process a single page: extract text, images, annotations, and dimensions.
117
+ */
118
+ function processPage(pageDict, pageIdx, doc, opts) {
119
+ const pageNumber = pageIdx + 1;
120
+ const warnings = [];
121
+ let text = "";
122
+ let textLines = [];
123
+ let textFragments = [];
124
+ if (opts.extractText) {
125
+ try {
126
+ textFragments = (0, content_interpreter_1.extractTextFromPage)(pageDict, doc);
127
+ text = (0, text_reconstruction_1.reconstructText)(textFragments);
128
+ textLines = (0, text_reconstruction_1.reconstructTextLines)(textFragments);
120
129
  }
121
- // Extract images
122
- let images = [];
123
- if (opts.extractImages) {
124
- try {
125
- images = (0, image_extractor_1.extractImagesFromPage)(pageDict, doc);
126
- }
127
- catch (err) {
128
- const msg = err instanceof Error ? err.message : String(err);
129
- warnings.push(`Image extraction failed on page ${pageNumber}: ${msg}`);
130
- }
130
+ catch (err) {
131
+ const msg = err instanceof Error ? err.message : String(err);
132
+ warnings.push(`Text extraction failed on page ${pageNumber}: ${msg}`);
131
133
  }
132
- // Extract annotations
133
- let annotations = [];
134
- if (opts.extractAnnotations) {
135
- try {
136
- annotations = (0, annotation_extractor_1.extractAnnotationsFromPage)(pageDict, doc);
137
- }
138
- catch (err) {
139
- const msg = err instanceof Error ? err.message : String(err);
140
- warnings.push(`Annotation extraction failed on page ${pageNumber}: ${msg}`);
141
- }
134
+ }
135
+ let images = [];
136
+ if (opts.extractImages) {
137
+ try {
138
+ images = (0, image_extractor_1.extractImagesFromPage)(pageDict, doc);
139
+ }
140
+ catch (err) {
141
+ const msg = err instanceof Error ? err.message : String(err);
142
+ warnings.push(`Image extraction failed on page ${pageNumber}: ${msg}`);
143
+ }
144
+ }
145
+ let annotations = [];
146
+ if (opts.extractAnnotations) {
147
+ try {
148
+ annotations = (0, annotation_extractor_1.extractAnnotationsFromPage)(pageDict, doc);
149
+ }
150
+ catch (err) {
151
+ const msg = err instanceof Error ? err.message : String(err);
152
+ warnings.push(`Annotation extraction failed on page ${pageNumber}: ${msg}`);
142
153
  }
143
- // Get page dimensions
144
- const { width, height } = getPageDimensions(pageDict, doc);
145
- pages.push({
146
- pageNumber,
147
- text,
148
- textLines,
149
- textFragments,
150
- images,
151
- annotations,
152
- width,
153
- height,
154
- warnings
155
- });
156
154
  }
157
- // Concatenate all page text
155
+ const { width, height } = getPageDimensions(pageDict, doc);
156
+ return {
157
+ pageNumber,
158
+ text,
159
+ textLines,
160
+ textFragments,
161
+ images,
162
+ annotations,
163
+ width,
164
+ height,
165
+ warnings
166
+ };
167
+ }
168
+ /**
169
+ * Finalize: concatenate text, update metadata page count, extract form fields.
170
+ */
171
+ function finalizeRead(pages, totalPageCount, metadata, opts, doc) {
158
172
  const allText = pages.map(p => p.text).join("\n\n");
159
- // Update page count in metadata
160
173
  if (opts.extractMetadata) {
161
- metadata.pageCount = pagesInfo.length;
174
+ metadata.pageCount = totalPageCount;
162
175
  }
163
- // Extract form fields (document-level, not per-page)
164
176
  let formFields = [];
165
177
  if (opts.extractFormFields) {
166
178
  try {
@@ -170,12 +182,7 @@ function readPdf(data, options) {
170
182
  // Non-fatal — just return empty
171
183
  }
172
184
  }
173
- return {
174
- text: allText,
175
- pages,
176
- metadata,
177
- formFields
178
- };
185
+ return { text: allText, pages, metadata, formFields };
179
186
  }
180
187
  // =============================================================================
181
188
  // Helpers
@@ -0,0 +1,33 @@
1
+ "use strict";
2
+ /**
3
+ * Shared rendering constants used by both the layout engine and page renderer.
4
+ *
5
+ * Keeping these in one place ensures row-height computation and text rendering
6
+ * use exactly the same values, preventing clipped or overlapping content.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.PX_TO_PT = exports.EXCEL_COLUMN_PADDING_PX = exports.MAX_DIGIT_WIDTH_PX = exports.INDENT_WIDTH = exports.LINE_HEIGHT_FACTOR = exports.CELL_PADDING_V = exports.CELL_PADDING_H = void 0;
10
+ /** Horizontal cell padding in points (left + right = 2 × CELL_PADDING_H). */
11
+ exports.CELL_PADDING_H = 3;
12
+ /** Vertical cell padding in points (top + bottom = 2 × CELL_PADDING_V). */
13
+ exports.CELL_PADDING_V = 2;
14
+ /**
15
+ * Line-height multiplier applied to the font size.
16
+ *
17
+ * Excel's default row height for an 11pt font is 15pt, which after removing
18
+ * vertical padding (2 × 2 = 4pt) leaves 11pt × 1.0 — but Excel also adds
19
+ * internal leading. A factor of 1.2 matches standard PDF/typographic practice
20
+ * and keeps text readable without inflating row heights.
21
+ */
22
+ exports.LINE_HEIGHT_FACTOR = 1.2;
23
+ /** Width of one indent level in points (~3 characters at 11pt). */
24
+ exports.INDENT_WIDTH = 10;
25
+ /**
26
+ * Excel column widths are measured in characters of the default font's digit width.
27
+ * For Calibri 11pt (the default), maxDigitWidth ≈ 7 pixels at 96 DPI.
28
+ * Excel adds 5 pixels of padding per column (4px text margin + 1px gridline).
29
+ * To convert to PDF points: (charWidth × 7 + 5) × (72/96).
30
+ */
31
+ exports.MAX_DIGIT_WIDTH_PX = 7;
32
+ exports.EXCEL_COLUMN_PADDING_PX = 5;
33
+ exports.PX_TO_PT = 72 / 96; // 0.75