@cj-tech-master/excelts 9.1.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/README.md +16 -1
  2. package/dist/browser/modules/archive/compression/crc32.js +1 -1
  3. package/dist/browser/modules/archive/crypto/aes.d.ts +0 -8
  4. package/dist/browser/modules/archive/crypto/aes.js +1 -20
  5. package/dist/browser/modules/archive/crypto/index.d.ts +2 -1
  6. package/dist/browser/modules/archive/crypto/index.js +3 -1
  7. package/dist/browser/modules/csv/parse/row-processor.d.ts +1 -1
  8. package/dist/browser/modules/csv/worker/worker-script.generated.js +1 -1
  9. package/dist/browser/modules/excel/utils/cell-matrix.js +1 -0
  10. package/dist/browser/modules/excel/utils/encryptor.browser.d.ts +4 -5
  11. package/dist/browser/modules/excel/utils/encryptor.browser.js +7 -12
  12. package/dist/browser/modules/excel/utils/encryptor.d.ts +1 -1
  13. package/dist/browser/modules/excel/utils/encryptor.js +4 -7
  14. package/dist/browser/modules/pdf/builder/document-builder.d.ts +517 -0
  15. package/dist/browser/modules/pdf/builder/document-builder.js +1493 -0
  16. package/dist/browser/modules/pdf/builder/form-appearance.d.ts +56 -0
  17. package/dist/browser/modules/pdf/builder/form-appearance.js +140 -0
  18. package/dist/browser/modules/pdf/builder/image-utils.d.ts +39 -0
  19. package/dist/browser/modules/pdf/builder/image-utils.js +129 -0
  20. package/dist/browser/modules/pdf/builder/pdf-editor.d.ts +230 -0
  21. package/dist/browser/modules/pdf/builder/pdf-editor.js +1574 -0
  22. package/dist/browser/modules/pdf/builder/resource-merger.d.ts +41 -0
  23. package/dist/browser/modules/pdf/builder/resource-merger.js +258 -0
  24. package/dist/browser/modules/pdf/core/digital-signature.d.ts +109 -0
  25. package/dist/browser/modules/pdf/core/digital-signature.js +659 -0
  26. package/dist/browser/modules/pdf/core/encryption.js +8 -7
  27. package/dist/browser/modules/pdf/core/pdf-object.d.ts +11 -0
  28. package/dist/browser/modules/pdf/core/pdf-object.js +38 -0
  29. package/dist/browser/modules/pdf/core/pdf-stream.d.ts +32 -0
  30. package/dist/browser/modules/pdf/core/pdf-stream.js +66 -0
  31. package/dist/browser/modules/pdf/core/pdf-writer.d.ts +55 -1
  32. package/dist/browser/modules/pdf/core/pdf-writer.js +271 -6
  33. package/dist/browser/modules/pdf/core/pdfa.d.ts +62 -0
  34. package/dist/browser/modules/pdf/core/pdfa.js +261 -0
  35. package/dist/browser/modules/pdf/index.d.ts +11 -0
  36. package/dist/browser/modules/pdf/index.js +9 -0
  37. package/dist/browser/modules/pdf/reader/bookmark-extractor.d.ts +35 -0
  38. package/dist/browser/modules/pdf/reader/bookmark-extractor.js +324 -0
  39. package/dist/browser/modules/pdf/reader/pdf-decrypt.js +6 -5
  40. package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +17 -0
  41. package/dist/browser/modules/pdf/reader/pdf-reader.js +26 -2
  42. package/dist/browser/modules/pdf/reader/table-extractor.d.ts +69 -0
  43. package/dist/browser/modules/pdf/reader/table-extractor.js +365 -0
  44. package/dist/browser/modules/pdf/render/layout-engine.d.ts +21 -1
  45. package/dist/browser/modules/pdf/render/layout-engine.js +112 -5
  46. package/dist/browser/modules/pdf/render/page-renderer.d.ts +2 -9
  47. package/dist/browser/modules/pdf/render/page-renderer.js +62 -103
  48. package/dist/browser/modules/pdf/render/pdf-exporter.js +2 -61
  49. package/dist/browser/modules/pdf/render/style-converter.d.ts +4 -0
  50. package/dist/browser/modules/pdf/render/style-converter.js +1 -1
  51. package/dist/browser/modules/pdf/types.d.ts +14 -1
  52. package/dist/browser/modules/stream/browser/readable.js +8 -2
  53. package/dist/browser/utils/crypto.browser.d.ts +64 -0
  54. package/dist/browser/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +91 -101
  55. package/dist/browser/utils/crypto.d.ts +97 -0
  56. package/dist/browser/utils/crypto.js +209 -0
  57. package/dist/cjs/modules/archive/compression/crc32.js +1 -1
  58. package/dist/cjs/modules/archive/crypto/aes.js +2 -23
  59. package/dist/cjs/modules/archive/crypto/index.js +3 -1
  60. package/dist/cjs/modules/csv/worker/worker-script.generated.js +1 -1
  61. package/dist/cjs/modules/excel/utils/cell-matrix.js +1 -0
  62. package/dist/cjs/modules/excel/utils/encryptor.browser.js +7 -12
  63. package/dist/cjs/modules/excel/utils/encryptor.js +4 -10
  64. package/dist/cjs/modules/pdf/builder/document-builder.js +1532 -0
  65. package/dist/cjs/modules/pdf/builder/form-appearance.js +145 -0
  66. package/dist/cjs/modules/pdf/builder/image-utils.js +135 -0
  67. package/dist/cjs/modules/pdf/builder/pdf-editor.js +1612 -0
  68. package/dist/cjs/modules/pdf/builder/resource-merger.js +263 -0
  69. package/dist/cjs/modules/pdf/core/digital-signature.js +667 -0
  70. package/dist/cjs/modules/pdf/core/encryption.js +8 -7
  71. package/dist/cjs/modules/pdf/core/pdf-object.js +38 -0
  72. package/dist/cjs/modules/pdf/core/pdf-stream.js +66 -0
  73. package/dist/cjs/modules/pdf/core/pdf-writer.js +272 -6
  74. package/dist/cjs/modules/pdf/core/pdfa.js +266 -0
  75. package/dist/cjs/modules/pdf/index.js +19 -1
  76. package/dist/cjs/modules/pdf/reader/bookmark-extractor.js +327 -0
  77. package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +6 -5
  78. package/dist/cjs/modules/pdf/reader/pdf-reader.js +26 -2
  79. package/dist/cjs/modules/pdf/reader/table-extractor.js +368 -0
  80. package/dist/cjs/modules/pdf/render/layout-engine.js +113 -4
  81. package/dist/cjs/modules/pdf/render/page-renderer.js +63 -105
  82. package/dist/cjs/modules/pdf/render/pdf-exporter.js +3 -62
  83. package/dist/cjs/modules/pdf/render/style-converter.js +1 -0
  84. package/dist/cjs/modules/stream/browser/readable.js +8 -2
  85. package/dist/cjs/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +95 -102
  86. package/dist/cjs/utils/crypto.js +228 -0
  87. package/dist/esm/modules/archive/compression/crc32.js +1 -1
  88. package/dist/esm/modules/archive/crypto/aes.js +1 -20
  89. package/dist/esm/modules/archive/crypto/index.js +3 -1
  90. package/dist/esm/modules/csv/worker/worker-script.generated.js +1 -1
  91. package/dist/esm/modules/excel/utils/cell-matrix.js +1 -0
  92. package/dist/esm/modules/excel/utils/encryptor.browser.js +7 -12
  93. package/dist/esm/modules/excel/utils/encryptor.js +4 -7
  94. package/dist/esm/modules/pdf/builder/document-builder.js +1493 -0
  95. package/dist/esm/modules/pdf/builder/form-appearance.js +140 -0
  96. package/dist/esm/modules/pdf/builder/image-utils.js +129 -0
  97. package/dist/esm/modules/pdf/builder/pdf-editor.js +1574 -0
  98. package/dist/esm/modules/pdf/builder/resource-merger.js +258 -0
  99. package/dist/esm/modules/pdf/core/digital-signature.js +659 -0
  100. package/dist/esm/modules/pdf/core/encryption.js +8 -7
  101. package/dist/esm/modules/pdf/core/pdf-object.js +38 -0
  102. package/dist/esm/modules/pdf/core/pdf-stream.js +66 -0
  103. package/dist/esm/modules/pdf/core/pdf-writer.js +271 -6
  104. package/dist/esm/modules/pdf/core/pdfa.js +261 -0
  105. package/dist/esm/modules/pdf/index.js +9 -0
  106. package/dist/esm/modules/pdf/reader/bookmark-extractor.js +324 -0
  107. package/dist/esm/modules/pdf/reader/pdf-decrypt.js +6 -5
  108. package/dist/esm/modules/pdf/reader/pdf-reader.js +26 -2
  109. package/dist/esm/modules/pdf/reader/table-extractor.js +365 -0
  110. package/dist/esm/modules/pdf/render/layout-engine.js +112 -5
  111. package/dist/esm/modules/pdf/render/page-renderer.js +62 -103
  112. package/dist/esm/modules/pdf/render/pdf-exporter.js +2 -61
  113. package/dist/esm/modules/pdf/render/style-converter.js +1 -1
  114. package/dist/esm/modules/stream/browser/readable.js +8 -2
  115. package/dist/esm/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +91 -101
  116. package/dist/esm/utils/crypto.js +209 -0
  117. package/dist/iife/excelts.iife.js +1248 -1074
  118. package/dist/iife/excelts.iife.js.map +1 -1
  119. package/dist/iife/excelts.iife.min.js +53 -54
  120. package/dist/types/modules/archive/crypto/aes.d.ts +0 -8
  121. package/dist/types/modules/archive/crypto/index.d.ts +2 -1
  122. package/dist/types/modules/csv/parse/row-processor.d.ts +1 -1
  123. package/dist/types/modules/excel/utils/encryptor.browser.d.ts +4 -5
  124. package/dist/types/modules/excel/utils/encryptor.d.ts +1 -1
  125. package/dist/types/modules/pdf/builder/document-builder.d.ts +517 -0
  126. package/dist/types/modules/pdf/builder/form-appearance.d.ts +56 -0
  127. package/dist/types/modules/pdf/builder/image-utils.d.ts +39 -0
  128. package/dist/types/modules/pdf/builder/pdf-editor.d.ts +230 -0
  129. package/dist/types/modules/pdf/builder/resource-merger.d.ts +41 -0
  130. package/dist/types/modules/pdf/core/digital-signature.d.ts +109 -0
  131. package/dist/types/modules/pdf/core/pdf-object.d.ts +11 -0
  132. package/dist/types/modules/pdf/core/pdf-stream.d.ts +32 -0
  133. package/dist/types/modules/pdf/core/pdf-writer.d.ts +55 -1
  134. package/dist/types/modules/pdf/core/pdfa.d.ts +62 -0
  135. package/dist/types/modules/pdf/index.d.ts +11 -0
  136. package/dist/types/modules/pdf/reader/bookmark-extractor.d.ts +35 -0
  137. package/dist/types/modules/pdf/reader/pdf-reader.d.ts +17 -0
  138. package/dist/types/modules/pdf/reader/table-extractor.d.ts +69 -0
  139. package/dist/types/modules/pdf/render/layout-engine.d.ts +21 -1
  140. package/dist/types/modules/pdf/render/page-renderer.d.ts +2 -9
  141. package/dist/types/modules/pdf/render/style-converter.d.ts +4 -0
  142. package/dist/types/modules/pdf/types.d.ts +14 -1
  143. package/dist/types/utils/crypto.browser.d.ts +64 -0
  144. package/dist/types/utils/crypto.d.ts +97 -0
  145. package/package.json +110 -111
  146. package/dist/browser/modules/pdf/core/crypto.d.ts +0 -65
  147. package/dist/types/modules/pdf/core/crypto.d.ts +0 -65
@@ -57,7 +57,9 @@ const text_reconstruction_1 = require("./text-reconstruction");
57
57
  const image_extractor_1 = require("./image-extractor");
58
58
  const annotation_extractor_1 = require("./annotation-extractor");
59
59
  const form_extractor_1 = require("./form-extractor");
60
+ const bookmark_extractor_1 = require("./bookmark-extractor");
60
61
  const metadata_reader_1 = require("./metadata-reader");
62
+ const table_extractor_1 = require("./table-extractor");
61
63
  const errors_1 = require("../errors");
62
64
  const utils_base_1 = require("../../../utils/utils.base.js");
63
65
  // =============================================================================
@@ -96,7 +98,9 @@ function prepareRead(data, options) {
96
98
  extractImages: options?.extractImages ?? true,
97
99
  extractMetadata: options?.extractMetadata ?? true,
98
100
  extractAnnotations: options?.extractAnnotations ?? true,
99
- extractFormFields: options?.extractFormFields ?? true
101
+ extractFormFields: options?.extractFormFields ?? true,
102
+ extractBookmarks: options?.extractBookmarks ?? true,
103
+ extractTables: options?.extractTables ?? false
100
104
  };
101
105
  const doc = new pdf_document_1.PdfDocument(data);
102
106
  if ((0, pdf_decrypt_1.isEncrypted)(doc)) {
@@ -153,6 +157,16 @@ function processPage(pageDict, pageIdx, doc, opts) {
153
157
  }
154
158
  }
155
159
  const { width, height } = getPageDimensions(pageDict, doc);
160
+ let tables = [];
161
+ if (opts.extractTables) {
162
+ try {
163
+ tables = (0, table_extractor_1.extractTables)(textFragments, width, height);
164
+ }
165
+ catch (err) {
166
+ const msg = err instanceof Error ? err.message : String(err);
167
+ warnings.push(`Table extraction failed on page ${pageNumber}: ${msg}`);
168
+ }
169
+ }
156
170
  return {
157
171
  pageNumber,
158
172
  text,
@@ -160,6 +174,7 @@ function processPage(pageDict, pageIdx, doc, opts) {
160
174
  textFragments,
161
175
  images,
162
176
  annotations,
177
+ tables,
163
178
  width,
164
179
  height,
165
180
  warnings
@@ -182,7 +197,16 @@ function finalizeRead(pages, totalPageCount, metadata, opts, doc) {
182
197
  // Non-fatal — just return empty
183
198
  }
184
199
  }
185
- return { text: allText, pages, metadata, formFields };
200
+ let bookmarks = [];
201
+ if (opts.extractBookmarks) {
202
+ try {
203
+ bookmarks = (0, bookmark_extractor_1.extractBookmarks)(doc);
204
+ }
205
+ catch {
206
+ // Non-fatal — just return empty
207
+ }
208
+ }
209
+ return { text: allText, pages, metadata, formFields, bookmarks };
186
210
  }
187
211
  // =============================================================================
188
212
  // Helpers
@@ -0,0 +1,368 @@
1
+ "use strict";
2
+ /**
3
+ * Table extraction from PDF pages using text fragment positioning.
4
+ *
5
+ * Detects tabular structures by analyzing the spatial layout of text fragments.
6
+ * Since PDF content streams typically render tables as positioned text (with or
7
+ * without drawn grid lines), this module uses a text-only heuristic:
8
+ *
9
+ * 1. Group fragments into lines by Y proximity
10
+ * 2. Detect column boundaries from consistent X-position clusters
11
+ * 3. Identify contiguous blocks of multi-column lines as tables
12
+ * 4. Map fragments to cells based on column/line membership
13
+ *
14
+ * @see content-interpreter.ts for TextFragment extraction
15
+ * @see text-reconstruction.ts for line grouping logic
16
+ */
17
+ Object.defineProperty(exports, "__esModule", { value: true });
18
+ exports.extractTables = extractTables;
19
+ // =============================================================================
20
+ // Constants
21
+ // =============================================================================
22
+ /**
23
+ * Minimum number of columns required to consider a block of lines as a table.
24
+ */
25
+ const MIN_TABLE_COLUMNS = 2;
26
+ /**
27
+ * Minimum number of consecutive multi-column lines to form a table.
28
+ */
29
+ const MIN_TABLE_ROWS = 2;
30
+ // =============================================================================
31
+ // Public API
32
+ // =============================================================================
33
/**
 * Detect tables on a single page from its positioned text fragments.
 *
 * Only text geometry is consulted; drawn grid lines play no part. The pipeline
 * is: drop vertical and whitespace-only fragments, group the rest into lines,
 * infer column boundaries, locate contiguous runs of tabular lines, and turn
 * each run into a table structure.
 *
 * @param fragments - Text fragments of the page
 * @param pageWidth - Page width, forwarded to column detection
 * @param pageHeight - Page height, forwarded to table construction
 * @returns Detected tables; an empty array when nothing table-like is found
 */
function extractTables(fragments, pageWidth, pageHeight) {
    if (fragments.length === 0) {
        return [];
    }
    // Vertical (e.g. CJK) text and blank fragments never take part in detection.
    const candidates = fragments.filter(frag => !frag.isVertical && frag.text.trim().length > 0);
    // A table needs at least MIN_TABLE_COLUMNS fragments on MIN_TABLE_ROWS lines.
    const fewestFragments = MIN_TABLE_COLUMNS * MIN_TABLE_ROWS;
    if (candidates.length < fewestFragments) {
        return [];
    }
    const textLines = groupFragmentsIntoLines(candidates);
    if (textLines.length < MIN_TABLE_ROWS) {
        return [];
    }
    const columnBounds = detectColumnBoundaries(textLines, pageWidth);
    if (columnBounds.length < MIN_TABLE_COLUMNS) {
        return [];
    }
    // Each range is a run of consecutive line indices; buildTable may decline a
    // range by returning a falsy value, in which case it is dropped.
    return findTableRanges(textLines, columnBounds)
        .map(span => buildTable(textLines, columnBounds, span.start, span.end, pageHeight))
        .filter(built => Boolean(built));
}
75
+ // =============================================================================
76
+ // Step 1: Group Fragments into Lines
77
+ // =============================================================================
78
/**
 * Group text fragments into horizontal lines based on Y proximity.
 *
 * Fragments are ordered by descending `y` (top of the page first in PDF
 * coordinates) and then swept once: a fragment joins the current line when its
 * `y` is within a font-size-derived threshold of the line's anchor `y`,
 * otherwise it starts a new line. Each emitted line has its fragments sorted
 * left-to-right and reports the `fontSize` of its leftmost fragment.
 *
 * The input array is not mutated. An empty input yields an empty result.
 *
 * @param fragments - Fragments exposing `x`, `y` and `fontSize`
 * @returns Lines of the shape `{ y, fragments, fontSize }`
 */
function groupFragmentsIntoLines(fragments) {
    // Guard: without it `sorted[0]` below is undefined and `.y` would throw.
    if (fragments.length === 0) {
        return [];
    }
    // NOTE(review): this comparator treats y values within 1 unit as equal, so
    // it is not a strict total order; fragments whose baselines drift by less
    // than 1 unit may be ordered in an engine-dependent way. Kept as-is to
    // preserve existing output.
    const sorted = [...fragments].sort((a, b) => {
        const dy = b.y - a.y;
        if (Math.abs(dy) > 1) {
            return dy;
        }
        return a.x - b.x;
    });
    const lines = [];
    // Finalize one line: order its members left-to-right and record it.
    const flushLine = (y, members) => {
        members.sort((a, b) => a.x - b.x);
        lines.push({
            y,
            fragments: members,
            fontSize: members[0].fontSize
        });
    };
    let currentFragments = [sorted[0]];
    let currentY = sorted[0].y;
    for (let i = 1; i < sorted.length; i++) {
        const f = sorted[i];
        // Tolerance scales with the font sizes involved, with a floor of 2 units.
        const avgFontSize = (currentFragments[0].fontSize + f.fontSize) / 2;
        const threshold = Math.max(avgFontSize * 0.4, 2);
        if (Math.abs(f.y - currentY) <= threshold) {
            currentFragments.push(f);
        }
        else {
            flushLine(currentY, currentFragments);
            currentFragments = [f];
            currentY = f.y;
        }
    }
    // The line in progress always holds at least one fragment at this point.
    flushLine(currentY, currentFragments);
    return lines;
}
124
+ // =============================================================================
125
+ // Step 2: Detect Column Boundaries
126
+ // =============================================================================
127
/**
 * Infer column boundaries from how fragments are distributed across lines.
 *
 * Outline:
 *  1. Tally how many fragments each line holds (ignoring lines with fewer than
 *     MIN_TABLE_COLUMNS) and take the most frequent tally as the expected
 *     column count; ties go to the larger count.
 *  2. Using only lines with exactly that many fragments, track for every gap
 *     the furthest right edge on its left side and the earliest left edge on
 *     its right side.
 *  3. Put a divider at the midpoint of a clean gap, or at the earliest left
 *     edge when the two sides overlap.
 *  4. Turn the dividers into `{ left, right }` columns spanning the horizontal
 *     extent of all fragments on all lines.
 *
 * @param lines - Lines whose `fragments` are ordered left-to-right
 * @param _pageWidth - Unused
 * @returns Column boundaries, or an empty array when no layout is detected
 */
function detectColumnBoundaries(lines, _pageWidth) {
    // 1. Histogram of per-line fragment counts.
    const tally = new Map();
    lines.forEach(({ fragments }) => {
        const n = fragments.length;
        if (n >= MIN_TABLE_COLUMNS) {
            tally.set(n, (tally.get(n) ?? 0) + 1);
        }
    });
    if (tally.size === 0) {
        return [];
    }
    let expectedCount = 0;
    let occurrences = 0;
    tally.forEach((freq, count) => {
        const moreFrequent = freq > occurrences;
        const tieButWider = freq === occurrences && count > expectedCount;
        if (moreFrequent || tieButWider) {
            expectedCount = count;
            occurrences = freq;
        }
    });
    if (expectedCount < MIN_TABLE_COLUMNS || occurrences < MIN_TABLE_ROWS) {
        return [];
    }
    // 2. Per-gap extremes, gathered only from lines matching the expected count.
    const gapCount = expectedCount - 1;
    const furthestRight = new Array(gapCount).fill(-Infinity);
    const earliestLeft = new Array(gapCount).fill(Infinity);
    for (const { fragments } of lines) {
        if (fragments.length !== expectedCount) {
            continue;
        }
        for (let gap = 0; gap < gapCount; gap++) {
            const before = fragments[gap];
            const after = fragments[gap + 1];
            furthestRight[gap] = Math.max(furthestRight[gap], before.x + before.width);
            earliestLeft[gap] = Math.min(earliestLeft[gap], after.x);
        }
    }
    // 3. One divider per gap.
    const dividers = earliestLeft.map((leftEdge, gap) => {
        const rightEdge = furthestRight[gap];
        // Clean gap: midpoint. Overlap: fall back to where the next column starts.
        return leftEdge > rightEdge ? (rightEdge + leftEdge) / 2 : leftEdge;
    });
    if (dividers.length < 1) {
        return [];
    }
    // 4. Horizontal extent of every fragment on every line (not just matching ones).
    let extentLeft = Infinity;
    let extentRight = -Infinity;
    for (const { fragments } of lines) {
        for (const frag of fragments) {
            extentLeft = Math.min(extentLeft, frag.x);
            extentRight = Math.max(extentRight, frag.x + frag.width);
        }
    }
    // Consecutive edges delimit the columns.
    const edges = [extentLeft, ...dividers, extentRight];
    const columns = [];
    for (let k = 0; k + 1 < edges.length; k++) {
        columns.push({ left: edges[k], right: edges[k + 1] });
    }
    if (columns.length < MIN_TABLE_COLUMNS) {
        return [];
    }
    return columns;
}
225
+ // =============================================================================
226
+ // Step 3: Find Contiguous Table Ranges
227
+ // =============================================================================
228
/**
 * Locate runs of consecutive lines that look tabular.
 *
 * A line counts as tabular when its fragments land in at least
 * MIN_TABLE_COLUMNS distinct columns. A run is reported only when it spans at
 * least MIN_TABLE_ROWS lines.
 *
 * @param lines - Lines in page order
 * @param columns - Column boundaries used to classify fragments
 * @returns Inclusive `{ start, end }` index ranges into `lines`
 */
function findTableRanges(lines, columns) {
    // Number of distinct columns touched by each line.
    const occupancy = lines.map(line => {
        const touched = new Set();
        for (const frag of line.fragments) {
            const idx = findColumnIndex(frag.x, columns);
            if (idx >= 0) {
                touched.add(idx);
            }
        }
        return touched.size;
    });
    const ranges = [];
    let openAt = -1; // index where the current run began, or -1 when no run is open
    occupancy.forEach((columnCount, i) => {
        if (columnCount >= MIN_TABLE_COLUMNS) {
            if (openAt === -1) {
                openAt = i;
            }
            return;
        }
        if (openAt === -1) {
            return;
        }
        // A non-tabular line ends the run; keep it only if it is long enough.
        if (i - openAt >= MIN_TABLE_ROWS) {
            ranges.push({ start: openAt, end: i - 1 });
        }
        openAt = -1;
    });
    // A run still open after the last line extends to the end.
    if (openAt !== -1 && lines.length - openAt >= MIN_TABLE_ROWS) {
        ranges.push({ start: openAt, end: lines.length - 1 });
    }
    return ranges;
}
266
/**
 * Map an X position to a column index.
 *
 * Columns are scanned from last to first and the first one whose left edge
 * (minus a 1-unit tolerance) is at or before `x` wins. Only left edges are
 * consulted, so any position past the last column's left edge maps to the last
 * column.
 *
 * @param x - X position to classify
 * @param columns - Column boundaries exposing `left`
 * @returns The column index, or -1 when `x` lies left of every column
 */
function findColumnIndex(x, columns) {
    let idx = columns.length;
    while (idx-- > 0) {
        const toleratedLeft = columns[idx].left - 1;
        if (x >= toleratedLeft) {
            return idx;
        }
    }
    return -1;
}
278
+ // =============================================================================
279
+ // Step 4: Build Table Structure
280
+ // =============================================================================
281
/**
 * Build a table structure from a run of lines and the column boundaries.
 *
 * Rows are produced by `buildRow`, one per line in the inclusive range. The
 * bounding box runs from the leftmost fragment start to the rightmost fragment
 * end, and vertically from the first line's `y` down to the last line's `y`
 * plus that line's font size.
 *
 * @param lines - All lines of the page
 * @param columns - Column boundaries
 * @param startLine - Index of the first line of the table (inclusive)
 * @param endLine - Index of the last line of the table (inclusive)
 * @param _pageHeight - Unused
 * @returns `{ rows, x, y, width, height }`, or `null` when the range is empty
 */
function buildTable(lines, columns, startLine, endLine, _pageHeight) {
    const rows = [];
    for (let li = startLine; li <= endLine; li++) {
        rows.push(buildRow(lines[li], columns, lines, li, startLine, endLine));
    }
    if (rows.length === 0) {
        return null;
    }
    const tableLines = lines.slice(startLine, endLine + 1);
    const firstLine = tableLines[0];
    const lastLine = tableLines[tableLines.length - 1];
    // Track the horizontal extent with running min/max rather than spreading
    // every coordinate into Math.min/Math.max: spread passes each element as a
    // call argument, which can exceed the engine's argument limit on pages with
    // very many fragments.
    let minX = Infinity;
    let maxRight = -Infinity;
    let sawFragment = false;
    for (const line of tableLines) {
        for (const f of line.fragments) {
            sawFragment = true;
            minX = Math.min(minX, f.x);
            maxRight = Math.max(maxRight, f.x + f.width);
        }
    }
    // With no fragments at all the box collapses to x = 0, width = 0.
    const tableX = sawFragment ? minX : 0;
    const tableRight = sawFragment ? maxRight : 0;
    return {
        rows,
        x: tableX,
        y: firstLine.y,
        width: tableRight - tableX,
        // From the first line's y to the last line's y, plus the last line's font size.
        height: firstLine.y - lastLine.y + lastLine.fontSize
    };
}
320
/**
 * Build one table row by distributing a line's fragments over the columns.
 *
 * Every column yields exactly one cell. A column that received fragments gets
 * their texts joined with single spaces (then trimmed) and a box spanning from
 * the first fragment's start to the last fragment's end; a column without
 * fragments gets an empty-text cell sized to the column itself. Fragments that
 * map to no column are ignored.
 *
 * @param line - Line of the shape `{ y, fragments, fontSize }`
 * @param columns - Column boundaries
 * @param _allLines - Unused
 * @param _lineIdx - Unused
 * @param _startLine - Unused
 * @param _endLine - Unused
 * @returns `{ cells }` with one cell per column, in column order
 */
function buildRow(line, columns, _allLines, _lineIdx, _startLine, _endLine) {
    // Bucket fragments by the column they fall into, keeping line order.
    const buckets = new Map();
    for (const frag of line.fragments) {
        const idx = findColumnIndex(frag.x, columns);
        if (idx < 0) {
            continue;
        }
        if (!buckets.has(idx)) {
            buckets.set(idx, []);
        }
        buckets.get(idx).push(frag);
    }
    const cells = columns.map((column, idx) => {
        const members = buckets.get(idx);
        if (!members || members.length === 0) {
            // Nothing landed here: emit a placeholder covering the column.
            return {
                text: "",
                x: column.left,
                y: line.y,
                width: column.right - column.left,
                height: line.fontSize
            };
        }
        const first = members[0];
        const last = members[members.length - 1];
        return {
            text: members.map(member => member.text).join(" ").trim(),
            x: first.x,
            y: line.y,
            width: last.x + last.width - first.x,
            height: line.fontSize
        };
    });
    return { cells };
}
@@ -20,6 +20,8 @@
20
20
  Object.defineProperty(exports, "__esModule", { value: true });
21
21
  exports.layoutSheet = layoutSheet;
22
22
  exports.paginateRows = paginateRows;
23
+ exports.borderPrecedence = borderPrecedence;
24
+ exports.resolveSharedBorders = resolveSharedBorders;
23
25
  const types_1 = require("../types");
24
26
  const font_manager_1 = require("../font/font-manager");
25
27
  const style_converter_1 = require("./style-converter");
@@ -244,6 +246,9 @@ function buildPageLayout(ctx, rowPage, colGroup, currentPageCount, sheet, option
244
246
  cellGrid.set(`${ri}:${gci}`, layoutCell);
245
247
  }
246
248
  }
249
+ // Resolve shared borders: on each shared edge between adjacent cells, keep
250
+ // only the winning border for drawing but preserve insets for both cells.
251
+ resolveSharedBorders(cellGrid, rowPage.length, colGroup.length);
247
252
  // Compute text overflow widths for non-wrapped cells
248
253
  computeTextOverflows(cellGrid, rowPage, colGroup, visibleRows, visibleCols, groupColWidths, mergeMap, fontManager);
249
254
  return {
@@ -402,7 +407,16 @@ function computeRowHeights(sheet, scaleFactor, printRange, fontManager, options)
402
407
  const fontSize = getCellFontSize(cell);
403
408
  const wrapLineCount = countWrapLines(cell, fontSize, scaleFactor, sheet, fontManager, options);
404
409
  const lineHeight = fontSize * constants_1.LINE_HEIGHT_FACTOR;
405
- const neededHeight = fontSize + (wrapLineCount - 1) * lineHeight + constants_1.CELL_PADDING_V * 2;
410
+ // Account for border width: half of each border extends inward
411
+ const borderTop = cell.style?.border?.top?.style
412
+ ? (0, style_converter_1.borderStyleToLineWidth)(cell.style.border.top.style) / 2
413
+ : 0;
414
+ const borderBottom = cell.style?.border?.bottom?.style
415
+ ? (0, style_converter_1.borderStyleToLineWidth)(cell.style.border.bottom.style) / 2
416
+ : 0;
417
+ const neededHeight = fontSize +
418
+ (wrapLineCount - 1) * lineHeight +
419
+ (constants_1.CELL_PADDING_V + borderTop + borderBottom) * 2;
406
420
  if (neededHeight > height) {
407
421
  height = neededHeight;
408
422
  }
@@ -447,7 +461,13 @@ function countWrapLines(cell, fontSize, scaleFactor, sheet, fontManager, options
447
461
  const colWidth = col?.width ?? DEFAULT_COLUMN_WIDTH;
448
462
  const scaledColPts = (colWidth * constants_1.MAX_DIGIT_WIDTH_PX + constants_1.EXCEL_COLUMN_PADDING_PX) * constants_1.PX_TO_PT * scaleFactor;
449
463
  const indent = cell.style.alignment.indent ?? 0;
450
- const padding = constants_1.CELL_PADDING_H * 2 + indent * constants_1.INDENT_WIDTH;
464
+ const borderLeft = cell.style?.border?.left?.style
465
+ ? (0, style_converter_1.borderStyleToLineWidth)(cell.style.border.left.style) / 2
466
+ : 0;
467
+ const borderRight = cell.style?.border?.right?.style
468
+ ? (0, style_converter_1.borderStyleToLineWidth)(cell.style.border.right.style) / 2
469
+ : 0;
470
+ const padding = constants_1.CELL_PADDING_H + borderLeft + (constants_1.CELL_PADDING_H + borderRight) + indent * constants_1.INDENT_WIDTH;
451
471
  const effectiveWidth = Math.max(scaledColPts - padding, 1);
452
472
  const scaledFontSize = fontSize * scaleFactor;
453
473
  const fontProps = (0, style_converter_1.extractFontProperties)(cell.style.font, options.defaultFontFamily, options.defaultFontSize);
@@ -641,6 +661,7 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
641
661
  }
642
662
  // Rich text runs
643
663
  const richText = buildRichTextRuns(cell, options, fontManager, scaleFactor);
664
+ const borders = (0, style_converter_1.excelBordersToPdf)(style.border);
644
665
  return {
645
666
  text,
646
667
  rect: { x, y, width, height },
@@ -655,7 +676,13 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
655
676
  horizontalAlign: resolveHorizontalAlign(style.alignment, cell?.type, cell?.result),
656
677
  verticalAlign: (0, style_converter_1.excelVAlignToPdf)(style.alignment),
657
678
  wrapText: style.alignment?.wrapText ?? false,
658
- borders: (0, style_converter_1.excelBordersToPdf)(style.border),
679
+ borders,
680
+ borderInsets: {
681
+ top: (borders.top?.width ?? 0) / 2,
682
+ right: (borders.right?.width ?? 0) / 2,
683
+ bottom: (borders.bottom?.width ?? 0) / 2,
684
+ left: (borders.left?.width ?? 0) / 2
685
+ },
659
686
  colSpan,
660
687
  rowSpan,
661
688
  hyperlink: cell?.hyperlink ?? null,
@@ -666,6 +693,84 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
666
693
  };
667
694
  }
668
695
  // =============================================================================
696
+ // Shared-Edge Border Resolution
697
+ // =============================================================================
698
/**
 * Border precedence weight.
 *
 * When two adjacent cells both declare a border on a shared edge, the winning
 * border is chosen by, in strict priority order:
 *   1. thicker wins,
 *   2. solid beats dashed,
 *   3. double beats single,
 *   4. darker colour wins (tie-break).
 *
 * The width is quantised to 1/1000 of a width unit and each quantisation step
 * is worth 1000 points. The lower-priority bonuses can add up to at most
 * 100 + 50 + 30 = 180, so they can never outvote a real width difference.
 * (A plain `width * 1000` term let a solid border beat a dashed one that was
 * up to 0.18 thicker, contradicting rule 1.)
 *
 * @param {object} b Border descriptor. Reads `width` (number), `dashPattern`
 *   (array; empty means solid), `isDouble` (boolean) and `color` (`{ r, g, b }`).
 * @returns {number} Numeric score; the higher score wins.
 */
function borderPrecedence(b) {
    // Widths closer than half a quantisation step count as equal and fall
    // through to the style / colour tie-breaks below.
    let score = Math.round(b.width * 1000) * 1000;
    if (b.dashPattern.length === 0) {
        score += 100; // solid beats dashed
    }
    if (b.isDouble) {
        score += 50; // double beats single
    }
    // Darker colour = lower sum of RGB → higher score.
    // Assumes each channel is in 0..1 (max sum 3), so this adds at most 30.
    const brightness = b.color.r + b.color.g + b.color.b;
    score += (3 - brightness) * 10;
    return score;
}
720
/**
 * Settle every edge on which two neighbouring cells both declare a border.
 *
 * The grid is walked row by row, column by column. For each cell present in
 * `cellGrid`, its right edge is compared with the left edge of the cell one
 * column over, then its bottom edge with the top edge of the cell one row
 * down. An edge is only contested when both sides carry a border.
 *
 * For a contested edge the border with the higher `borderPrecedence` score
 * stays; a tie goes to the cell currently being visited. The losing cell has
 * that side of `borders` set to `null` and its matching `borderInsets` entry
 * overwritten with half the winning border's width.
 *
 * @param {Map<string, object>} cellGrid Layout cells keyed by `"row:col"`;
 *   mutated in place.
 * @param {number} rowCount Number of grid rows to visit.
 * @param {number} colCount Number of grid columns to visit.
 */
function resolveSharedBorders(cellGrid, rowCount, colCount) {
    // Resolve one edge: `ownSide` of `owner` against `neighborSide` of the
    // cell stored under `neighborKey`.
    const settleEdge = (owner, ownSide, neighborKey, neighborSide) => {
        const ownBorder = owner.borders[ownSide];
        if (!ownBorder) {
            return;
        }
        const neighbor = cellGrid.get(neighborKey);
        const neighborBorder = neighbor?.borders[neighborSide];
        if (!neighborBorder) {
            return;
        }
        const ownScore = borderPrecedence(ownBorder);
        const neighborScore = borderPrecedence(neighborBorder);
        if (neighborScore > ownScore) {
            // Neighbour wins — owner stops drawing but pads for the winner's line.
            owner.borderInsets[ownSide] = neighborBorder.width / 2;
            owner.borders[ownSide] = null;
            return;
        }
        // Owner wins (or tie) — neighbour stops drawing.
        neighbor.borderInsets[neighborSide] = ownBorder.width / 2;
        neighbor.borders[neighborSide] = null;
    };
    for (let row = 0; row < rowCount; row++) {
        for (let col = 0; col < colCount; col++) {
            const current = cellGrid.get(`${row}:${col}`);
            if (current) {
                // Right edge vs. the left edge of the cell one column over.
                settleEdge(current, "right", `${row}:${col + 1}`, "left");
                // Bottom edge vs. the top edge of the cell one row down.
                settleEdge(current, "bottom", `${row + 1}:${col}`, "top");
            }
        }
    }
}
773
+ // =============================================================================
669
774
  // Image Placement
670
775
  // =============================================================================
671
776
  /**
@@ -746,6 +851,7 @@ function propagateMergeBorders(layoutCell, mergeInfo, wsRowNumber, wsColNumber,
746
851
  const converted = (0, style_converter_1.excelBordersToPdf)({ right: rightCellData.style.border.right });
747
852
  if (converted.right) {
748
853
  layoutCell.borders.right = converted.right;
854
+ layoutCell.borderInsets.right = converted.right.width / 2;
749
855
  }
750
856
  }
751
857
  }
@@ -756,6 +862,7 @@ function propagateMergeBorders(layoutCell, mergeInfo, wsRowNumber, wsColNumber,
756
862
  const converted = (0, style_converter_1.excelBordersToPdf)({ bottom: bottomCellData.style.border.bottom });
757
863
  if (converted.bottom) {
758
864
  layoutCell.borders.bottom = converted.bottom;
865
+ layoutCell.borderInsets.bottom = converted.bottom.width / 2;
759
866
  }
760
867
  }
761
868
  }
@@ -785,7 +892,9 @@ function computeTextOverflows(cellGrid, rowPage, colGroup, visibleRows, visibleC
785
892
  ? fontManager.getEmbeddedResourceName()
786
893
  : fontManager.ensureFont((0, font_manager_1.resolvePdfFontName)(cell.fontFamily, cell.bold, cell.italic));
787
894
  const textWidth = fontManager.measureText(cell.text, resourceName, cell.fontSize);
788
- const cellContentWidth = cell.rect.width - constants_1.CELL_PADDING_H * 2;
895
+ const cellContentWidth = cell.rect.width -
896
+ (constants_1.CELL_PADDING_H + cell.borderInsets.left) -
897
+ (constants_1.CELL_PADDING_H + cell.borderInsets.right);
789
898
  if (textWidth <= cellContentWidth) {
790
899
  continue;
791
900
  }