@cj-tech-master/excelts 9.1.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/README.md +16 -1
  2. package/dist/browser/modules/archive/compression/crc32.js +1 -1
  3. package/dist/browser/modules/archive/crypto/aes.d.ts +0 -8
  4. package/dist/browser/modules/archive/crypto/aes.js +1 -20
  5. package/dist/browser/modules/archive/crypto/index.d.ts +2 -1
  6. package/dist/browser/modules/archive/crypto/index.js +3 -1
  7. package/dist/browser/modules/csv/parse/row-processor.d.ts +1 -1
  8. package/dist/browser/modules/csv/worker/worker-script.generated.js +1 -1
  9. package/dist/browser/modules/excel/utils/cell-matrix.js +1 -0
  10. package/dist/browser/modules/excel/utils/encryptor.browser.d.ts +4 -5
  11. package/dist/browser/modules/excel/utils/encryptor.browser.js +7 -12
  12. package/dist/browser/modules/excel/utils/encryptor.d.ts +1 -1
  13. package/dist/browser/modules/excel/utils/encryptor.js +4 -7
  14. package/dist/browser/modules/pdf/builder/document-builder.d.ts +517 -0
  15. package/dist/browser/modules/pdf/builder/document-builder.js +1493 -0
  16. package/dist/browser/modules/pdf/builder/form-appearance.d.ts +56 -0
  17. package/dist/browser/modules/pdf/builder/form-appearance.js +140 -0
  18. package/dist/browser/modules/pdf/builder/image-utils.d.ts +39 -0
  19. package/dist/browser/modules/pdf/builder/image-utils.js +129 -0
  20. package/dist/browser/modules/pdf/builder/pdf-editor.d.ts +230 -0
  21. package/dist/browser/modules/pdf/builder/pdf-editor.js +1574 -0
  22. package/dist/browser/modules/pdf/builder/resource-merger.d.ts +41 -0
  23. package/dist/browser/modules/pdf/builder/resource-merger.js +258 -0
  24. package/dist/browser/modules/pdf/core/digital-signature.d.ts +109 -0
  25. package/dist/browser/modules/pdf/core/digital-signature.js +659 -0
  26. package/dist/browser/modules/pdf/core/encryption.js +8 -7
  27. package/dist/browser/modules/pdf/core/pdf-object.d.ts +11 -0
  28. package/dist/browser/modules/pdf/core/pdf-object.js +38 -0
  29. package/dist/browser/modules/pdf/core/pdf-stream.d.ts +32 -0
  30. package/dist/browser/modules/pdf/core/pdf-stream.js +66 -0
  31. package/dist/browser/modules/pdf/core/pdf-writer.d.ts +55 -1
  32. package/dist/browser/modules/pdf/core/pdf-writer.js +271 -6
  33. package/dist/browser/modules/pdf/core/pdfa.d.ts +62 -0
  34. package/dist/browser/modules/pdf/core/pdfa.js +261 -0
  35. package/dist/browser/modules/pdf/index.d.ts +11 -0
  36. package/dist/browser/modules/pdf/index.js +9 -0
  37. package/dist/browser/modules/pdf/reader/bookmark-extractor.d.ts +35 -0
  38. package/dist/browser/modules/pdf/reader/bookmark-extractor.js +324 -0
  39. package/dist/browser/modules/pdf/reader/pdf-decrypt.js +6 -5
  40. package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +17 -0
  41. package/dist/browser/modules/pdf/reader/pdf-reader.js +26 -2
  42. package/dist/browser/modules/pdf/reader/table-extractor.d.ts +69 -0
  43. package/dist/browser/modules/pdf/reader/table-extractor.js +365 -0
  44. package/dist/browser/modules/pdf/render/layout-engine.d.ts +21 -1
  45. package/dist/browser/modules/pdf/render/layout-engine.js +112 -5
  46. package/dist/browser/modules/pdf/render/page-renderer.d.ts +2 -9
  47. package/dist/browser/modules/pdf/render/page-renderer.js +62 -103
  48. package/dist/browser/modules/pdf/render/pdf-exporter.js +2 -61
  49. package/dist/browser/modules/pdf/render/style-converter.d.ts +4 -0
  50. package/dist/browser/modules/pdf/render/style-converter.js +1 -1
  51. package/dist/browser/modules/pdf/types.d.ts +14 -1
  52. package/dist/browser/modules/stream/browser/readable.js +8 -2
  53. package/dist/browser/utils/crypto.browser.d.ts +64 -0
  54. package/dist/browser/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +91 -101
  55. package/dist/browser/utils/crypto.d.ts +97 -0
  56. package/dist/browser/utils/crypto.js +209 -0
  57. package/dist/cjs/modules/archive/compression/crc32.js +1 -1
  58. package/dist/cjs/modules/archive/crypto/aes.js +2 -23
  59. package/dist/cjs/modules/archive/crypto/index.js +3 -1
  60. package/dist/cjs/modules/csv/worker/worker-script.generated.js +1 -1
  61. package/dist/cjs/modules/excel/utils/cell-matrix.js +1 -0
  62. package/dist/cjs/modules/excel/utils/encryptor.browser.js +7 -12
  63. package/dist/cjs/modules/excel/utils/encryptor.js +4 -10
  64. package/dist/cjs/modules/pdf/builder/document-builder.js +1532 -0
  65. package/dist/cjs/modules/pdf/builder/form-appearance.js +145 -0
  66. package/dist/cjs/modules/pdf/builder/image-utils.js +135 -0
  67. package/dist/cjs/modules/pdf/builder/pdf-editor.js +1612 -0
  68. package/dist/cjs/modules/pdf/builder/resource-merger.js +263 -0
  69. package/dist/cjs/modules/pdf/core/digital-signature.js +667 -0
  70. package/dist/cjs/modules/pdf/core/encryption.js +8 -7
  71. package/dist/cjs/modules/pdf/core/pdf-object.js +38 -0
  72. package/dist/cjs/modules/pdf/core/pdf-stream.js +66 -0
  73. package/dist/cjs/modules/pdf/core/pdf-writer.js +272 -6
  74. package/dist/cjs/modules/pdf/core/pdfa.js +266 -0
  75. package/dist/cjs/modules/pdf/index.js +19 -1
  76. package/dist/cjs/modules/pdf/reader/bookmark-extractor.js +327 -0
  77. package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +6 -5
  78. package/dist/cjs/modules/pdf/reader/pdf-reader.js +26 -2
  79. package/dist/cjs/modules/pdf/reader/table-extractor.js +368 -0
  80. package/dist/cjs/modules/pdf/render/layout-engine.js +113 -4
  81. package/dist/cjs/modules/pdf/render/page-renderer.js +63 -105
  82. package/dist/cjs/modules/pdf/render/pdf-exporter.js +3 -62
  83. package/dist/cjs/modules/pdf/render/style-converter.js +1 -0
  84. package/dist/cjs/modules/stream/browser/readable.js +8 -2
  85. package/dist/cjs/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +95 -102
  86. package/dist/cjs/utils/crypto.js +228 -0
  87. package/dist/esm/modules/archive/compression/crc32.js +1 -1
  88. package/dist/esm/modules/archive/crypto/aes.js +1 -20
  89. package/dist/esm/modules/archive/crypto/index.js +3 -1
  90. package/dist/esm/modules/csv/worker/worker-script.generated.js +1 -1
  91. package/dist/esm/modules/excel/utils/cell-matrix.js +1 -0
  92. package/dist/esm/modules/excel/utils/encryptor.browser.js +7 -12
  93. package/dist/esm/modules/excel/utils/encryptor.js +4 -7
  94. package/dist/esm/modules/pdf/builder/document-builder.js +1493 -0
  95. package/dist/esm/modules/pdf/builder/form-appearance.js +140 -0
  96. package/dist/esm/modules/pdf/builder/image-utils.js +129 -0
  97. package/dist/esm/modules/pdf/builder/pdf-editor.js +1574 -0
  98. package/dist/esm/modules/pdf/builder/resource-merger.js +258 -0
  99. package/dist/esm/modules/pdf/core/digital-signature.js +659 -0
  100. package/dist/esm/modules/pdf/core/encryption.js +8 -7
  101. package/dist/esm/modules/pdf/core/pdf-object.js +38 -0
  102. package/dist/esm/modules/pdf/core/pdf-stream.js +66 -0
  103. package/dist/esm/modules/pdf/core/pdf-writer.js +271 -6
  104. package/dist/esm/modules/pdf/core/pdfa.js +261 -0
  105. package/dist/esm/modules/pdf/index.js +9 -0
  106. package/dist/esm/modules/pdf/reader/bookmark-extractor.js +324 -0
  107. package/dist/esm/modules/pdf/reader/pdf-decrypt.js +6 -5
  108. package/dist/esm/modules/pdf/reader/pdf-reader.js +26 -2
  109. package/dist/esm/modules/pdf/reader/table-extractor.js +365 -0
  110. package/dist/esm/modules/pdf/render/layout-engine.js +112 -5
  111. package/dist/esm/modules/pdf/render/page-renderer.js +62 -103
  112. package/dist/esm/modules/pdf/render/pdf-exporter.js +2 -61
  113. package/dist/esm/modules/pdf/render/style-converter.js +1 -1
  114. package/dist/esm/modules/stream/browser/readable.js +8 -2
  115. package/dist/esm/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +91 -101
  116. package/dist/esm/utils/crypto.js +209 -0
  117. package/dist/iife/excelts.iife.js +1248 -1074
  118. package/dist/iife/excelts.iife.js.map +1 -1
  119. package/dist/iife/excelts.iife.min.js +53 -54
  120. package/dist/types/modules/archive/crypto/aes.d.ts +0 -8
  121. package/dist/types/modules/archive/crypto/index.d.ts +2 -1
  122. package/dist/types/modules/csv/parse/row-processor.d.ts +1 -1
  123. package/dist/types/modules/excel/utils/encryptor.browser.d.ts +4 -5
  124. package/dist/types/modules/excel/utils/encryptor.d.ts +1 -1
  125. package/dist/types/modules/pdf/builder/document-builder.d.ts +517 -0
  126. package/dist/types/modules/pdf/builder/form-appearance.d.ts +56 -0
  127. package/dist/types/modules/pdf/builder/image-utils.d.ts +39 -0
  128. package/dist/types/modules/pdf/builder/pdf-editor.d.ts +230 -0
  129. package/dist/types/modules/pdf/builder/resource-merger.d.ts +41 -0
  130. package/dist/types/modules/pdf/core/digital-signature.d.ts +109 -0
  131. package/dist/types/modules/pdf/core/pdf-object.d.ts +11 -0
  132. package/dist/types/modules/pdf/core/pdf-stream.d.ts +32 -0
  133. package/dist/types/modules/pdf/core/pdf-writer.d.ts +55 -1
  134. package/dist/types/modules/pdf/core/pdfa.d.ts +62 -0
  135. package/dist/types/modules/pdf/index.d.ts +11 -0
  136. package/dist/types/modules/pdf/reader/bookmark-extractor.d.ts +35 -0
  137. package/dist/types/modules/pdf/reader/pdf-reader.d.ts +17 -0
  138. package/dist/types/modules/pdf/reader/table-extractor.d.ts +69 -0
  139. package/dist/types/modules/pdf/render/layout-engine.d.ts +21 -1
  140. package/dist/types/modules/pdf/render/page-renderer.d.ts +2 -9
  141. package/dist/types/modules/pdf/render/style-converter.d.ts +4 -0
  142. package/dist/types/modules/pdf/types.d.ts +14 -1
  143. package/dist/types/utils/crypto.browser.d.ts +64 -0
  144. package/dist/types/utils/crypto.d.ts +97 -0
  145. package/package.json +110 -111
  146. package/dist/browser/modules/pdf/core/crypto.d.ts +0 -65
  147. package/dist/types/modules/pdf/core/crypto.d.ts +0 -65
@@ -54,7 +54,9 @@ import { reconstructText, reconstructTextLines } from "./text-reconstruction.js"
54
54
  import { extractImagesFromPage } from "./image-extractor.js";
55
55
  import { extractAnnotationsFromPage } from "./annotation-extractor.js";
56
56
  import { extractFormFields } from "./form-extractor.js";
57
+ import { extractBookmarks } from "./bookmark-extractor.js";
57
58
  import { extractMetadata } from "./metadata-reader.js";
59
+ import { extractTables } from "./table-extractor.js";
58
60
  import { PdfStructureError } from "../errors.js";
59
61
  import { yieldToEventLoop } from "../../../utils/utils.base.js";
60
62
  // =============================================================================
@@ -93,7 +95,9 @@ function prepareRead(data, options) {
93
95
  extractImages: options?.extractImages ?? true,
94
96
  extractMetadata: options?.extractMetadata ?? true,
95
97
  extractAnnotations: options?.extractAnnotations ?? true,
96
- extractFormFields: options?.extractFormFields ?? true
98
+ extractFormFields: options?.extractFormFields ?? true,
99
+ extractBookmarks: options?.extractBookmarks ?? true,
100
+ extractTables: options?.extractTables ?? false
97
101
  };
98
102
  const doc = new PdfDocument(data);
99
103
  if (isEncrypted(doc)) {
@@ -150,6 +154,16 @@ function processPage(pageDict, pageIdx, doc, opts) {
150
154
  }
151
155
  }
152
156
  const { width, height } = getPageDimensions(pageDict, doc);
157
+ let tables = [];
158
+ if (opts.extractTables) {
159
+ try {
160
+ tables = extractTables(textFragments, width, height);
161
+ }
162
+ catch (err) {
163
+ const msg = err instanceof Error ? err.message : String(err);
164
+ warnings.push(`Table extraction failed on page ${pageNumber}: ${msg}`);
165
+ }
166
+ }
153
167
  return {
154
168
  pageNumber,
155
169
  text,
@@ -157,6 +171,7 @@ function processPage(pageDict, pageIdx, doc, opts) {
157
171
  textFragments,
158
172
  images,
159
173
  annotations,
174
+ tables,
160
175
  width,
161
176
  height,
162
177
  warnings
@@ -179,7 +194,16 @@ function finalizeRead(pages, totalPageCount, metadata, opts, doc) {
179
194
  // Non-fatal — just return empty
180
195
  }
181
196
  }
182
- return { text: allText, pages, metadata, formFields };
197
+ let bookmarks = [];
198
+ if (opts.extractBookmarks) {
199
+ try {
200
+ bookmarks = extractBookmarks(doc);
201
+ }
202
+ catch {
203
+ // Non-fatal — just return empty
204
+ }
205
+ }
206
+ return { text: allText, pages, metadata, formFields, bookmarks };
183
207
  }
184
208
  // =============================================================================
185
209
  // Helpers
@@ -0,0 +1,365 @@
1
+ /**
2
+ * Table extraction from PDF pages using text fragment positioning.
3
+ *
4
+ * Detects tabular structures by analyzing the spatial layout of text fragments.
5
+ * Since PDF content streams typically render tables as positioned text (with or
6
+ * without drawn grid lines), this module uses a text-only heuristic:
7
+ *
8
+ * 1. Group fragments into lines by Y proximity
9
+ * 2. Detect column boundaries from consistent X-position clusters
10
+ * 3. Identify contiguous blocks of multi-column lines as tables
11
+ * 4. Map fragments to cells based on column/line membership
12
+ *
13
+ * @see content-interpreter.ts for TextFragment extraction
14
+ * @see text-reconstruction.ts for line grouping logic
15
+ */
16
+ // =============================================================================
17
+ // Constants
18
+ // =============================================================================
19
+ /**
20
+ * Minimum number of columns required to consider a block of lines as a table.
21
+ */
22
+ const MIN_TABLE_COLUMNS = 2;
23
+ /**
24
+ * Minimum number of consecutive multi-column lines to form a table.
25
+ */
26
+ const MIN_TABLE_ROWS = 2;
27
+ // =============================================================================
28
+ // Public API
29
+ // =============================================================================
30
/**
 * Detect tables on one page from the positions of its text fragments.
 *
 * Only text layout is consulted; drawn rules or grid paths play no part.
 * Detection bails out with an empty array as soon as any stage shows the page
 * cannot hold a table (too few fragments, too few lines, too few columns).
 *
 * @param fragments - Text fragments for the page; `isVertical` and `text` are read here
 * @param pageWidth - Page width, forwarded to column detection
 * @param pageHeight - Page height, forwarded to table construction
 * @returns Array of detected tables (possibly empty)
 */
export function extractTables(fragments, pageWidth, pageHeight) {
    if (fragments.length === 0) {
        return [];
    }
    // Keep horizontal, non-blank fragments only (vertical text is not handled here)
    const usable = fragments.filter(frag => !(frag.isVertical || frag.text.trim().length === 0));
    const minimumFragments = MIN_TABLE_COLUMNS * MIN_TABLE_ROWS;
    if (usable.length < minimumFragments) {
        return [];
    }
    // Stage 1: cluster fragments into lines by Y proximity
    const lines = groupFragmentsIntoLines(usable);
    if (lines.length < MIN_TABLE_ROWS) {
        return [];
    }
    // Stage 2: derive column boundaries shared across lines
    const columns = detectColumnBoundaries(lines, pageWidth);
    if (columns.length < MIN_TABLE_COLUMNS) {
        return [];
    }
    // Stages 3 and 4: locate runs of tabular lines and materialise each as a table
    return findTableRanges(lines, columns)
        .map(({ start, end }) => buildTable(lines, columns, start, end, pageHeight))
        .filter(table => table);
}
72
+ // =============================================================================
73
+ // Step 1: Group Fragments into Lines
74
+ // =============================================================================
75
/**
 * Group text fragments into horizontal lines based on Y proximity.
 *
 * Fragments are ordered by descending Y (then ascending X), and a new line is
 * started whenever a fragment's Y differs from the current line's anchor Y by
 * more than a font-size-derived threshold. Within each returned line the
 * fragments are sorted left-to-right.
 *
 * @param fragments - Fragments exposing numeric `x`, `y` and `fontSize`
 * @returns Array of `{ y, fragments, fontSize }`; empty when `fragments` is empty.
 *          `y` is the Y of the first fragment placed on the line and `fontSize`
 *          is taken from the line's leftmost fragment.
 */
function groupFragmentsIntoLines(fragments) {
    // Guard: without this, reading sorted[0].y below throws on an empty array.
    if (fragments.length === 0) {
        return [];
    }
    // Sort a copy so the caller's array is left untouched.
    // NOTE(review): Y values within 1 unit compare by X, so this comparator is not
    // transitive for chains of near-equal Y values; kept as-is to preserve results.
    const sorted = [...fragments].sort((a, b) => {
        const dy = b.y - a.y;
        if (Math.abs(dy) > 1) {
            return dy;
        }
        return a.x - b.x;
    });
    const lines = [];
    let currentFragments = [sorted[0]];
    let currentY = sorted[0].y;
    // Order the pending fragments by X and record them as a finished line.
    const finalizeLine = () => {
        currentFragments.sort((a, b) => a.x - b.x);
        lines.push({
            y: currentY,
            fragments: currentFragments,
            fontSize: currentFragments[0].fontSize
        });
    };
    for (let i = 1; i < sorted.length; i++) {
        const f = sorted[i];
        // Tolerance scales with font size (40% of the average) but never drops below 2.
        const avgFontSize = (currentFragments[0].fontSize + f.fontSize) / 2;
        const threshold = Math.max(avgFontSize * 0.4, 2);
        if (Math.abs(f.y - currentY) <= threshold) {
            currentFragments.push(f);
        }
        else {
            finalizeLine();
            currentFragments = [f];
            currentY = f.y;
        }
    }
    // The pending line always holds at least one fragment at this point.
    finalizeLine();
    return lines;
}
121
+ // =============================================================================
122
+ // Step 2: Detect Column Boundaries
123
+ // =============================================================================
124
/**
 * Derive column boundaries from how fragments are laid out across lines.
 *
 * Procedure:
 *  1. Find the fragment count shared by the most lines (ties go to the larger
 *     count); that count is taken as the number of columns.
 *  2. Using only lines with exactly that many fragments, compute for every gap
 *     the largest right edge on its left side and the smallest left edge on
 *     its right side.
 *  3. Put a divider at the midpoint of a clean gap; when the two sides
 *     overlap, put it at the smallest left edge of the right-hand side.
 *  4. Turn dividers into `{ left, right }` columns spanning the horizontal
 *     extent of every fragment on every line.
 *
 * @param lines - Lines whose `fragments` expose numeric `x` and `width`
 * @param _pageWidth - Unused
 * @returns Column boundaries left-to-right, or an empty array if no column
 *          structure repeats on at least `MIN_TABLE_ROWS` lines
 */
function detectColumnBoundaries(lines, _pageWidth) {
    // Tally how many lines have each (sufficiently large) fragment count
    const tally = new Map();
    lines.forEach(({ fragments }) => {
        const n = fragments.length;
        if (n >= MIN_TABLE_COLUMNS) {
            tally.set(n, (tally.get(n) ?? 0) + 1);
        }
    });
    if (tally.size === 0) {
        return [];
    }
    // Choose the dominant count; on equal frequency prefer more columns
    let columnCount = 0;
    let occurrences = 0;
    tally.forEach((freq, count) => {
        const moreFrequent = freq > occurrences;
        const tiedButWider = freq === occurrences && count > columnCount;
        if (moreFrequent || tiedButWider) {
            columnCount = count;
            occurrences = freq;
        }
    });
    if (columnCount < MIN_TABLE_COLUMNS || occurrences < MIN_TABLE_ROWS) {
        return [];
    }
    // Per gap: furthest right edge on the left side, nearest left edge on the right side
    const gapCount = columnCount - 1;
    const furthestRight = new Array(gapCount).fill(-Infinity);
    const nearestLeft = new Array(gapCount).fill(Infinity);
    for (const { fragments } of lines) {
        if (fragments.length !== columnCount) {
            continue;
        }
        for (let gap = 0; gap < gapCount; gap++) {
            const before = fragments[gap];
            const after = fragments[gap + 1];
            furthestRight[gap] = Math.max(furthestRight[gap], before.x + before.width);
            nearestLeft[gap] = Math.min(nearestLeft[gap], after.x);
        }
    }
    // Midpoint for a clean gap; otherwise fall back to the next column's left edge
    const dividers = furthestRight.map((rightEdge, gap) => {
        const leftEdge = nearestLeft[gap];
        return leftEdge > rightEdge ? (rightEdge + leftEdge) / 2 : leftEdge;
    });
    if (dividers.length < 1) {
        return [];
    }
    // Horizontal extent of all fragments, including lines that did not match the count
    let extentLeft = Infinity;
    let extentRight = -Infinity;
    for (const { fragments } of lines) {
        for (const { x, width } of fragments) {
            extentLeft = Math.min(extentLeft, x);
            extentRight = Math.max(extentRight, x + width);
        }
    }
    // Consecutive edge pairs become columns
    const edges = [extentLeft, ...dividers, extentRight];
    const columns = edges.slice(1).map((right, i) => ({ left: edges[i], right }));
    return columns.length >= MIN_TABLE_COLUMNS ? columns : [];
}
222
+ // =============================================================================
223
+ // Step 3: Find Contiguous Table Ranges
224
+ // =============================================================================
225
/**
 * Locate runs of consecutive lines that look tabular.
 *
 * A line counts as tabular when its fragments land in at least
 * `MIN_TABLE_COLUMNS` distinct columns. A run is reported only if it is at
 * least `MIN_TABLE_ROWS` lines long.
 *
 * @param lines - Lines whose `fragments` expose `x`
 * @param columns - Column boundaries used to classify each fragment
 * @returns Array of inclusive `{ start, end }` line-index ranges
 */
function findTableRanges(lines, columns) {
    // Classify every line up front
    const tabularFlags = lines.map(line => {
        const occupied = new Set();
        for (const frag of line.fragments) {
            const idx = findColumnIndex(frag.x, columns);
            if (idx >= 0) {
                occupied.add(idx);
            }
        }
        return occupied.size >= MIN_TABLE_COLUMNS;
    });
    const ranges = [];
    let runStart = -1;
    // Iterate one step past the end: the virtual extra line is non-tabular,
    // which closes any run still open at the last real line.
    for (let i = 0; i <= tabularFlags.length; i++) {
        const tabular = i < tabularFlags.length && tabularFlags[i];
        if (tabular) {
            if (runStart === -1) {
                runStart = i;
            }
            continue;
        }
        if (runStart !== -1) {
            if (i - runStart >= MIN_TABLE_ROWS) {
                ranges.push({ start: runStart, end: i - 1 });
            }
            runStart = -1;
        }
    }
    return ranges;
}
263
/**
 * Map an X position to a column index.
 *
 * Columns are scanned from last to first and the first one whose left edge
 * (with 1 unit of slack) is at or before `x` wins. Right edges are not
 * checked, so any position beyond the last column's left edge maps to the
 * last column.
 *
 * @param x - X position to classify
 * @param columns - Column boundaries exposing `left`
 * @returns Column index, or -1 when `x` lies more than 1 unit left of every
 *          column (or `columns` is empty)
 */
function findColumnIndex(x, columns) {
    let idx = columns.length;
    while (idx-- > 0) {
        if (columns[idx].left - 1 <= x) {
            return idx;
        }
    }
    return -1;
}
275
+ // =============================================================================
276
+ // Step 4: Build Table Structure
277
+ // =============================================================================
278
/**
 * Build a table object from an inclusive range of lines and column boundaries.
 *
 * Each line in the range becomes one row via `buildRow`. The bounding box is
 * derived from the fragments on those lines: `x` is the smallest fragment X,
 * `width` reaches the largest fragment right edge, `y` is the first line's Y,
 * and `height` is the Y distance from first to last line plus the last line's
 * font size.
 *
 * @param lines - All lines on the page
 * @param columns - Column boundaries passed through to `buildRow`
 * @param startLine - Index of the first line of the table (inclusive)
 * @param endLine - Index of the last line of the table (inclusive)
 * @param _pageHeight - Unused
 * @returns `{ rows, x, y, width, height }`, or `null` when the range is empty
 */
function buildTable(lines, columns, startLine, endLine, _pageHeight) {
    const rows = [];
    for (let li = startLine; li <= endLine; li++) {
        rows.push(buildRow(lines[li], columns, lines, li, startLine, endLine));
    }
    if (rows.length === 0) {
        return null;
    }
    const firstLine = lines[startLine];
    const lastLine = lines[endLine];
    // Track the horizontal extent with running min/max. Spreading every fragment
    // into Math.min(...)/Math.max(...) passes one argument per fragment and can
    // throw a RangeError on pages with a very large number of fragments.
    let fragmentCount = 0;
    let minX = Infinity;
    let maxRight = -Infinity;
    for (let li = startLine; li <= endLine; li++) {
        for (const f of lines[li].fragments) {
            fragmentCount++;
            minX = Math.min(minX, f.x);
            maxRight = Math.max(maxRight, f.x + f.width);
        }
    }
    // With no fragments at all, fall back to a zero-width box at x = 0
    const tableX = fragmentCount > 0 ? minX : 0;
    const tableRight = fragmentCount > 0 ? maxRight : 0;
    const topY = firstLine.y;
    return {
        rows,
        x: tableX,
        y: topY,
        width: tableRight - tableX,
        // Height: from the first line's Y down to the last line's Y, plus its font height
        height: topY - lastLine.y + lastLine.fontSize
    };
}
317
/**
 * Produce one table row: exactly one cell per column, in column order.
 *
 * Fragments are bucketed by the column their X position maps to. A populated
 * column yields a cell whose text is the fragments' text joined with single
 * spaces (then trimmed) and whose box spans from the first fragment's X to the
 * last fragment's right edge. An unpopulated column yields an empty-text cell
 * covering the column's own bounds. Every cell takes its `y` and `height`
 * from the line's `y` and `fontSize`.
 *
 * @param line - Line providing `fragments`, `y` and `fontSize`
 * @param columns - Column boundaries exposing `left` and `right`
 * @param _allLines - Unused
 * @param _lineIdx - Unused
 * @param _startLine - Unused
 * @param _endLine - Unused
 * @returns `{ cells }`
 */
function buildRow(line, columns, _allLines, _lineIdx, _startLine, _endLine) {
    // Bucket fragments by target column, skipping any that map to no column
    const buckets = new Map();
    for (const frag of line.fragments) {
        const target = findColumnIndex(frag.x, columns);
        if (target < 0) {
            continue;
        }
        if (!buckets.has(target)) {
            buckets.set(target, []);
        }
        buckets.get(target).push(frag);
    }
    const cells = columns.map((col, colIdx) => {
        const members = buckets.get(colIdx);
        if (!members || members.length === 0) {
            // Nothing landed here: emit a placeholder sized to the column
            return {
                text: "",
                x: col.left,
                y: line.y,
                width: col.right - col.left,
                height: line.fontSize
            };
        }
        const first = members[0];
        const last = members[members.length - 1];
        return {
            text: members.map(m => m.text).join(" ").trim(),
            x: first.x,
            y: line.y,
            width: last.x + last.width - first.x,
            height: line.fontSize
        };
    });
    return { cells };
}
@@ -18,7 +18,7 @@
18
18
  */
19
19
  import { PdfCellType } from "../types.js";
20
20
  import { resolvePdfFontName } from "../font/font-manager.js";
21
- import { extractFontProperties, excelFillToPdfColor, excelBordersToPdf, excelHAlignToPdf, excelVAlignToPdf } from "./style-converter.js";
21
+ import { extractFontProperties, excelFillToPdfColor, excelBordersToPdf, excelHAlignToPdf, excelVAlignToPdf, borderStyleToLineWidth } from "./style-converter.js";
22
22
  import { wrapTextLines } from "./page-renderer.js";
23
23
  import { CELL_PADDING_H, CELL_PADDING_V, LINE_HEIGHT_FACTOR, INDENT_WIDTH, MAX_DIGIT_WIDTH_PX, EXCEL_COLUMN_PADDING_PX, PX_TO_PT } from "./constants.js";
24
24
  import { yieldToEventLoop } from "../../../utils/utils.base.js";
@@ -240,6 +240,9 @@ function buildPageLayout(ctx, rowPage, colGroup, currentPageCount, sheet, option
240
240
  cellGrid.set(`${ri}:${gci}`, layoutCell);
241
241
  }
242
242
  }
243
+ // Resolve shared borders: on each shared edge between adjacent cells, keep
244
+ // only the winning border for drawing but preserve insets for both cells.
245
+ resolveSharedBorders(cellGrid, rowPage.length, colGroup.length);
243
246
  // Compute text overflow widths for non-wrapped cells
244
247
  computeTextOverflows(cellGrid, rowPage, colGroup, visibleRows, visibleCols, groupColWidths, mergeMap, fontManager);
245
248
  return {
@@ -398,7 +401,16 @@ function computeRowHeights(sheet, scaleFactor, printRange, fontManager, options)
398
401
  const fontSize = getCellFontSize(cell);
399
402
  const wrapLineCount = countWrapLines(cell, fontSize, scaleFactor, sheet, fontManager, options);
400
403
  const lineHeight = fontSize * LINE_HEIGHT_FACTOR;
401
- const neededHeight = fontSize + (wrapLineCount - 1) * lineHeight + CELL_PADDING_V * 2;
404
+ // Account for border width: half of each border extends inward
405
+ const borderTop = cell.style?.border?.top?.style
406
+ ? borderStyleToLineWidth(cell.style.border.top.style) / 2
407
+ : 0;
408
+ const borderBottom = cell.style?.border?.bottom?.style
409
+ ? borderStyleToLineWidth(cell.style.border.bottom.style) / 2
410
+ : 0;
411
+ const neededHeight = fontSize +
412
+ (wrapLineCount - 1) * lineHeight +
413
+ (CELL_PADDING_V + borderTop + borderBottom) * 2;
402
414
  if (neededHeight > height) {
403
415
  height = neededHeight;
404
416
  }
@@ -443,7 +455,13 @@ function countWrapLines(cell, fontSize, scaleFactor, sheet, fontManager, options
443
455
  const colWidth = col?.width ?? DEFAULT_COLUMN_WIDTH;
444
456
  const scaledColPts = (colWidth * MAX_DIGIT_WIDTH_PX + EXCEL_COLUMN_PADDING_PX) * PX_TO_PT * scaleFactor;
445
457
  const indent = cell.style.alignment.indent ?? 0;
446
- const padding = CELL_PADDING_H * 2 + indent * INDENT_WIDTH;
458
+ const borderLeft = cell.style?.border?.left?.style
459
+ ? borderStyleToLineWidth(cell.style.border.left.style) / 2
460
+ : 0;
461
+ const borderRight = cell.style?.border?.right?.style
462
+ ? borderStyleToLineWidth(cell.style.border.right.style) / 2
463
+ : 0;
464
+ const padding = CELL_PADDING_H + borderLeft + (CELL_PADDING_H + borderRight) + indent * INDENT_WIDTH;
447
465
  const effectiveWidth = Math.max(scaledColPts - padding, 1);
448
466
  const scaledFontSize = fontSize * scaleFactor;
449
467
  const fontProps = extractFontProperties(cell.style.font, options.defaultFontFamily, options.defaultFontSize);
@@ -637,6 +655,7 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
637
655
  }
638
656
  // Rich text runs
639
657
  const richText = buildRichTextRuns(cell, options, fontManager, scaleFactor);
658
+ const borders = excelBordersToPdf(style.border);
640
659
  return {
641
660
  text,
642
661
  rect: { x, y, width, height },
@@ -651,7 +670,13 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
651
670
  horizontalAlign: resolveHorizontalAlign(style.alignment, cell?.type, cell?.result),
652
671
  verticalAlign: excelVAlignToPdf(style.alignment),
653
672
  wrapText: style.alignment?.wrapText ?? false,
654
- borders: excelBordersToPdf(style.border),
673
+ borders,
674
+ borderInsets: {
675
+ top: (borders.top?.width ?? 0) / 2,
676
+ right: (borders.right?.width ?? 0) / 2,
677
+ bottom: (borders.bottom?.width ?? 0) / 2,
678
+ left: (borders.left?.width ?? 0) / 2
679
+ },
655
680
  colSpan,
656
681
  rowSpan,
657
682
  hyperlink: cell?.hyperlink ?? null,
@@ -662,6 +687,84 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
662
687
  };
663
688
  }
664
689
  // =============================================================================
690
+ // Shared-Edge Border Resolution
691
+ // =============================================================================
692
/**
 * Rank a border so that two borders competing for one shared edge can be
 * compared numerically.
 *
 * The score is the sum of four weighted components, listed from most to
 * least significant:
 *   - line width          (width * 1000)
 *   - solid over dashed   (+100 when the dash pattern is empty)
 *   - double over single  (+50 when `isDouble` is truthy)
 *   - darker colour       (+(3 - (r + g + b)) * 10, used as a tie-break)
 *
 * @param b Border descriptor; the code reads `width`, `dashPattern`,
 *          `isDouble` and `color.{r,g,b}`.
 *          NOTE(review): the colour weight assumes channels are in 0..1
 *          (so the sum is at most 3) — confirm against the border converter.
 * @returns Numeric precedence; the border with the higher value wins.
 */
export function borderPrecedence(b) {
    const widthPart = b.width * 1000;
    const solidPart = b.dashPattern.length === 0 ? 100 : 0;
    const doublePart = b.isDouble ? 50 : 0;
    // A smaller channel sum means a darker colour, which earns a larger bonus.
    const { r: red, g: green, b: blue } = b.color;
    const darknessPart = (3 - (red + green + blue)) * 10;
    return widthPart + solidPart + doublePart + darknessPart;
}
714
/**
 * Settle border conflicts on edges shared by adjacent layout cells.
 *
 * The grid is walked row by row, column by column. For every cell present in
 * `cellGrid`, its right edge is compared with the left edge of the cell one
 * column to the right, and its bottom edge with the top edge of the cell one
 * row below. A comparison happens only when BOTH cells declare a border on
 * that edge; `borderPrecedence` decides the winner and a tie goes to the
 * current (left / upper) cell.
 *
 * The losing side has its border set to `null` so it is not drawn, and its
 * `borderInsets` entry is set to half the winner's width. Cells are mutated
 * in place; nothing is returned.
 *
 * @param cellGrid Lookup with a `get` method, keyed by `"row:col"` strings.
 * @param rowCount Number of rows to scan (indices 0 .. rowCount - 1).
 * @param colCount Number of columns to scan (indices 0 .. colCount - 1).
 */
export function resolveSharedBorders(cellGrid, rowCount, colCount) {
    for (let row = 0; row < rowCount; row++) {
        for (let col = 0; col < colCount; col++) {
            const current = cellGrid.get(`${row}:${col}`);
            if (!current) {
                continue;
            }
            // Right edge first, then bottom edge — same order for every cell.
            settleSharedEdge(cellGrid, current, "right", row, col + 1, "left");
            settleSharedEdge(cellGrid, current, "bottom", row + 1, col, "top");
        }
    }
}
/** Resolve one shared edge between `owner` and the cell at (neighborRow, neighborCol). */
function settleSharedEdge(cellGrid, owner, ownerSide, neighborRow, neighborCol, neighborSide) {
    const ownBorder = owner.borders[ownerSide];
    if (!ownBorder) {
        return;
    }
    // The neighbour is looked up only once the owner is known to have a border.
    const neighbor = cellGrid.get(`${neighborRow}:${neighborCol}`);
    const rivalBorder = neighbor?.borders[neighborSide];
    if (!rivalBorder) {
        return;
    }
    const ownScore = borderPrecedence(ownBorder);
    const rivalScore = borderPrecedence(rivalBorder);
    if (rivalScore > ownScore) {
        // Neighbour wins: the owner stops drawing and pads for the winner's line.
        owner.borderInsets[ownerSide] = rivalBorder.width / 2;
        owner.borders[ownerSide] = null;
        return;
    }
    // Owner wins (ties included): the neighbour stops drawing instead.
    neighbor.borderInsets[neighborSide] = ownBorder.width / 2;
    neighbor.borders[neighborSide] = null;
}
767
+ // =============================================================================
665
768
  // Image Placement
666
769
  // =============================================================================
667
770
  /**
@@ -742,6 +845,7 @@ function propagateMergeBorders(layoutCell, mergeInfo, wsRowNumber, wsColNumber,
742
845
  const converted = excelBordersToPdf({ right: rightCellData.style.border.right });
743
846
  if (converted.right) {
744
847
  layoutCell.borders.right = converted.right;
848
+ layoutCell.borderInsets.right = converted.right.width / 2;
745
849
  }
746
850
  }
747
851
  }
@@ -752,6 +856,7 @@ function propagateMergeBorders(layoutCell, mergeInfo, wsRowNumber, wsColNumber,
752
856
  const converted = excelBordersToPdf({ bottom: bottomCellData.style.border.bottom });
753
857
  if (converted.bottom) {
754
858
  layoutCell.borders.bottom = converted.bottom;
859
+ layoutCell.borderInsets.bottom = converted.bottom.width / 2;
755
860
  }
756
861
  }
757
862
  }
@@ -781,7 +886,9 @@ function computeTextOverflows(cellGrid, rowPage, colGroup, visibleRows, visibleC
781
886
  ? fontManager.getEmbeddedResourceName()
782
887
  : fontManager.ensureFont(resolvePdfFontName(cell.fontFamily, cell.bold, cell.italic));
783
888
  const textWidth = fontManager.measureText(cell.text, resourceName, cell.fontSize);
784
- const cellContentWidth = cell.rect.width - CELL_PADDING_H * 2;
889
+ const cellContentWidth = cell.rect.width -
890
+ (CELL_PADDING_H + cell.borderInsets.left) -
891
+ (CELL_PADDING_H + cell.borderInsets.right);
785
892
  if (textWidth <= cellContentWidth) {
786
893
  continue;
787
894
  }