@cj-tech-master/excelts 9.1.0 → 9.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/dist/browser/modules/archive/compression/crc32.js +1 -1
- package/dist/browser/modules/archive/crypto/aes.d.ts +0 -8
- package/dist/browser/modules/archive/crypto/aes.js +1 -20
- package/dist/browser/modules/archive/crypto/index.d.ts +2 -1
- package/dist/browser/modules/archive/crypto/index.js +3 -1
- package/dist/browser/modules/csv/parse/row-processor.d.ts +1 -1
- package/dist/browser/modules/csv/worker/worker-script.generated.js +1 -1
- package/dist/browser/modules/excel/utils/cell-matrix.js +1 -0
- package/dist/browser/modules/excel/utils/encryptor.browser.d.ts +4 -5
- package/dist/browser/modules/excel/utils/encryptor.browser.js +7 -12
- package/dist/browser/modules/excel/utils/encryptor.d.ts +1 -1
- package/dist/browser/modules/excel/utils/encryptor.js +4 -7
- package/dist/browser/modules/pdf/builder/document-builder.d.ts +517 -0
- package/dist/browser/modules/pdf/builder/document-builder.js +1493 -0
- package/dist/browser/modules/pdf/builder/form-appearance.d.ts +56 -0
- package/dist/browser/modules/pdf/builder/form-appearance.js +140 -0
- package/dist/browser/modules/pdf/builder/image-utils.d.ts +39 -0
- package/dist/browser/modules/pdf/builder/image-utils.js +129 -0
- package/dist/browser/modules/pdf/builder/pdf-editor.d.ts +230 -0
- package/dist/browser/modules/pdf/builder/pdf-editor.js +1574 -0
- package/dist/browser/modules/pdf/builder/resource-merger.d.ts +41 -0
- package/dist/browser/modules/pdf/builder/resource-merger.js +258 -0
- package/dist/browser/modules/pdf/core/digital-signature.d.ts +109 -0
- package/dist/browser/modules/pdf/core/digital-signature.js +659 -0
- package/dist/browser/modules/pdf/core/encryption.js +8 -7
- package/dist/browser/modules/pdf/core/pdf-object.d.ts +11 -0
- package/dist/browser/modules/pdf/core/pdf-object.js +38 -0
- package/dist/browser/modules/pdf/core/pdf-stream.d.ts +32 -0
- package/dist/browser/modules/pdf/core/pdf-stream.js +66 -0
- package/dist/browser/modules/pdf/core/pdf-writer.d.ts +55 -1
- package/dist/browser/modules/pdf/core/pdf-writer.js +271 -6
- package/dist/browser/modules/pdf/core/pdfa.d.ts +62 -0
- package/dist/browser/modules/pdf/core/pdfa.js +261 -0
- package/dist/browser/modules/pdf/index.d.ts +11 -0
- package/dist/browser/modules/pdf/index.js +9 -0
- package/dist/browser/modules/pdf/reader/bookmark-extractor.d.ts +35 -0
- package/dist/browser/modules/pdf/reader/bookmark-extractor.js +324 -0
- package/dist/browser/modules/pdf/reader/pdf-decrypt.js +6 -5
- package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +17 -0
- package/dist/browser/modules/pdf/reader/pdf-reader.js +26 -2
- package/dist/browser/modules/pdf/reader/table-extractor.d.ts +69 -0
- package/dist/browser/modules/pdf/reader/table-extractor.js +365 -0
- package/dist/browser/modules/pdf/render/layout-engine.d.ts +21 -1
- package/dist/browser/modules/pdf/render/layout-engine.js +112 -5
- package/dist/browser/modules/pdf/render/page-renderer.d.ts +2 -9
- package/dist/browser/modules/pdf/render/page-renderer.js +62 -103
- package/dist/browser/modules/pdf/render/pdf-exporter.js +2 -61
- package/dist/browser/modules/pdf/render/style-converter.d.ts +4 -0
- package/dist/browser/modules/pdf/render/style-converter.js +1 -1
- package/dist/browser/modules/pdf/types.d.ts +14 -1
- package/dist/browser/modules/stream/browser/readable.js +8 -2
- package/dist/browser/utils/crypto.browser.d.ts +64 -0
- package/dist/browser/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +91 -101
- package/dist/browser/utils/crypto.d.ts +97 -0
- package/dist/browser/utils/crypto.js +209 -0
- package/dist/cjs/modules/archive/compression/crc32.js +1 -1
- package/dist/cjs/modules/archive/crypto/aes.js +2 -23
- package/dist/cjs/modules/archive/crypto/index.js +3 -1
- package/dist/cjs/modules/csv/worker/worker-script.generated.js +1 -1
- package/dist/cjs/modules/excel/utils/cell-matrix.js +1 -0
- package/dist/cjs/modules/excel/utils/encryptor.browser.js +7 -12
- package/dist/cjs/modules/excel/utils/encryptor.js +4 -10
- package/dist/cjs/modules/pdf/builder/document-builder.js +1532 -0
- package/dist/cjs/modules/pdf/builder/form-appearance.js +145 -0
- package/dist/cjs/modules/pdf/builder/image-utils.js +135 -0
- package/dist/cjs/modules/pdf/builder/pdf-editor.js +1612 -0
- package/dist/cjs/modules/pdf/builder/resource-merger.js +263 -0
- package/dist/cjs/modules/pdf/core/digital-signature.js +667 -0
- package/dist/cjs/modules/pdf/core/encryption.js +8 -7
- package/dist/cjs/modules/pdf/core/pdf-object.js +38 -0
- package/dist/cjs/modules/pdf/core/pdf-stream.js +66 -0
- package/dist/cjs/modules/pdf/core/pdf-writer.js +272 -6
- package/dist/cjs/modules/pdf/core/pdfa.js +266 -0
- package/dist/cjs/modules/pdf/index.js +19 -1
- package/dist/cjs/modules/pdf/reader/bookmark-extractor.js +327 -0
- package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +6 -5
- package/dist/cjs/modules/pdf/reader/pdf-reader.js +26 -2
- package/dist/cjs/modules/pdf/reader/table-extractor.js +368 -0
- package/dist/cjs/modules/pdf/render/layout-engine.js +113 -4
- package/dist/cjs/modules/pdf/render/page-renderer.js +63 -105
- package/dist/cjs/modules/pdf/render/pdf-exporter.js +3 -62
- package/dist/cjs/modules/pdf/render/style-converter.js +1 -0
- package/dist/cjs/modules/stream/browser/readable.js +8 -2
- package/dist/cjs/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +95 -102
- package/dist/cjs/utils/crypto.js +228 -0
- package/dist/esm/modules/archive/compression/crc32.js +1 -1
- package/dist/esm/modules/archive/crypto/aes.js +1 -20
- package/dist/esm/modules/archive/crypto/index.js +3 -1
- package/dist/esm/modules/csv/worker/worker-script.generated.js +1 -1
- package/dist/esm/modules/excel/utils/cell-matrix.js +1 -0
- package/dist/esm/modules/excel/utils/encryptor.browser.js +7 -12
- package/dist/esm/modules/excel/utils/encryptor.js +4 -7
- package/dist/esm/modules/pdf/builder/document-builder.js +1493 -0
- package/dist/esm/modules/pdf/builder/form-appearance.js +140 -0
- package/dist/esm/modules/pdf/builder/image-utils.js +129 -0
- package/dist/esm/modules/pdf/builder/pdf-editor.js +1574 -0
- package/dist/esm/modules/pdf/builder/resource-merger.js +258 -0
- package/dist/esm/modules/pdf/core/digital-signature.js +659 -0
- package/dist/esm/modules/pdf/core/encryption.js +8 -7
- package/dist/esm/modules/pdf/core/pdf-object.js +38 -0
- package/dist/esm/modules/pdf/core/pdf-stream.js +66 -0
- package/dist/esm/modules/pdf/core/pdf-writer.js +271 -6
- package/dist/esm/modules/pdf/core/pdfa.js +261 -0
- package/dist/esm/modules/pdf/index.js +9 -0
- package/dist/esm/modules/pdf/reader/bookmark-extractor.js +324 -0
- package/dist/esm/modules/pdf/reader/pdf-decrypt.js +6 -5
- package/dist/esm/modules/pdf/reader/pdf-reader.js +26 -2
- package/dist/esm/modules/pdf/reader/table-extractor.js +365 -0
- package/dist/esm/modules/pdf/render/layout-engine.js +112 -5
- package/dist/esm/modules/pdf/render/page-renderer.js +62 -103
- package/dist/esm/modules/pdf/render/pdf-exporter.js +2 -61
- package/dist/esm/modules/pdf/render/style-converter.js +1 -1
- package/dist/esm/modules/stream/browser/readable.js +8 -2
- package/dist/esm/{modules/pdf/core/crypto.js → utils/crypto.browser.js} +91 -101
- package/dist/esm/utils/crypto.js +209 -0
- package/dist/iife/excelts.iife.js +1248 -1074
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +53 -54
- package/dist/types/modules/archive/crypto/aes.d.ts +0 -8
- package/dist/types/modules/archive/crypto/index.d.ts +2 -1
- package/dist/types/modules/csv/parse/row-processor.d.ts +1 -1
- package/dist/types/modules/excel/utils/encryptor.browser.d.ts +4 -5
- package/dist/types/modules/excel/utils/encryptor.d.ts +1 -1
- package/dist/types/modules/pdf/builder/document-builder.d.ts +517 -0
- package/dist/types/modules/pdf/builder/form-appearance.d.ts +56 -0
- package/dist/types/modules/pdf/builder/image-utils.d.ts +39 -0
- package/dist/types/modules/pdf/builder/pdf-editor.d.ts +230 -0
- package/dist/types/modules/pdf/builder/resource-merger.d.ts +41 -0
- package/dist/types/modules/pdf/core/digital-signature.d.ts +109 -0
- package/dist/types/modules/pdf/core/pdf-object.d.ts +11 -0
- package/dist/types/modules/pdf/core/pdf-stream.d.ts +32 -0
- package/dist/types/modules/pdf/core/pdf-writer.d.ts +55 -1
- package/dist/types/modules/pdf/core/pdfa.d.ts +62 -0
- package/dist/types/modules/pdf/index.d.ts +11 -0
- package/dist/types/modules/pdf/reader/bookmark-extractor.d.ts +35 -0
- package/dist/types/modules/pdf/reader/pdf-reader.d.ts +17 -0
- package/dist/types/modules/pdf/reader/table-extractor.d.ts +69 -0
- package/dist/types/modules/pdf/render/layout-engine.d.ts +21 -1
- package/dist/types/modules/pdf/render/page-renderer.d.ts +2 -9
- package/dist/types/modules/pdf/render/style-converter.d.ts +4 -0
- package/dist/types/modules/pdf/types.d.ts +14 -1
- package/dist/types/utils/crypto.browser.d.ts +64 -0
- package/dist/types/utils/crypto.d.ts +97 -0
- package/package.json +110 -111
- package/dist/browser/modules/pdf/core/crypto.d.ts +0 -65
- package/dist/types/modules/pdf/core/crypto.d.ts +0 -65
|
@@ -57,7 +57,9 @@ const text_reconstruction_1 = require("./text-reconstruction");
|
|
|
57
57
|
const image_extractor_1 = require("./image-extractor");
|
|
58
58
|
const annotation_extractor_1 = require("./annotation-extractor");
|
|
59
59
|
const form_extractor_1 = require("./form-extractor");
|
|
60
|
+
const bookmark_extractor_1 = require("./bookmark-extractor");
|
|
60
61
|
const metadata_reader_1 = require("./metadata-reader");
|
|
62
|
+
const table_extractor_1 = require("./table-extractor");
|
|
61
63
|
const errors_1 = require("../errors");
|
|
62
64
|
const utils_base_1 = require("../../../utils/utils.base.js");
|
|
63
65
|
// =============================================================================
|
|
@@ -96,7 +98,9 @@ function prepareRead(data, options) {
|
|
|
96
98
|
extractImages: options?.extractImages ?? true,
|
|
97
99
|
extractMetadata: options?.extractMetadata ?? true,
|
|
98
100
|
extractAnnotations: options?.extractAnnotations ?? true,
|
|
99
|
-
extractFormFields: options?.extractFormFields ?? true
|
|
101
|
+
extractFormFields: options?.extractFormFields ?? true,
|
|
102
|
+
extractBookmarks: options?.extractBookmarks ?? true,
|
|
103
|
+
extractTables: options?.extractTables ?? false
|
|
100
104
|
};
|
|
101
105
|
const doc = new pdf_document_1.PdfDocument(data);
|
|
102
106
|
if ((0, pdf_decrypt_1.isEncrypted)(doc)) {
|
|
@@ -153,6 +157,16 @@ function processPage(pageDict, pageIdx, doc, opts) {
|
|
|
153
157
|
}
|
|
154
158
|
}
|
|
155
159
|
const { width, height } = getPageDimensions(pageDict, doc);
|
|
160
|
+
let tables = [];
|
|
161
|
+
if (opts.extractTables) {
|
|
162
|
+
try {
|
|
163
|
+
tables = (0, table_extractor_1.extractTables)(textFragments, width, height);
|
|
164
|
+
}
|
|
165
|
+
catch (err) {
|
|
166
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
167
|
+
warnings.push(`Table extraction failed on page ${pageNumber}: ${msg}`);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
156
170
|
return {
|
|
157
171
|
pageNumber,
|
|
158
172
|
text,
|
|
@@ -160,6 +174,7 @@ function processPage(pageDict, pageIdx, doc, opts) {
|
|
|
160
174
|
textFragments,
|
|
161
175
|
images,
|
|
162
176
|
annotations,
|
|
177
|
+
tables,
|
|
163
178
|
width,
|
|
164
179
|
height,
|
|
165
180
|
warnings
|
|
@@ -182,7 +197,16 @@ function finalizeRead(pages, totalPageCount, metadata, opts, doc) {
|
|
|
182
197
|
// Non-fatal — just return empty
|
|
183
198
|
}
|
|
184
199
|
}
|
|
185
|
-
|
|
200
|
+
let bookmarks = [];
|
|
201
|
+
if (opts.extractBookmarks) {
|
|
202
|
+
try {
|
|
203
|
+
bookmarks = (0, bookmark_extractor_1.extractBookmarks)(doc);
|
|
204
|
+
}
|
|
205
|
+
catch {
|
|
206
|
+
// Non-fatal — just return empty
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
return { text: allText, pages, metadata, formFields, bookmarks };
|
|
186
210
|
}
|
|
187
211
|
// =============================================================================
|
|
188
212
|
// Helpers
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Table extraction from PDF pages using text fragment positioning.
|
|
4
|
+
*
|
|
5
|
+
* Detects tabular structures by analyzing the spatial layout of text fragments.
|
|
6
|
+
* Since PDF content streams typically render tables as positioned text (with or
|
|
7
|
+
* without drawn grid lines), this module uses a text-only heuristic:
|
|
8
|
+
*
|
|
9
|
+
* 1. Group fragments into lines by Y proximity
|
|
10
|
+
* 2. Detect column boundaries from consistent X-position clusters
|
|
11
|
+
* 3. Identify contiguous blocks of multi-column lines as tables
|
|
12
|
+
* 4. Map fragments to cells based on column/line membership
|
|
13
|
+
*
|
|
14
|
+
* @see content-interpreter.ts for TextFragment extraction
|
|
15
|
+
* @see text-reconstruction.ts for line grouping logic
|
|
16
|
+
*/
|
|
17
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
18
|
+
exports.extractTables = extractTables;
|
|
19
|
+
// =============================================================================
|
|
20
|
+
// Constants
|
|
21
|
+
// =============================================================================
|
|
22
|
+
/**
|
|
23
|
+
* Minimum number of columns required to consider a block of lines as a table.
|
|
24
|
+
*/
|
|
25
|
+
const MIN_TABLE_COLUMNS = 2;
|
|
26
|
+
/**
|
|
27
|
+
* Minimum number of consecutive multi-column lines to form a table.
|
|
28
|
+
*/
|
|
29
|
+
const MIN_TABLE_ROWS = 2;
|
|
30
|
+
// =============================================================================
|
|
31
|
+
// Public API
|
|
32
|
+
// =============================================================================
|
|
33
|
+
/**
 * Detect tables on a page from the spatial layout of its text fragments.
 *
 * Pipeline: keep only horizontal, non-blank fragments; group them into
 * lines; derive column boundaries; find runs of consecutive tabular lines;
 * turn every run into a table structure. Drawn grid lines are not consulted.
 * Every stage bails out with an empty result as soon as there is too little
 * material to satisfy MIN_TABLE_COLUMNS / MIN_TABLE_ROWS.
 *
 * @param fragments - Text fragments of the page (read here via `isVertical` and `text`)
 * @param pageWidth - Page width in points (forwarded to column detection)
 * @param pageHeight - Page height in points (forwarded to table construction)
 * @returns Array of detected tables; empty when nothing qualifies
 */
function extractTables(fragments, pageWidth, pageHeight) {
    if (fragments.length === 0) {
        return [];
    }
    // Vertical text and whitespace-only fragments never take part in detection.
    const candidates = fragments.filter(frag => !frag.isVertical && frag.text.trim().length > 0);
    const minFragmentCount = MIN_TABLE_COLUMNS * MIN_TABLE_ROWS;
    if (candidates.length < minFragmentCount) {
        return [];
    }
    const textLines = groupFragmentsIntoLines(candidates);
    if (textLines.length < MIN_TABLE_ROWS) {
        return [];
    }
    const columnBounds = detectColumnBoundaries(textLines, pageWidth);
    if (columnBounds.length < MIN_TABLE_COLUMNS) {
        return [];
    }
    // One table per contiguous range; ranges that yield no table are dropped.
    return findTableRanges(textLines, columnBounds)
        .map(({ start, end }) => buildTable(textLines, columnBounds, start, end, pageHeight))
        .filter(table => table);
}
|
|
75
|
+
// =============================================================================
|
|
76
|
+
// Step 1: Group Fragments into Lines
|
|
77
|
+
// =============================================================================
|
|
78
|
+
/**
 * Group text fragments into horizontal lines based on Y proximity.
 *
 * Fragments are ordered by descending Y; two fragments whose Y values differ
 * by at most 1 are ordered by ascending X instead. A fragment joins the
 * current line when its Y lies within `max(0.4 * avgFontSize, 2)` of the
 * line's Y, where `avgFontSize` averages the font size of the line's first
 * collected fragment and the candidate. Otherwise the current line is closed
 * and a new one is started at the candidate's Y.
 *
 * The input array is not reordered (a copy is sorted).
 *
 * @param fragments - Fragments exposing numeric `x`, `y` and `fontSize`
 * @returns Lines as `{ y, fragments, fontSize }` in the order they were
 *          formed; `fragments` is sorted by X and `fontSize` is taken from
 *          the leftmost fragment. Empty input yields an empty array.
 */
function groupFragmentsIntoLines(fragments) {
    // Without this guard `sorted[0].y` below throws a TypeError on empty input.
    if (fragments.length === 0) {
        return [];
    }
    const sorted = [...fragments].sort((a, b) => {
        const dy = b.y - a.y;
        return Math.abs(dy) > 1 ? dy : a.x - b.x;
    });
    const lines = [];
    // Sorts a finished line left-to-right and records it.
    const closeLine = (y, members) => {
        members.sort((a, b) => a.x - b.x);
        lines.push({
            y,
            fragments: members,
            fontSize: members[0].fontSize
        });
    };
    let members = [sorted[0]];
    let lineY = sorted[0].y;
    for (let i = 1; i < sorted.length; i++) {
        const frag = sorted[i];
        const avgFontSize = (members[0].fontSize + frag.fontSize) / 2;
        const threshold = Math.max(avgFontSize * 0.4, 2);
        if (Math.abs(frag.y - lineY) <= threshold) {
            members.push(frag);
            continue;
        }
        closeLine(lineY, members);
        members = [frag];
        lineY = frag.y;
    }
    // `members` always holds at least one fragment here.
    closeLine(lineY, members);
    return lines;
}
|
|
124
|
+
// =============================================================================
|
|
125
|
+
// Step 2: Detect Column Boundaries
|
|
126
|
+
// =============================================================================
|
|
127
|
+
/**
 * Derive column boundaries from the fragment positions of all lines.
 *
 * Steps:
 *   1. Build a histogram of fragments-per-line (lines with fewer than
 *      MIN_TABLE_COLUMNS fragments are ignored) and pick the most frequent
 *      count; ties go to the larger count. That count is the expected number
 *      of columns and must occur on at least MIN_TABLE_ROWS lines.
 *   2. Using only lines with exactly that many fragments, compute for each
 *      gap between fragment i and fragment i+1 the largest right edge of
 *      fragment i and the smallest left edge of fragment i+1.
 *   3. Put a divider at the midpoint when those two leave a clean gap,
 *      otherwise at the smallest left edge of fragment i+1.
 *   4. Columns span from the leftmost fragment start to the rightmost
 *      fragment end across ALL lines, cut at the dividers.
 *
 * @param lines - Lines as `{ fragments }`, each fragment exposing `x` and `width`
 * @param _pageWidth - Unused
 * @returns Array of `{ left, right }` column bounds, or an empty array when
 *          no consistent multi-column layout is found
 */
function detectColumnBoundaries(lines, _pageWidth) {
    // Step 1: histogram of fragments-per-line for lines wide enough to be tabular.
    const histogram = new Map();
    lines.forEach(({ fragments }) => {
        const n = fragments.length;
        if (n >= MIN_TABLE_COLUMNS) {
            histogram.set(n, (histogram.get(n) ?? 0) + 1);
        }
    });
    if (histogram.size === 0) {
        return [];
    }
    let expectedCols = 0;
    let expectedFreq = 0;
    histogram.forEach((freq, count) => {
        const wins = freq > expectedFreq || (freq === expectedFreq && count > expectedCols);
        if (wins) {
            expectedCols = count;
            expectedFreq = freq;
        }
    });
    if (expectedCols < MIN_TABLE_COLUMNS || expectedFreq < MIN_TABLE_ROWS) {
        return [];
    }
    // Step 2: per-gap extremes, taken only from lines with the expected count.
    const gapCount = expectedCols - 1;
    const rightmostEnd = Array.from({ length: gapCount }, () => -Infinity);
    const leftmostStart = Array.from({ length: gapCount }, () => Infinity);
    for (const { fragments } of lines) {
        if (fragments.length !== expectedCols) {
            continue;
        }
        for (let g = 0; g < gapCount; g++) {
            const current = fragments[g];
            const next = fragments[g + 1];
            rightmostEnd[g] = Math.max(rightmostEnd[g], current.x + current.width);
            leftmostStart[g] = Math.min(leftmostStart[g], next.x);
        }
    }
    // Step 3: midpoint for a clean gap, next column's left edge on overlap.
    const dividers = rightmostEnd.map((end, g) => {
        const start = leftmostStart[g];
        return start > end ? (end + start) / 2 : start;
    });
    if (dividers.length < 1) {
        return [];
    }
    // Step 4: overall horizontal extent across every line, not just matching ones.
    const everyFragment = lines.flatMap(line => line.fragments);
    const globalLeft = everyFragment.reduce((acc, f) => Math.min(acc, f.x), Infinity);
    const globalRight = everyFragment.reduce((acc, f) => Math.max(acc, f.x + f.width), -Infinity);
    const lefts = [globalLeft, ...dividers];
    const rights = [...dividers, globalRight];
    const columns = lefts.map((left, k) => ({ left, right: rights[k] }));
    return columns.length >= MIN_TABLE_COLUMNS ? columns : [];
}
|
|
225
|
+
// =============================================================================
|
|
226
|
+
// Step 3: Find Contiguous Table Ranges
|
|
227
|
+
// =============================================================================
|
|
228
|
+
/**
 * Locate contiguous runs of "tabular" lines.
 *
 * A line is tabular when its fragments land in at least MIN_TABLE_COLUMNS
 * distinct columns (column membership is decided by `findColumnIndex` on the
 * fragment's `x`). A run is reported only when it spans at least
 * MIN_TABLE_ROWS lines.
 *
 * @param lines - Lines as `{ fragments }`, in page order
 * @param columns - Column bounds as produced by column detection
 * @returns Array of `{ start, end }` inclusive line-index ranges
 */
function findTableRanges(lines, columns) {
    // One boolean per line: does it occupy enough distinct columns?
    const tabularFlags = lines.map(line => {
        const occupied = new Set();
        for (const frag of line.fragments) {
            const idx = findColumnIndex(frag.x, columns);
            if (idx >= 0) {
                occupied.add(idx);
            }
        }
        return occupied.size >= MIN_TABLE_COLUMNS;
    });
    const ranges = [];
    let runStart = -1;
    tabularFlags.forEach((tabular, i) => {
        if (tabular) {
            if (runStart === -1) {
                runStart = i;
            }
            return;
        }
        if (runStart === -1) {
            return;
        }
        // A non-tabular line ends the open run; keep it only if long enough.
        if (i - runStart >= MIN_TABLE_ROWS) {
            ranges.push({ start: runStart, end: i - 1 });
        }
        runStart = -1;
    });
    // A run that reaches the last line is still open at this point.
    if (runStart !== -1 && lines.length - runStart >= MIN_TABLE_ROWS) {
        ranges.push({ start: runStart, end: lines.length - 1 });
    }
    return ranges;
}
|
|
266
|
+
/**
 * Map an X position to a column index.
 *
 * Columns are scanned from last to first and the first one whose left edge
 * (with a tolerance of 1) is at or before `x` wins. The right edge of a
 * column is not checked, so any `x` beyond the last column's left edge maps
 * to the last column.
 *
 * @param x - X position to classify
 * @param columns - Column bounds; only `left` is read
 * @returns Index into `columns`, or -1 when `x` lies more than 1 to the left
 *          of the first column (or `columns` is empty)
 */
function findColumnIndex(x, columns) {
    let idx = columns.length;
    while (idx-- > 0) {
        if (x >= columns[idx].left - 1) {
            return idx;
        }
    }
    return -1;
}
|
|
278
|
+
// =============================================================================
|
|
279
|
+
// Step 4: Build Table Structure
|
|
280
|
+
// =============================================================================
|
|
281
|
+
/**
 * Build a table structure from an inclusive range of lines.
 *
 * Every line in `[startLine, endLine]` becomes one row via `buildRow`. The
 * bounding box is derived from the fragments of those lines:
 *   - `x` is the smallest fragment start, `width` the distance from there to
 *     the largest fragment end (both 0-based when the lines hold no fragments);
 *   - `y` is the Y of the first line;
 *   - `height` is first-line Y minus last-line Y plus the last line's font size.
 *
 * The horizontal extent is accumulated with a running min/max rather than
 * `Math.min(...array)`: spreading one argument per fragment can exceed the
 * engine's argument limit and throw a RangeError for very large inputs.
 *
 * @param lines - All lines of the page as `{ y, fontSize, fragments }`
 * @param columns - Column bounds, forwarded to `buildRow`
 * @param startLine - Index of the first line of the table (inclusive)
 * @param endLine - Index of the last line of the table (inclusive)
 * @param _pageHeight - Unused
 * @returns `{ rows, x, y, width, height }`, or `null` when the range is empty
 */
function buildTable(lines, columns, startLine, endLine, _pageHeight) {
    const rows = [];
    for (let li = startLine; li <= endLine; li++) {
        rows.push(buildRow(lines[li], columns, lines, li, startLine, endLine));
    }
    if (rows.length === 0) {
        return null;
    }
    const tableLines = lines.slice(startLine, endLine + 1);
    const firstLine = tableLines[0];
    const lastLine = tableLines[tableLines.length - 1];
    // Running extremes over every fragment of the table's lines.
    let minLeft = Infinity;
    let maxRight = -Infinity;
    let sawFragment = false;
    for (const line of tableLines) {
        for (const f of line.fragments) {
            sawFragment = true;
            minLeft = Math.min(minLeft, f.x);
            maxRight = Math.max(maxRight, f.x + f.width);
        }
    }
    const tableX = sawFragment ? minLeft : 0;
    const tableRight = sawFragment ? maxRight : 0;
    return {
        rows,
        x: tableX,
        y: firstLine.y,
        width: tableRight - tableX,
        // Top of first line to bottom of last line (including font height).
        height: firstLine.y - lastLine.y + lastLine.fontSize
    };
}
|
|
320
|
+
/**
 * Turn one line into a table row with exactly one cell per column.
 *
 * Each fragment is assigned to a column by `findColumnIndex` on its `x`;
 * fragments that map to no column (-1) are dropped. A column with fragments
 * yields a cell whose text is the fragments' texts joined by a single space
 * and trimmed, positioned from the first fragment's start to the last
 * fragment's end. A column without fragments yields an empty-text cell that
 * spans the column bounds. All cells take `y` and `height` from the line
 * (`line.y`, `line.fontSize`).
 *
 * @param line - Line as `{ y, fontSize, fragments }`
 * @param columns - Column bounds as `{ left, right }`
 * @param _allLines - Unused
 * @param _lineIdx - Unused
 * @param _startLine - Unused
 * @param _endLine - Unused
 * @returns `{ cells }` with `cells.length === columns.length`
 */
function buildRow(line, columns, _allLines, _lineIdx, _startLine, _endLine) {
    // One bucket per column, filled in the line's fragment order.
    const buckets = columns.map(() => []);
    for (const frag of line.fragments) {
        const idx = findColumnIndex(frag.x, columns);
        if (idx >= 0) {
            buckets[idx].push(frag);
        }
    }
    const { y, fontSize: height } = line;
    const cells = buckets.map((bucket, idx) => {
        if (bucket.length === 0) {
            // Empty cell: fall back to the column's own geometry.
            const { left, right } = columns[idx];
            return { text: "", x: left, y, width: right - left, height };
        }
        const first = bucket[0];
        const last = bucket[bucket.length - 1];
        return {
            text: bucket.map(f => f.text).join(" ").trim(),
            x: first.x,
            y,
            width: last.x + last.width - first.x,
            height
        };
    });
    return { cells };
}
|
|
@@ -20,6 +20,8 @@
|
|
|
20
20
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
21
|
exports.layoutSheet = layoutSheet;
|
|
22
22
|
exports.paginateRows = paginateRows;
|
|
23
|
+
exports.borderPrecedence = borderPrecedence;
|
|
24
|
+
exports.resolveSharedBorders = resolveSharedBorders;
|
|
23
25
|
const types_1 = require("../types");
|
|
24
26
|
const font_manager_1 = require("../font/font-manager");
|
|
25
27
|
const style_converter_1 = require("./style-converter");
|
|
@@ -244,6 +246,9 @@ function buildPageLayout(ctx, rowPage, colGroup, currentPageCount, sheet, option
|
|
|
244
246
|
cellGrid.set(`${ri}:${gci}`, layoutCell);
|
|
245
247
|
}
|
|
246
248
|
}
|
|
249
|
+
// Resolve shared borders: on each shared edge between adjacent cells, keep
|
|
250
|
+
// only the winning border for drawing but preserve insets for both cells.
|
|
251
|
+
resolveSharedBorders(cellGrid, rowPage.length, colGroup.length);
|
|
247
252
|
// Compute text overflow widths for non-wrapped cells
|
|
248
253
|
computeTextOverflows(cellGrid, rowPage, colGroup, visibleRows, visibleCols, groupColWidths, mergeMap, fontManager);
|
|
249
254
|
return {
|
|
@@ -402,7 +407,16 @@ function computeRowHeights(sheet, scaleFactor, printRange, fontManager, options)
|
|
|
402
407
|
const fontSize = getCellFontSize(cell);
|
|
403
408
|
const wrapLineCount = countWrapLines(cell, fontSize, scaleFactor, sheet, fontManager, options);
|
|
404
409
|
const lineHeight = fontSize * constants_1.LINE_HEIGHT_FACTOR;
|
|
405
|
-
|
|
410
|
+
// Account for border width: half of each border extends inward
|
|
411
|
+
const borderTop = cell.style?.border?.top?.style
|
|
412
|
+
? (0, style_converter_1.borderStyleToLineWidth)(cell.style.border.top.style) / 2
|
|
413
|
+
: 0;
|
|
414
|
+
const borderBottom = cell.style?.border?.bottom?.style
|
|
415
|
+
? (0, style_converter_1.borderStyleToLineWidth)(cell.style.border.bottom.style) / 2
|
|
416
|
+
: 0;
|
|
417
|
+
const neededHeight = fontSize +
|
|
418
|
+
(wrapLineCount - 1) * lineHeight +
|
|
419
|
+
(constants_1.CELL_PADDING_V + borderTop + borderBottom) * 2;
|
|
406
420
|
if (neededHeight > height) {
|
|
407
421
|
height = neededHeight;
|
|
408
422
|
}
|
|
@@ -447,7 +461,13 @@ function countWrapLines(cell, fontSize, scaleFactor, sheet, fontManager, options
|
|
|
447
461
|
const colWidth = col?.width ?? DEFAULT_COLUMN_WIDTH;
|
|
448
462
|
const scaledColPts = (colWidth * constants_1.MAX_DIGIT_WIDTH_PX + constants_1.EXCEL_COLUMN_PADDING_PX) * constants_1.PX_TO_PT * scaleFactor;
|
|
449
463
|
const indent = cell.style.alignment.indent ?? 0;
|
|
450
|
-
const
|
|
464
|
+
const borderLeft = cell.style?.border?.left?.style
|
|
465
|
+
? (0, style_converter_1.borderStyleToLineWidth)(cell.style.border.left.style) / 2
|
|
466
|
+
: 0;
|
|
467
|
+
const borderRight = cell.style?.border?.right?.style
|
|
468
|
+
? (0, style_converter_1.borderStyleToLineWidth)(cell.style.border.right.style) / 2
|
|
469
|
+
: 0;
|
|
470
|
+
const padding = constants_1.CELL_PADDING_H + borderLeft + (constants_1.CELL_PADDING_H + borderRight) + indent * constants_1.INDENT_WIDTH;
|
|
451
471
|
const effectiveWidth = Math.max(scaledColPts - padding, 1);
|
|
452
472
|
const scaledFontSize = fontSize * scaleFactor;
|
|
453
473
|
const fontProps = (0, style_converter_1.extractFontProperties)(cell.style.font, options.defaultFontFamily, options.defaultFontSize);
|
|
@@ -641,6 +661,7 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
|
|
|
641
661
|
}
|
|
642
662
|
// Rich text runs
|
|
643
663
|
const richText = buildRichTextRuns(cell, options, fontManager, scaleFactor);
|
|
664
|
+
const borders = (0, style_converter_1.excelBordersToPdf)(style.border);
|
|
644
665
|
return {
|
|
645
666
|
text,
|
|
646
667
|
rect: { x, y, width, height },
|
|
@@ -655,7 +676,13 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
|
|
|
655
676
|
horizontalAlign: resolveHorizontalAlign(style.alignment, cell?.type, cell?.result),
|
|
656
677
|
verticalAlign: (0, style_converter_1.excelVAlignToPdf)(style.alignment),
|
|
657
678
|
wrapText: style.alignment?.wrapText ?? false,
|
|
658
|
-
borders
|
|
679
|
+
borders,
|
|
680
|
+
borderInsets: {
|
|
681
|
+
top: (borders.top?.width ?? 0) / 2,
|
|
682
|
+
right: (borders.right?.width ?? 0) / 2,
|
|
683
|
+
bottom: (borders.bottom?.width ?? 0) / 2,
|
|
684
|
+
left: (borders.left?.width ?? 0) / 2
|
|
685
|
+
},
|
|
659
686
|
colSpan,
|
|
660
687
|
rowSpan,
|
|
661
688
|
hyperlink: cell?.hyperlink ?? null,
|
|
@@ -666,6 +693,84 @@ function buildLayoutCell(cell, x, y, width, height, colSpan, rowSpan, options, f
|
|
|
666
693
|
};
|
|
667
694
|
}
|
|
668
695
|
// =============================================================================
|
|
696
|
+
// Shared-Edge Border Resolution
|
|
697
|
+
// =============================================================================
|
|
698
|
+
/**
 * Score a border for shared-edge conflict resolution; the higher score wins.
 *
 * The score is a weighted sum of four terms:
 *   - `width * 1000`,
 *   - `+100` when `dashPattern` is empty (a solid line),
 *   - `+50` when `isDouble` is truthy,
 *   - `+(3 - (r + g + b)) * 10`, so a darker colour scores higher.
 *
 * Because this is a sum rather than a strict lexicographic comparison, the
 * three bonus terms together can outweigh a small width difference (they add
 * up to at most 180 when each colour channel lies in 0..1 — assumed from the
 * "max RGB sum = 3" note in the original code; confirm against the colour type).
 *
 * @param b Border with `width`, `dashPattern`, `isDouble` and `color` ({ r, g, b }).
 * @returns Numeric precedence score.
 */
function borderPrecedence(b) {
    const { width, dashPattern, isDouble, color } = b;
    const widthScore = width * 1000;
    const solidBonus = dashPattern.length === 0 ? 100 : 0;
    const doubleBonus = isDouble ? 50 : 0;
    // Lower channel sum means a darker colour, which earns the larger bonus.
    const darknessBonus = (3 - (color.r + color.g + color.b)) * 10;
    return widthScore + solidBonus + doubleBonus + darknessBonus;
}
|
|
720
|
+
/**
 * Resolve border conflicts on edges shared by adjacent layout cells.
 *
 * Cells are read from `cellGrid` under `"row:col"` keys. For every cell that
 * exists, two edges are examined, in this order:
 *   1. its right border against the `left` border of the cell at `col + 1`;
 *   2. its bottom border against the `top` border of the cell at `row + 1`.
 *
 * An edge is only contested when both cells declare a border on it. The
 * border with the higher `borderPrecedence` score wins; on a tie the current
 * (left / upper) cell wins. The losing cell's border side is set to `null`
 * so it is not drawn, and the losing cell's matching `borderInsets` entry is
 * set to half of the winner's width. The winning cell is left untouched.
 *
 * Cells are mutated in place; nothing is returned.
 *
 * NOTE(review): neighbours are always looked up exactly one grid step away;
 * confirm how cells spanning several rows/columns are keyed in `cellGrid`.
 *
 * @param cellGrid Map-like store of layout cells keyed by `"row:col"`.
 * @param rowCount Number of grid rows to scan.
 * @param colCount Number of grid columns to scan.
 */
function resolveSharedBorders(cellGrid, rowCount, colCount) {
    // Settle a single edge between `owner` and the cell stored under `neighborKey`.
    const settleEdge = (owner, ownSide, neighborKey, neighborSide) => {
        const ownBorder = owner.borders[ownSide];
        if (!ownBorder) {
            return;
        }
        const neighbor = cellGrid.get(neighborKey);
        const neighborBorder = neighbor?.borders[neighborSide];
        if (!neighborBorder) {
            return;
        }
        const ownScore = borderPrecedence(ownBorder);
        const neighborScore = borderPrecedence(neighborBorder);
        if (neighborScore > ownScore) {
            // Neighbour wins: owner stops drawing but reserves room for the winning line.
            owner.borderInsets[ownSide] = neighborBorder.width / 2;
            owner.borders[ownSide] = null;
            return;
        }
        // Owner wins (ties included): neighbour stops drawing.
        neighbor.borderInsets[neighborSide] = ownBorder.width / 2;
        neighbor.borders[neighborSide] = null;
    };
    for (let row = 0; row < rowCount; row += 1) {
        for (let col = 0; col < colCount; col += 1) {
            const current = cellGrid.get(`${row}:${col}`);
            if (current) {
                settleEdge(current, "right", `${row}:${col + 1}`, "left");
                settleEdge(current, "bottom", `${row + 1}:${col}`, "top");
            }
        }
    }
}
|
|
773
|
+
// =============================================================================
|
|
669
774
|
// Image Placement
|
|
670
775
|
// =============================================================================
|
|
671
776
|
/**
|
|
@@ -746,6 +851,7 @@ function propagateMergeBorders(layoutCell, mergeInfo, wsRowNumber, wsColNumber,
|
|
|
746
851
|
const converted = (0, style_converter_1.excelBordersToPdf)({ right: rightCellData.style.border.right });
|
|
747
852
|
if (converted.right) {
|
|
748
853
|
layoutCell.borders.right = converted.right;
|
|
854
|
+
layoutCell.borderInsets.right = converted.right.width / 2;
|
|
749
855
|
}
|
|
750
856
|
}
|
|
751
857
|
}
|
|
@@ -756,6 +862,7 @@ function propagateMergeBorders(layoutCell, mergeInfo, wsRowNumber, wsColNumber,
|
|
|
756
862
|
const converted = (0, style_converter_1.excelBordersToPdf)({ bottom: bottomCellData.style.border.bottom });
|
|
757
863
|
if (converted.bottom) {
|
|
758
864
|
layoutCell.borders.bottom = converted.bottom;
|
|
865
|
+
layoutCell.borderInsets.bottom = converted.bottom.width / 2;
|
|
759
866
|
}
|
|
760
867
|
}
|
|
761
868
|
}
|
|
@@ -785,7 +892,9 @@ function computeTextOverflows(cellGrid, rowPage, colGroup, visibleRows, visibleC
|
|
|
785
892
|
? fontManager.getEmbeddedResourceName()
|
|
786
893
|
: fontManager.ensureFont((0, font_manager_1.resolvePdfFontName)(cell.fontFamily, cell.bold, cell.italic));
|
|
787
894
|
const textWidth = fontManager.measureText(cell.text, resourceName, cell.fontSize);
|
|
788
|
-
const cellContentWidth = cell.rect.width -
|
|
895
|
+
const cellContentWidth = cell.rect.width -
|
|
896
|
+
(constants_1.CELL_PADDING_H + cell.borderInsets.left) -
|
|
897
|
+
(constants_1.CELL_PADDING_H + cell.borderInsets.right);
|
|
789
898
|
if (textWidth <= cellContentWidth) {
|
|
790
899
|
continue;
|
|
791
900
|
}
|