file2md 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +293 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +153 -0
- package/dist/index.js.map +1 -0
- package/dist/parsers/docx-parser.d.ts +20 -0
- package/dist/parsers/docx-parser.d.ts.map +1 -0
- package/dist/parsers/docx-parser.js +237 -0
- package/dist/parsers/docx-parser.js.map +1 -0
- package/dist/parsers/pdf-parser.d.ts +8 -0
- package/dist/parsers/pdf-parser.d.ts.map +1 -0
- package/dist/parsers/pdf-parser.js +98 -0
- package/dist/parsers/pdf-parser.js.map +1 -0
- package/dist/parsers/pptx-parser.d.ts +21 -0
- package/dist/parsers/pptx-parser.d.ts.map +1 -0
- package/dist/parsers/pptx-parser.js +264 -0
- package/dist/parsers/pptx-parser.js.map +1 -0
- package/dist/parsers/xlsx-parser.d.ts +19 -0
- package/dist/parsers/xlsx-parser.d.ts.map +1 -0
- package/dist/parsers/xlsx-parser.js +267 -0
- package/dist/parsers/xlsx-parser.js.map +1 -0
- package/dist/types/errors.d.ts +52 -0
- package/dist/types/errors.d.ts.map +1 -0
- package/dist/types/errors.js +76 -0
- package/dist/types/errors.js.map +1 -0
- package/dist/types/index.d.ts +5 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/interfaces.d.ts +228 -0
- package/dist/types/interfaces.d.ts.map +1 -0
- package/dist/types/interfaces.js +10 -0
- package/dist/types/interfaces.js.map +1 -0
- package/dist/utils/chart-extractor.d.ts +44 -0
- package/dist/utils/chart-extractor.d.ts.map +1 -0
- package/dist/utils/chart-extractor.js +258 -0
- package/dist/utils/chart-extractor.js.map +1 -0
- package/dist/utils/image-extractor.d.ts +50 -0
- package/dist/utils/image-extractor.d.ts.map +1 -0
- package/dist/utils/image-extractor.js +136 -0
- package/dist/utils/image-extractor.js.map +1 -0
- package/dist/utils/layout-parser.d.ts +55 -0
- package/dist/utils/layout-parser.d.ts.map +1 -0
- package/dist/utils/layout-parser.js +244 -0
- package/dist/utils/layout-parser.js.map +1 -0
- package/dist/utils/pdf-extractor.d.ts +46 -0
- package/dist/utils/pdf-extractor.d.ts.map +1 -0
- package/dist/utils/pdf-extractor.js +235 -0
- package/dist/utils/pdf-extractor.js.map +1 -0
- package/package.json +70 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
import fs from 'node:fs';
|
2
|
+
import path from 'node:path';
|
3
|
+
import { ImageExtractionError } from '../types/errors.js';
|
4
|
+
export class ImageExtractor {
|
5
|
+
outputDir;
|
6
|
+
imageCounter = 0;
|
7
|
+
extractedImages = new Map();
|
8
|
+
constructor(outputDir = 'images') {
|
9
|
+
this.outputDir = outputDir;
|
10
|
+
// Create images directory if it doesn't exist
|
11
|
+
if (!fs.existsSync(this.outputDir)) {
|
12
|
+
fs.mkdirSync(this.outputDir, { recursive: true });
|
13
|
+
}
|
14
|
+
}
|
15
|
+
/**
|
16
|
+
* Extract images from a ZIP archive (DOCX, XLSX, PPTX)
|
17
|
+
*/
|
18
|
+
async extractImagesFromZip(zip, basePath = '') {
|
19
|
+
const images = [];
|
20
|
+
zip.forEach((relativePath, file) => {
|
21
|
+
// Check for image files in common locations
|
22
|
+
if (this.isImageFile(relativePath)) {
|
23
|
+
images.push({
|
24
|
+
path: relativePath,
|
25
|
+
file: file,
|
26
|
+
basePath: basePath
|
27
|
+
});
|
28
|
+
}
|
29
|
+
});
|
30
|
+
const extractedImages = [];
|
31
|
+
for (const img of images) {
|
32
|
+
try {
|
33
|
+
const imageData = await img.file.async('nodebuffer');
|
34
|
+
const savedPath = await this.saveImage(imageData, img.path, img.basePath);
|
35
|
+
if (savedPath) {
|
36
|
+
extractedImages.push({
|
37
|
+
originalPath: img.path,
|
38
|
+
savedPath: savedPath,
|
39
|
+
basePath: img.basePath,
|
40
|
+
format: this.getImageFormat(img.path),
|
41
|
+
size: imageData.length
|
42
|
+
});
|
43
|
+
}
|
44
|
+
}
|
45
|
+
catch (error) {
|
46
|
+
console.warn(`Failed to extract image ${img.path}:`, error instanceof Error ? error.message : 'Unknown error');
|
47
|
+
}
|
48
|
+
}
|
49
|
+
return extractedImages;
|
50
|
+
}
|
51
|
+
/**
|
52
|
+
* Save an image buffer to disk
|
53
|
+
*/
|
54
|
+
async saveImage(buffer, originalPath, basePath = '') {
|
55
|
+
this.imageCounter++;
|
56
|
+
const ext = path.extname(originalPath) || '.png';
|
57
|
+
const filename = `image_${this.imageCounter}${ext}`;
|
58
|
+
const fullPath = path.join(this.outputDir, filename);
|
59
|
+
try {
|
60
|
+
fs.writeFileSync(fullPath, buffer);
|
61
|
+
// Store mapping for reference lookup
|
62
|
+
const key = basePath + originalPath;
|
63
|
+
this.extractedImages.set(key, filename);
|
64
|
+
return filename;
|
65
|
+
}
|
66
|
+
catch (error) {
|
67
|
+
const message = error instanceof Error ? error.message : 'Unknown error';
|
68
|
+
throw new ImageExtractionError(`Failed to save image ${filename}: ${message}`, error);
|
69
|
+
}
|
70
|
+
}
|
71
|
+
/**
|
72
|
+
* Check if a file path represents an image
|
73
|
+
*/
|
74
|
+
isImageFile(filePath) {
|
75
|
+
const imageExtensions = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.svg', '.emf', '.wmf'];
|
76
|
+
const ext = path.extname(filePath).toLowerCase();
|
77
|
+
return imageExtensions.includes(ext) ||
|
78
|
+
filePath.includes('/media/') ||
|
79
|
+
filePath.includes('/images/') ||
|
80
|
+
filePath.includes('\\media\\') ||
|
81
|
+
filePath.includes('\\images\\');
|
82
|
+
}
|
83
|
+
/**
|
84
|
+
* Get image format from file extension
|
85
|
+
*/
|
86
|
+
getImageFormat(filePath) {
|
87
|
+
const ext = path.extname(filePath).toLowerCase();
|
88
|
+
return ext.startsWith('.') ? ext.slice(1) : 'unknown';
|
89
|
+
}
|
90
|
+
/**
|
91
|
+
* Get markdown reference for an image by its original path
|
92
|
+
*/
|
93
|
+
getImageReference(originalPath, basePath = '') {
|
94
|
+
const key = basePath + originalPath;
|
95
|
+
const savedFilename = this.extractedImages.get(key);
|
96
|
+
if (savedFilename) {
|
97
|
+
return ``;
|
98
|
+
}
|
99
|
+
return null;
|
100
|
+
}
|
101
|
+
/**
|
102
|
+
* Create markdown image reference
|
103
|
+
*/
|
104
|
+
getImageMarkdown(description = 'Image', imagePath) {
|
105
|
+
if (imagePath) {
|
106
|
+
return ``;
|
107
|
+
}
|
108
|
+
return ``;
|
109
|
+
}
|
110
|
+
/**
|
111
|
+
* Reset the image counter and clear extracted images map
|
112
|
+
*/
|
113
|
+
reset() {
|
114
|
+
this.imageCounter = 0;
|
115
|
+
this.extractedImages.clear();
|
116
|
+
}
|
117
|
+
/**
|
118
|
+
* Get the output directory for images
|
119
|
+
*/
|
120
|
+
get imageDirectory() {
|
121
|
+
return this.outputDir;
|
122
|
+
}
|
123
|
+
/**
|
124
|
+
* Get the current image counter
|
125
|
+
*/
|
126
|
+
get currentImageCount() {
|
127
|
+
return this.imageCounter;
|
128
|
+
}
|
129
|
+
/**
|
130
|
+
* Get all extracted image mappings
|
131
|
+
*/
|
132
|
+
get extractedImageMappings() {
|
133
|
+
return this.extractedImages;
|
134
|
+
}
|
135
|
+
}
|
136
|
+
//# sourceMappingURL=image-extractor.js.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"image-extractor.js","sourceRoot":"","sources":["../../src/utils/image-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAK7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAE1D,MAAM,OAAO,cAAc;IACR,SAAS,CAAS;IAC3B,YAAY,GAAW,CAAC,CAAC;IAChB,eAAe,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE7D,YAAY,YAAoB,QAAQ;QACtC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAE3B,8CAA8C;QAC9C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACnC,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,oBAAoB,CAAC,GAAU,EAAE,WAAmB,EAAE;QAC1D,MAAM,MAAM,GAIP,EAAE,CAAC;QAER,GAAG,CAAC,OAAO,CAAC,CAAC,YAAY,EAAE,IAAI,EAAE,EAAE;YACjC,4CAA4C;YAC5C,IAAI,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,EAAE,CAAC;gBACnC,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,YAAY;oBAClB,IAAI,EAAE,IAAI;oBACV,QAAQ,EAAE,QAAQ;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,eAAe,GAAgB,EAAE,CAAC;QACxC,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;gBACrD,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAC1E,IAAI,SAAS,EAAE,CAAC;oBACd,eAAe,CAAC,IAAI,CAAC;wBACnB,YAAY,EAAE,GAAG,CAAC,IAAI;wBACtB,SAAS,EAAE,SAAS;wBACpB,QAAQ,EAAE,GAAG,CAAC,QAAQ;wBACtB,MAAM,EAAE,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC;wBACrC,IAAI,EAAE,SAAS,CAAC,MAAM;qBACvB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,OAAO,CAAC,IAAI,CAAC,2BAA2B,GAAG,CAAC,IAAI,GAAG,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;YACjH,CAAC;QACH,CAAC;QAED,OAAO,eAAe,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,YAAoB,EAAE,WAAmB,EAAE;QACzE,IAAI,CAAC,YAAY,EAAE,CAAC;QACpB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,IAAI,MAAM,CAAC;QACjD,MAAM,QAAQ,GAAG,SAAS,IAAI,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC;QACpD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAErD,IAAI,CAAC;YACH,EAAE,CAAC,aAAa,CAAC
,QAAQ,EAAE,MAAM,CAAC,CAAC;YAEnC,qCAAqC;YACrC,MAAM,GAAG,GAAG,QAAQ,GAAG,YAAY,CAAC;YACpC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;YAExC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,MAAM,IAAI,oBAAoB,CAAC,wBAAwB,QAAQ,KAAK,OAAO,EAAE,EAAE,KAAc,CAAC,CAAC;QACjG,CAAC;IACH,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,QAAgB;QAC1B,MAAM,eAAe,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;QACnG,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QACjD,OAAO,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC;YAC7B,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC;YAC5B,QAAQ,CAAC,QAAQ,CAAC,UAAU,CAAC;YAC7B,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC;YAC9B,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAgB;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QACjD,OAAO,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IACxD,CAAC;IAED;;OAEG;IACH,iBAAiB,CAAC,YAAoB,EAAE,WAAmB,EAAE;QAC3D,MAAM,GAAG,GAAG,QAAQ,GAAG,YAAY,CAAC;QACpC,MAAM,aAAa,GAAG,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACpD,IAAI,aAAa,EAAE,CAAC;YAClB,OAAO,YAAY,IAAI,CAAC,SAAS,IAAI,aAAa,GAAG,CAAC;QACxD,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,gBAAgB,CAAC,cAAsB,OAAO,EAAE,SAAkB;QAChE,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,KAAK,WAAW,KAAK,IAAI,CAAC,SAAS,IAAI,SAAS,GAAG,CAAC;QAC7D,CAAC;QACD,OAAO,KAAK,WAAW,oBAAoB,CAAC;IAC9C,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;QACtB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,IAAI,cAAc;QAChB,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,IAAI,iBAAiB;QACnB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,IAAI,sBAAsB;QACxB,OAAO,IAAI,CAAC,eAAe,CAAC;IAC9B,CAAC;CACF"}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
import type { TableData, ListData, Position } from '../types/interfaces.js';
|
2
|
+
/**
 * Optional switches accepted by `LayoutParser.parseAdvancedTable`.
 * Every flag may be omitted.
 */
export interface TableFormatOptions {
    /** Whether per-cell alignment should be carried over into the generated table. */
    readonly preserveAlignment?: boolean;
    /** Whether table borders should be shown. */
    readonly showBorders?: boolean;
    /** Whether cell colour information should be carried over into the output. */
    readonly preserveColors?: boolean;
}
|
7
|
+
/** One column of a multi-column layout, as consumed by `LayoutParser.createColumns`. */
export interface ColumnData {
    /** Text content of the column. */
    readonly content: string;
}
|
10
|
+
/**
 * Public surface of the layout-to-Markdown helper: each method takes a piece
 * of structured layout data and returns text.
 */
export declare class LayoutParser {
    private tableCounter;

    /** Parse an advanced table with merged cells and styling. */
    parseAdvancedTable(tableData: TableData, options?: TableFormatOptions): string;

    /** Parse lists with proper nesting. */
    parseList(listData: ListData): string;

    /** Create a text box representation; `position` is optional. */
    createTextBox(content: string, position?: Position): string;

    /** Create a multi-column layout approximation. */
    createColumns(columns: readonly ColumnData[]): string;

    /** Parse headers and footers; `type` selects which of the two is produced. */
    parseHeaderFooter(content: string, type?: 'header' | 'footer'): string;

    /** Create a divider/separator in one of the supported styles. */
    createDivider(style?: 'simple' | 'thick' | 'dashed' | 'dotted'): string;

    /**
     * Calculate relative positioning for layout elements.
     * Accepts a read-only array and returns a mutable array of the same element type.
     */
    calculateRelativePosition<T extends { position?: Position }>(elements: readonly T[]): T[];

    /** Format text with approximate font sizes using headers. */
    formatWithSize(text: string, fontSize: number | string): string;

    /** Reset internal counters. */
    reset(): void;

    /** Get the current table counter. */
    get currentTableCount(): number;
}
|
55
|
+
//# sourceMappingURL=layout-parser.d.ts.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"layout-parser.d.ts","sourceRoot":"","sources":["../../src/utils/layout-parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,SAAS,EAGT,QAAQ,EAER,QAAQ,EAGT,MAAM,wBAAwB,CAAC;AAEhC,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,WAAW,CAAC,EAAE,OAAO,CAAC;IAC/B,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,YAAY,CAAa;IAEjC;;OAEG;IACH,kBAAkB,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,GAAE,kBAAuB,GAAG,MAAM;IAuGlF;;OAEG;IACH,SAAS,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM;IA8BrC;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM;IAmB3D;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,SAAS,UAAU,EAAE,GAAG,MAAM;IAuCrD;;OAEG;IACH,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,GAAE,QAAQ,GAAG,QAAmB,GAAG,MAAM;IAOhF;;OAEG;IACH,aAAa,CAAC,KAAK,GAAE,QAAQ,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAmB,GAAG,MAAM;IAajF;;OAEG;IACH,yBAAyB,CAAC,CAAC,SAAS;QAAE,QAAQ,CAAC,EAAE,QAAQ,CAAA;KAAE,EAAE,QAAQ,EAAE,SAAS,CAAC,EAAE,GAAG,CAAC,EAAE;IAgBzF;;OAEG;IACH,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM;IAe/D;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAE9B;CACF"}
|
@@ -0,0 +1,244 @@
|
|
1
|
+
/**
 * Converts structured layout data (tables, lists, text boxes, column layouts,
 * headers/footers, dividers) into Markdown text.
 */
export class LayoutParser {
    /**
     * Counter exposed through `currentTableCount` and cleared by `reset()`.
     * No method of this class increments it.
     */
    tableCounter = 0;

    /**
     * Parse an advanced table with merged cells and styling.
     *
     * The first row that has cells is treated as the header row: the Markdown
     * separator line is emitted directly after it and takes its alignment
     * markers from that row's cells. Rows without `cells` are skipped.
     *
     * @param tableData Object with a `rows` array; each row may carry `cells`, and each
     *                  cell may carry `text`, `merged`, `colSpan`, `bold`, `italic`,
     *                  `alignment` and `backgroundColor`.
     * @param options `preserveAlignment` (default `true`) pads cell text and sets alignment
     *                markers in the separator; `preserveColors` (default `false`) appends a
     *                `<!-- bg:… -->` comment to coloured cells. `showBorders` has no effect here.
     * @returns The Markdown table, or `''` when there are no rows.
     */
    parseAdvancedTable(tableData, options = {}) {
        if (!tableData.rows || tableData.rows.length === 0) {
            return '';
        }
        const { preserveAlignment = true, preserveColors = false } = options;
        const rows = tableData.rows;
        const colCount = Math.max(...rows.map(row => (row.cells ? row.cells.length : 0)));
        let markdown = '';
        let headerEmitted = false;

        for (const row of rows) {
            if (!row.cells) {
                continue;
            }
            let rowMarkdown = '|';
            for (let colIndex = 0; colIndex < colCount; colIndex++) {
                const cell = row.cells[colIndex];
                if (!cell) {
                    // Short row: pad with an empty cell so every row has colCount cells.
                    rowMarkdown += ' |';
                    continue;
                }
                let cellContent = cell.text || '';
                // Markdown has no column/row spans; a horizontal merge is only
                // approximated by widening the cell text with spaces.
                if (cell.merged && cell.colSpan > 1) {
                    cellContent += ' '.repeat(Math.max(0, cell.colSpan - 1) * 3);
                }
                // Apply text formatting
                if (cell.bold) {
                    cellContent = `**${cellContent}**`;
                }
                if (cell.italic) {
                    cellContent = `*${cellContent}*`;
                }
                // Apply alignment (approximate with spaces); 'left' and 'justify' stay as they are.
                if (preserveAlignment && cell.alignment) {
                    const cellWidth = Math.max(cellContent.length, 10);
                    if (cell.alignment === 'center') {
                        const padding = ' '.repeat(Math.floor((cellWidth - cellContent.length) / 2));
                        cellContent = padding + cellContent + padding;
                    }
                    else if (cell.alignment === 'right') {
                        cellContent = cellContent.padStart(cellWidth);
                    }
                }
                // Add background color note if enabled
                if (preserveColors && cell.backgroundColor) {
                    cellContent += ` <!-- bg:${cell.backgroundColor} -->`;
                }
                rowMarkdown += ` ${cellContent} |`;
            }
            markdown += rowMarkdown + '\n';

            // The separator must follow the first rendered row, even when earlier
            // rows were skipped for having no cells; otherwise the table is invalid.
            if (!headerEmitted) {
                headerEmitted = true;
                let separator = '|';
                for (let i = 0; i < colCount; i++) {
                    const alignment = preserveAlignment ? row.cells[i]?.alignment : undefined;
                    let sepContent = ' --- ';
                    if (alignment) {
                        if (alignment === 'center') {
                            sepContent = ':---:';
                        }
                        else if (alignment === 'right') {
                            sepContent = ' ---:';
                        }
                        else {
                            sepContent = ':--- ';
                        }
                    }
                    separator += sepContent + '|';
                }
                markdown += separator + '\n';
            }
        }
        return markdown;
    }

    /**
     * Parse lists with proper nesting.
     *
     * Ordered lists use the marker `1.` for every item and unordered lists use `-`.
     * Children are indented by the width of the marker plus one space per level,
     * which is the indentation Markdown needs to treat them as nested.
     *
     * @param listData Object with `items` (each with optional `text`, `bold`, `italic`,
     *                 `children`) and an `isOrdered` flag.
     * @returns The Markdown list, or `''` when there are no items.
     */
    parseList(listData) {
        if (!listData.items || listData.items.length === 0) {
            return '';
        }
        const marker = listData.isOrdered ? '1.' : '-';
        const indentUnit = ' '.repeat(marker.length + 1);

        const processListItems = (items, level = 0) => {
            let result = '';
            for (const item of items) {
                let itemText = item.text || '';
                // Apply formatting
                if (item.bold) {
                    itemText = `**${itemText}**`;
                }
                if (item.italic) {
                    itemText = `*${itemText}*`;
                }
                result += `${indentUnit.repeat(level)}${marker} ${itemText}\n`;
                // Handle nested lists
                if (item.children && item.children.length > 0) {
                    result += processListItems(item.children, level + 1);
                }
            }
            return result;
        };
        return processListItems(listData.items);
    }

    /**
     * Create text box representation: a blockquote headed "Text Box", optionally
     * preceded by an HTML comment recording the position.
     *
     * @param content Text of the box; each line becomes one blockquote line.
     * @param position Optional `{ x, y }`; the comment is only written when x or y is truthy.
     */
    createTextBox(content, position) {
        let markdown = '';
        if (position && (position.x || position.y)) {
            markdown += `<!-- Position: x=${position.x || 0}, y=${position.y || 0} -->\n`;
        }
        markdown += '> **Text Box**\n';
        markdown += '> \n';
        // Split content into lines and add blockquote formatting
        for (const line of content.split('\n')) {
            markdown += `> ${line}\n`;
        }
        return markdown + '\n';
    }

    /**
     * Create multi-column layout approximation using a Markdown table with one
     * table column per layout column and one table row per paragraph index.
     *
     * @param columns Columns to lay out; paragraphs inside `content` are separated by blank lines.
     * @returns The table, the single column's content when only one column is given,
     *          or `''` for an empty or missing list.
     */
    createColumns(columns) {
        if (!columns || columns.length <= 1) {
            // `columns` itself may be null/undefined here, so it needs optional access too.
            return columns?.[0]?.content || '';
        }
        // Split every column once up front instead of once per table row.
        const paragraphsByColumn = columns.map(col => (col.content ? col.content.split('\n\n') : []));
        // Find the maximum number of paragraphs in any column
        const maxParagraphs = Math.max(...paragraphsByColumn.map(paragraphs => paragraphs.length));

        let markdown = '<!-- Multi-column layout -->\n\n';
        // Create a table to approximate columns
        markdown += '|' + columns.map((_, i) => ` Column ${i + 1} |`).join('') + '\n';
        markdown += '|' + ' --- |'.repeat(columns.length) + '\n';
        // Create rows for each paragraph level
        for (let p = 0; p < maxParagraphs; p++) {
            markdown += '|';
            for (const paragraphs of paragraphsByColumn) {
                const paragraph = paragraphs[p] || '';
                // Line breaks would end the table row, so they become <br>.
                markdown += ` ${paragraph.replace(/\n/g, '<br>')} |`;
            }
            markdown += '\n';
        }
        return markdown + '\n';
    }

    /**
     * Parse headers and footers into an HTML comment naming the type followed
     * by a blockquote line holding a marker glyph and the content.
     *
     * @param content Header or footer text; an empty value yields `''`.
     * @param type `'header'` (default) or `'footer'`.
     */
    parseHeaderFooter(content, type = 'header') {
        if (!content) {
            return '';
        }
        // NOTE(review): both marker glyphs were mis-encoded in this copy of the file;
        // the emoji below are a best-effort reconstruction — confirm against upstream.
        const marker = type === 'header' ? '📄' : '🔻';
        return `<!-- Document ${type} -->\n> ${marker} ${content}\n\n`;
    }

    /**
     * Create divider/separator.
     *
     * @param style `'thick'` and `'dotted'` produce a line of glyphs; `'dashed'`,
     *              `'simple'` and any other value produce a Markdown rule (`---`).
     */
    createDivider(style = 'simple') {
        switch (style) {
            case 'thick':
                // NOTE(review): glyph reconstructed from a mis-encoded literal (39 characters) — confirm.
                return `\n${'━'.repeat(39)}\n\n`;
            case 'dotted':
                // NOTE(review): glyph reconstructed from a mis-encoded literal (21 bullets) — confirm.
                return `\n${'• '.repeat(20)}•\n\n`;
            case 'dashed':
            default:
                return '\n---\n\n';
        }
    }

    /**
     * Calculate relative positioning for layout elements.
     *
     * Returns a sorted copy (the input is not modified): elements whose `y`
     * values differ by less than 50 are considered to be on the same "row" and
     * are ordered by `x`; otherwise they are ordered by `y`. Missing coordinates count as 0.
     */
    calculateRelativePosition(elements) {
        return [...elements].sort((a, b) => {
            const yDiff = (a.position?.y || 0) - (b.position?.y || 0);
            if (Math.abs(yDiff) < 50) { // Same "row"
                return (a.position?.x || 0) - (b.position?.x || 0);
            }
            return yDiff;
        });
    }

    /**
     * Format text with approximate font sizes using headers.
     *
     * Sizes of 24, 20, 16 and 14 or more map to heading levels 1 to 4; sizes of
     * 10 or less are wrapped in `<small>`; everything else (including a falsy
     * size, `'normal'`, or a string that does not parse as a number) is returned unchanged.
     *
     * @param text Text to format.
     * @param fontSize Number, or a string whose leading number is used.
     */
    formatWithSize(text, fontSize) {
        if (!fontSize || fontSize === 'normal') {
            return text;
        }
        const size = typeof fontSize === 'string' ? parseFloat(fontSize) : fontSize;
        // Map font sizes to markdown headers (approximate)
        if (size >= 24) {
            return `# ${text}`;
        }
        if (size >= 20) {
            return `## ${text}`;
        }
        if (size >= 16) {
            return `### ${text}`;
        }
        if (size >= 14) {
            return `#### ${text}`;
        }
        if (size <= 10) {
            return `<small>${text}</small>`;
        }
        return text;
    }

    /**
     * Reset internal counters
     */
    reset() {
        this.tableCounter = 0;
    }

    /**
     * Get current table counter
     */
    get currentTableCount() {
        return this.tableCounter;
    }
}
|
244
|
+
//# sourceMappingURL=layout-parser.js.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"layout-parser.js","sourceRoot":"","sources":["../../src/utils/layout-parser.ts"],"names":[],"mappings":"AAqBA,MAAM,OAAO,YAAY;IACf,YAAY,GAAW,CAAC,CAAC;IAEjC;;OAEG;IACH,kBAAkB,CAAC,SAAoB,EAAE,UAA8B,EAAE;QACvE,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,SAAS,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnD,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,EACJ,iBAAiB,GAAG,IAAI,EACxB,WAAW,GAAG,IAAI,EAClB,cAAc,GAAG,KAAK,EACvB,GAAG,OAAO,CAAC;QAEZ,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;QAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhF,mBAAmB;QACnB,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,EAAE,CAAC;YAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,IAAI,WAAW,GAAG,GAAG,CAAC;YAEtB,IAAI,CAAC,GAAG,CAAC,KAAK;gBAAE,SAAS;YAEzB,oBAAoB;YACpB,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,GAAG,QAAQ,EAAE,QAAQ,EAAE,EAAE,CAAC;gBACvD,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBACjC,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,WAAW,IAAI,KAAK,CAAC;oBACrB,SAAS;gBACX,CAAC;gBAED,IAAI,WAAW,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;gBAElC,sBAAsB;gBACtB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;oBAChB,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;wBACrB,0CAA0C;wBAC1C,WAAW,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;oBAC/D,CAAC;oBACD,4DAA4D;gBAC9D,CAAC;gBAED,wBAAwB;gBACxB,IAAI,IAAI,CAAC,IAAI;oBAAE,WAAW,GAAG,KAAK,WAAW,IAAI,CAAC;gBAClD,IAAI,IAAI,CAAC,MAAM;oBAAE,WAAW,GAAG,IAAI,WAAW,GAAG,CAAC;gBAElD,4CAA4C;gBAC5C,IAAI,iBAAiB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oBACxC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;oBACnD,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;wBACvB,KAAK,QAAQ,CAAC,CAAC,CAAC;4BACd,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,GAAG,WAAW,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;4BACjE,WAAW,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,WAAW,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;4BACtE,MAAM;wBACR,CAA
C;wBACD,KAAK,OAAO,CAAC,CAAC,CAAC;4BACb,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;4BAC9C,MAAM;wBACR,CAAC;wBACD,8CAA8C;oBAChD,CAAC;gBACH,CAAC;gBAED,uCAAuC;gBACvC,IAAI,cAAc,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;oBAC3C,WAAW,IAAI,YAAY,IAAI,CAAC,eAAe,MAAM,CAAC;gBACxD,CAAC;gBAED,WAAW,IAAI,IAAI,WAAW,IAAI,CAAC;YACrC,CAAC;YAED,QAAQ,IAAI,WAAW,GAAG,IAAI,CAAC;YAE/B,uCAAuC;YACvC,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;gBACnB,IAAI,SAAS,GAAG,GAAG,CAAC;gBACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;oBAClC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;oBACjC,IAAI,UAAU,GAAG,OAAO,CAAC;oBAEzB,+BAA+B;oBAC/B,IAAI,iBAAiB,IAAI,IAAI,EAAE,SAAS,EAAE,CAAC;wBACzC,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;4BACvB,KAAK,QAAQ;gCACX,UAAU,GAAG,OAAO,CAAC;gCACrB,MAAM;4BACR,KAAK,OAAO;gCACV,UAAU,GAAG,OAAO,CAAC;gCACrB,MAAM;4BACR,KAAK,MAAM,CAAC;4BACZ;gCACE,UAAU,GAAG,OAAO,CAAC;gCACrB,MAAM;wBACV,CAAC;oBACH,CAAC;oBACD,SAAS,IAAI,UAAU,GAAG,GAAG,CAAC;gBAChC,CAAC;gBACD,QAAQ,IAAI,SAAS,GAAG,IAAI,CAAC;YAC/B,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,QAAkB;QAC1B,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE9D,IAAI,QAAQ,GAAG,EAAE,CAAC;QAElB,MAAM,gBAAgB,GAAG,CAAC,KAA0B,EAAE,QAAgB,CAAC,EAAU,EAAE;YACjF,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;gBAE/C,IAAI,QAAQ,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;gBAE/B,mBAAmB;gBACnB,IAAI,IAAI,CAAC,IAAI;oBAAE,QAAQ,GAAG,KAAK,QAAQ,IAAI,CAAC;gBAC5C,IAAI,IAAI,CAAC,MAAM;oBAAE,QAAQ,GAAG,IAAI,QAAQ,GAAG,CAAC;gBAE5C,MAAM,IAAI,GAAG,MAAM,GAAG,MAAM,IAAI,QAAQ,IAAI,CAAC;gBAE7C,sBAAsB;gBACtB,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC9C,MAAM,IAAI,gBAAgB,CAAC,IAAI,CAAC,QAAQ,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;gBACvD,CAAC;YACH,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC;QAEF,OAAO,gBAAgB,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC1C,CAAC;IA
ED;;OAEG;IACH,aAAa,CAAC,OAAe,EAAE,QAAmB;QAChD,IAAI,QAAQ,GAAG,EAAE,CAAC;QAElB,IAAI,QAAQ,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3C,QAAQ,IAAI,oBAAoB,QAAQ,CAAC,CAAC,IAAI,CAAC,OAAO,QAAQ,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;QAChF,CAAC;QAED,QAAQ,IAAI,kBAAkB,CAAC;QAC/B,QAAQ,IAAI,MAAM,CAAC;QAEnB,yDAAyD;QACzD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,QAAQ,IAAI,KAAK,IAAI,IAAI,CAAC;QAC5B,CAAC;QAED,OAAO,QAAQ,GAAG,IAAI,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,OAA8B;QAC1C,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACpC,OAAO,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;QACnC,CAAC;QAED,IAAI,QAAQ,GAAG,kCAAkC,CAAC;QAElD,wCAAwC;QACxC,QAAQ,IAAI,GAAG,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,QAAQ,IAAI,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC;QACnC,CAAC;QACD,QAAQ,IAAI,IAAI,CAAC;QAEjB,QAAQ,IAAI,GAAG,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,QAAQ,IAAI,QAAQ,CAAC;QACvB,CAAC;QACD,QAAQ,IAAI,IAAI,CAAC;QAEjB,sDAAsD;QACtD,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAClD,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CACnD,CAAC,CAAC;QAEH,uCAAuC;QACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,QAAQ,IAAI,GAAG,CAAC;YAChB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACtE,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBACtC,QAAQ,IAAI,IAAI,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC;YACvD,CAAC;YACD,QAAQ,IAAI,IAAI,CAAC;QACnB,CAAC;QAED,OAAO,QAAQ,GAAG,IAAI,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,iBAAiB,CAAC,OAAe,EAAE,OAA4B,QAAQ;QACrE,IAAI,CAAC,OAAO;YAAE,OAAO,EAAE,CAAC;QAExB,MAAM,MAAM,GAAG,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QAC/C,OAAO,iBAAiB,IAAI,WAAW,MAAM,IAAI,OAAO,MAAM,CAAC;I
ACjE,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,QAAkD,QAAQ;QACtE,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO;gBACV,OAAO,+CAA+C,CAAC;YACzD,KAAK,QAAQ;gBACX,OAAO,WAAW,CAAC;YACrB,KAAK,QAAQ;gBACX,OAAO,iDAAiD,CAAC;YAC3D;gBACE,OAAO,WAAW,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,yBAAyB,CAAoC,QAAsB;QACjF,qDAAqD;QACrD,OAAO,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACjC,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC;YAE9B,MAAM,KAAK,GAAG,EAAE,GAAG,EAAE,CAAC;YACtB,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,aAAa;gBACvC,OAAO,EAAE,GAAG,EAAE,CAAC;YACjB,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,IAAY,EAAE,QAAyB;QACpD,IAAI,CAAC,QAAQ,IAAI,QAAQ,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC;QAEpD,MAAM,IAAI,GAAG,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE5E,mDAAmD;QACnD,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,KAAK,IAAI,EAAE,CAAC;QACnC,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,MAAM,IAAI,EAAE,CAAC;QACpC,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,OAAO,IAAI,EAAE,CAAC;QACrC,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,QAAQ,IAAI,EAAE,CAAC;QACtC,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,UAAU,IAAI,UAAU,CAAC;QAEhD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,IAAI,iBAAiB;QACnB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;CACF"}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
import type { Buffer } from 'node:buffer';
|
2
|
+
import type { PageData } from '../types/interfaces.js';
|
3
|
+
import type { ImageExtractor } from './image-extractor.js';
|
4
|
+
/** Optional settings for PDF parsing. */
export interface PDFParseOptions {
    /** Upper bound on the number of pages, when given. */
    readonly maxPages?: number;
    /** Whether the page layout should be preserved. */
    readonly preserveLayout?: boolean;
}
|
8
|
+
/** Result record of a PDF parse. All fields are required and read-only. */
export interface PDFParseResult {
    /** Markdown text produced for the document. */
    readonly markdown: string;
    /** Images associated with the document. */
    readonly images: readonly import('../types/interfaces.js').ImageData[];
    /** Number of pages. */
    readonly pageCount: number;
    /** Free-form metadata keyed by name; values are untyped. */
    readonly metadata: Record<string, unknown>;
}
|
14
|
+
/**
 * Declaration of the PDF helper. It is constructed with an `ImageExtractor`
 * and exposes asynchronous extraction/formatting methods plus a page counter.
 */
export declare class PDFExtractor {
    private readonly imageExtractor;
    private pageCounter;

    constructor(imageExtractor: ImageExtractor);

    /** Extract images from PDF by converting pages to images. */
    extractImagesFromPDF(buffer: Buffer): Promise<readonly PageData[]>;

    /** Enhance text with layout detection; `pdfData` is optional and untyped. */
    enhanceTextWithLayout(text: string, pdfData?: unknown): Promise<string>;

    // Private helpers (signatures are not part of the declaration).
    private isLikelyHeading;
    private determineHeadingLevel;
    private isLikelyTableRow;
    private parseTableRow;
    private formatTableRows;
    private isListItem;
    private formatListItem;

    /** Create page breaks with images. */
    createPageBreaks(pageImages: readonly PageData[]): Promise<string>;

    /** Reset internal counters. */
    reset(): void;

    /** Get the current page counter. */
    get currentPageCount(): number;
}
|
46
|
+
//# sourceMappingURL=pdf-extractor.d.ts.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"pdf-extractor.d.ts","sourceRoot":"","sources":["../../src/utils/pdf-extractor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAEvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAW3D,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,SAAS,OAAO,wBAAwB,EAAE,SAAS,EAAE,CAAC;IACvE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC5C;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,WAAW,CAAa;gBAEpB,cAAc,EAAE,cAAc;IAI1C;;OAEG;IACG,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,QAAQ,EAAE,CAAC;IAmCxE;;OAEG;IACG,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAiE7E,OAAO,CAAC,eAAe;IAoBvB,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,gBAAgB;IAaxB,OAAO,CAAC,aAAa;IAgBrB,OAAO,CAAC,eAAe;IAgCvB,OAAO,CAAC,UAAU;IAYlB,OAAO,CAAC,cAAc;IAatB;;OAEG;IACG,gBAAgB,CAAC,UAAU,EAAE,SAAS,QAAQ,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAiBxE;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;OAEG;IACH,IAAI,gBAAgB,IAAI,MAAM,CAE7B;CACF"}
|