file2md 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +293 -0
  3. package/dist/index.d.ts +33 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +153 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/parsers/docx-parser.d.ts +20 -0
  8. package/dist/parsers/docx-parser.d.ts.map +1 -0
  9. package/dist/parsers/docx-parser.js +237 -0
  10. package/dist/parsers/docx-parser.js.map +1 -0
  11. package/dist/parsers/pdf-parser.d.ts +8 -0
  12. package/dist/parsers/pdf-parser.d.ts.map +1 -0
  13. package/dist/parsers/pdf-parser.js +98 -0
  14. package/dist/parsers/pdf-parser.js.map +1 -0
  15. package/dist/parsers/pptx-parser.d.ts +21 -0
  16. package/dist/parsers/pptx-parser.d.ts.map +1 -0
  17. package/dist/parsers/pptx-parser.js +264 -0
  18. package/dist/parsers/pptx-parser.js.map +1 -0
  19. package/dist/parsers/xlsx-parser.d.ts +19 -0
  20. package/dist/parsers/xlsx-parser.d.ts.map +1 -0
  21. package/dist/parsers/xlsx-parser.js +267 -0
  22. package/dist/parsers/xlsx-parser.js.map +1 -0
  23. package/dist/types/errors.d.ts +52 -0
  24. package/dist/types/errors.d.ts.map +1 -0
  25. package/dist/types/errors.js +76 -0
  26. package/dist/types/errors.js.map +1 -0
  27. package/dist/types/index.d.ts +5 -0
  28. package/dist/types/index.d.ts.map +1 -0
  29. package/dist/types/index.js +5 -0
  30. package/dist/types/index.js.map +1 -0
  31. package/dist/types/interfaces.d.ts +228 -0
  32. package/dist/types/interfaces.d.ts.map +1 -0
  33. package/dist/types/interfaces.js +10 -0
  34. package/dist/types/interfaces.js.map +1 -0
  35. package/dist/utils/chart-extractor.d.ts +44 -0
  36. package/dist/utils/chart-extractor.d.ts.map +1 -0
  37. package/dist/utils/chart-extractor.js +258 -0
  38. package/dist/utils/chart-extractor.js.map +1 -0
  39. package/dist/utils/image-extractor.d.ts +50 -0
  40. package/dist/utils/image-extractor.d.ts.map +1 -0
  41. package/dist/utils/image-extractor.js +136 -0
  42. package/dist/utils/image-extractor.js.map +1 -0
  43. package/dist/utils/layout-parser.d.ts +55 -0
  44. package/dist/utils/layout-parser.d.ts.map +1 -0
  45. package/dist/utils/layout-parser.js +244 -0
  46. package/dist/utils/layout-parser.js.map +1 -0
  47. package/dist/utils/pdf-extractor.d.ts +46 -0
  48. package/dist/utils/pdf-extractor.d.ts.map +1 -0
  49. package/dist/utils/pdf-extractor.js +235 -0
  50. package/dist/utils/pdf-extractor.js.map +1 -0
  51. package/package.json +70 -0
@@ -0,0 +1,136 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { ImageExtractionError } from '../types/errors.js';
4
+ export class ImageExtractor {
5
+ outputDir;
6
+ imageCounter = 0;
7
+ extractedImages = new Map();
8
+ constructor(outputDir = 'images') {
9
+ this.outputDir = outputDir;
10
+ // Create images directory if it doesn't exist
11
+ if (!fs.existsSync(this.outputDir)) {
12
+ fs.mkdirSync(this.outputDir, { recursive: true });
13
+ }
14
+ }
15
+ /**
16
+ * Extract images from a ZIP archive (DOCX, XLSX, PPTX)
17
+ */
18
+ async extractImagesFromZip(zip, basePath = '') {
19
+ const images = [];
20
+ zip.forEach((relativePath, file) => {
21
+ // Check for image files in common locations
22
+ if (this.isImageFile(relativePath)) {
23
+ images.push({
24
+ path: relativePath,
25
+ file: file,
26
+ basePath: basePath
27
+ });
28
+ }
29
+ });
30
+ const extractedImages = [];
31
+ for (const img of images) {
32
+ try {
33
+ const imageData = await img.file.async('nodebuffer');
34
+ const savedPath = await this.saveImage(imageData, img.path, img.basePath);
35
+ if (savedPath) {
36
+ extractedImages.push({
37
+ originalPath: img.path,
38
+ savedPath: savedPath,
39
+ basePath: img.basePath,
40
+ format: this.getImageFormat(img.path),
41
+ size: imageData.length
42
+ });
43
+ }
44
+ }
45
+ catch (error) {
46
+ console.warn(`Failed to extract image ${img.path}:`, error instanceof Error ? error.message : 'Unknown error');
47
+ }
48
+ }
49
+ return extractedImages;
50
+ }
51
+ /**
52
+ * Save an image buffer to disk
53
+ */
54
+ async saveImage(buffer, originalPath, basePath = '') {
55
+ this.imageCounter++;
56
+ const ext = path.extname(originalPath) || '.png';
57
+ const filename = `image_${this.imageCounter}${ext}`;
58
+ const fullPath = path.join(this.outputDir, filename);
59
+ try {
60
+ fs.writeFileSync(fullPath, buffer);
61
+ // Store mapping for reference lookup
62
+ const key = basePath + originalPath;
63
+ this.extractedImages.set(key, filename);
64
+ return filename;
65
+ }
66
+ catch (error) {
67
+ const message = error instanceof Error ? error.message : 'Unknown error';
68
+ throw new ImageExtractionError(`Failed to save image ${filename}: ${message}`, error);
69
+ }
70
+ }
71
+ /**
72
+ * Check if a file path represents an image
73
+ */
74
+ isImageFile(filePath) {
75
+ const imageExtensions = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.svg', '.emf', '.wmf'];
76
+ const ext = path.extname(filePath).toLowerCase();
77
+ return imageExtensions.includes(ext) ||
78
+ filePath.includes('/media/') ||
79
+ filePath.includes('/images/') ||
80
+ filePath.includes('\\media\\') ||
81
+ filePath.includes('\\images\\');
82
+ }
83
+ /**
84
+ * Get image format from file extension
85
+ */
86
+ getImageFormat(filePath) {
87
+ const ext = path.extname(filePath).toLowerCase();
88
+ return ext.startsWith('.') ? ext.slice(1) : 'unknown';
89
+ }
90
+ /**
91
+ * Get markdown reference for an image by its original path
92
+ */
93
+ getImageReference(originalPath, basePath = '') {
94
+ const key = basePath + originalPath;
95
+ const savedFilename = this.extractedImages.get(key);
96
+ if (savedFilename) {
97
+ return `![Image](${this.outputDir}/${savedFilename})`;
98
+ }
99
+ return null;
100
+ }
101
+ /**
102
+ * Create markdown image reference
103
+ */
104
+ getImageMarkdown(description = 'Image', imagePath) {
105
+ if (imagePath) {
106
+ return `![${description}](${this.outputDir}/${imagePath})`;
107
+ }
108
+ return `![${description}](image-not-found)`;
109
+ }
110
+ /**
111
+ * Reset the image counter and clear extracted images map
112
+ */
113
+ reset() {
114
+ this.imageCounter = 0;
115
+ this.extractedImages.clear();
116
+ }
117
+ /**
118
+ * Get the output directory for images
119
+ */
120
+ get imageDirectory() {
121
+ return this.outputDir;
122
+ }
123
+ /**
124
+ * Get the current image counter
125
+ */
126
+ get currentImageCount() {
127
+ return this.imageCounter;
128
+ }
129
+ /**
130
+ * Get all extracted image mappings
131
+ */
132
+ get extractedImageMappings() {
133
+ return this.extractedImages;
134
+ }
135
+ }
136
+ //# sourceMappingURL=image-extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image-extractor.js","sourceRoot":"","sources":["../../src/utils/image-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAK7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAE1D,MAAM,OAAO,cAAc;IACR,SAAS,CAAS;IAC3B,YAAY,GAAW,CAAC,CAAC;IAChB,eAAe,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE7D,YAAY,YAAoB,QAAQ;QACtC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAE3B,8CAA8C;QAC9C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACnC,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,oBAAoB,CAAC,GAAU,EAAE,WAAmB,EAAE;QAC1D,MAAM,MAAM,GAIP,EAAE,CAAC;QAER,GAAG,CAAC,OAAO,CAAC,CAAC,YAAY,EAAE,IAAI,EAAE,EAAE;YACjC,4CAA4C;YAC5C,IAAI,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,EAAE,CAAC;gBACnC,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,YAAY;oBAClB,IAAI,EAAE,IAAI;oBACV,QAAQ,EAAE,QAAQ;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,eAAe,GAAgB,EAAE,CAAC;QACxC,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;gBACrD,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAC1E,IAAI,SAAS,EAAE,CAAC;oBACd,eAAe,CAAC,IAAI,CAAC;wBACnB,YAAY,EAAE,GAAG,CAAC,IAAI;wBACtB,SAAS,EAAE,SAAS;wBACpB,QAAQ,EAAE,GAAG,CAAC,QAAQ;wBACtB,MAAM,EAAE,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC;wBACrC,IAAI,EAAE,SAAS,CAAC,MAAM;qBACvB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,OAAO,CAAC,IAAI,CAAC,2BAA2B,GAAG,CAAC,IAAI,GAAG,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;YACjH,CAAC;QACH,CAAC;QAED,OAAO,eAAe,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,YAAoB,EAAE,WAAmB,EAAE;QACzE,IAAI,CAAC,YAAY,EAAE,CAAC;QACpB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,IAAI,MAAM,CAAC;QACjD,MAAM,QAAQ,GAAG,SAAS,IAAI,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC;QACpD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAErD,IAAI,CAAC;YACH,EAAE,CAAC,aAAa,CA
AC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAEnC,qCAAqC;YACrC,MAAM,GAAG,GAAG,QAAQ,GAAG,YAAY,CAAC;YACpC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;YAExC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,MAAM,IAAI,oBAAoB,CAAC,wBAAwB,QAAQ,KAAK,OAAO,EAAE,EAAE,KAAc,CAAC,CAAC;QACjG,CAAC;IACH,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,QAAgB;QAC1B,MAAM,eAAe,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;QACnG,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QACjD,OAAO,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC;YAC7B,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC;YAC5B,QAAQ,CAAC,QAAQ,CAAC,UAAU,CAAC;YAC7B,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC;YAC9B,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAgB;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QACjD,OAAO,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IACxD,CAAC;IAED;;OAEG;IACH,iBAAiB,CAAC,YAAoB,EAAE,WAAmB,EAAE;QAC3D,MAAM,GAAG,GAAG,QAAQ,GAAG,YAAY,CAAC;QACpC,MAAM,aAAa,GAAG,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACpD,IAAI,aAAa,EAAE,CAAC;YAClB,OAAO,YAAY,IAAI,CAAC,SAAS,IAAI,aAAa,GAAG,CAAC;QACxD,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,gBAAgB,CAAC,cAAsB,OAAO,EAAE,SAAkB;QAChE,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,KAAK,WAAW,KAAK,IAAI,CAAC,SAAS,IAAI,SAAS,GAAG,CAAC;QAC7D,CAAC;QACD,OAAO,KAAK,WAAW,oBAAoB,CAAC;IAC9C,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;QACtB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,IAAI,cAAc;QAChB,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,IAAI,iBAAiB;QACnB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,IAAI,sBAAsB;QACxB,OAAO,IAAI,CAAC,eAAe,CAAC;IAC9B,CAAC;CACF"}
@@ -0,0 +1,55 @@
1
+ import type { TableData, ListData, Position } from '../types/interfaces.js';
2
+ export interface TableFormatOptions { // Optional switches for LayoutParser.parseAdvancedTable; every field may be omitted.
3
+ readonly preserveAlignment?: boolean; // Defaults to true in the compiled implementation: aligned cells are space-padded and the header separator gets alignment colons.
4
+ readonly showBorders?: boolean; // Accepted by the type, but the compiled parseAdvancedTable does not act on it.
5
+ readonly preserveColors?: boolean; // Defaults to false; when true, a cell with a backgroundColor gets a trailing "<!-- bg:... -->" HTML comment.
6
+ }
7
+ export interface ColumnData { // One column of the layout passed to LayoutParser.createColumns.
8
+ readonly content: string; // Column text; createColumns splits it on blank lines ("\n\n") and emits one table row per paragraph.
9
+ }
10
+ export declare class LayoutParser {
11
+ private tableCounter; // Backs currentTableCount and is zeroed by reset(). NOTE(review): no method of the compiled class increments it - confirm that is intended.
12
+ /**
13
+ * Render tableData.rows as a Markdown pipe table with a header separator after the first row; returns '' when there are no rows. Merged cells are only approximated because Markdown has no cell spans.
14
+ */
15
+ parseAdvancedTable(tableData: TableData, options?: TableFormatOptions): string;
16
+ /**
17
+ * Render listData.items as a Markdown list ('1.' markers when isOrdered, '-' otherwise); children are rendered recursively one level deeper. Returns '' for a missing or empty item list.
18
+ */
19
+ parseList(listData: ListData): string;
20
+ /**
21
+ * Render content as a blockquote headed '**Text Box**', one quoted line per input line, preceded by an HTML comment with the x/y position when either coordinate is truthy.
22
+ */
23
+ createTextBox(content: string, position?: Position): string;
24
+ /**
25
+ * Approximate a multi-column layout with a Markdown table: one table column per entry and one row per paragraph. With zero or one column the plain content (or '') is returned instead.
26
+ */
27
+ createColumns(columns: readonly ColumnData[]): string;
28
+ /**
29
+ * Wrap header or footer content in a blockquote preceded by a '<!-- Document header|footer -->' comment; returns '' for empty content. type defaults to 'header'.
30
+ */
31
+ parseHeaderFooter(content: string, type?: 'header' | 'footer'): string;
32
+ /**
33
+ * Return a divider string for the given style (default 'simple'); 'simple' and 'dashed' both yield a '---' rule.
34
+ */
35
+ createDivider(style?: 'simple' | 'thick' | 'dashed' | 'dotted'): string;
36
+ /**
37
+ * Return a sorted copy of elements in reading order: by position.y, falling back to position.x when two y values differ by less than 50. Missing coordinates count as 0; the input array is not modified.
38
+ */
39
+ calculateRelativePosition<T extends {
40
+ position?: Position;
41
+ }>(elements: readonly T[]): T[];
42
+ /**
43
+ * Map a font size to Markdown: >=24 '#', >=20 '##', >=16 '###', >=14 '####', <=10 '<small>'; other sizes, a falsy size and 'normal' return the text unchanged. String sizes are parsed with parseFloat.
44
+ */
45
+ formatWithSize(text: string, fontSize: number | string): string;
46
+ /**
47
+ * Reset the table counter to 0
48
+ */
49
+ reset(): void;
50
+ /**
51
+ * Current value of the table counter
52
+ */
53
+ get currentTableCount(): number;
54
+ }
55
+ //# sourceMappingURL=layout-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"layout-parser.d.ts","sourceRoot":"","sources":["../../src/utils/layout-parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,SAAS,EAGT,QAAQ,EAER,QAAQ,EAGT,MAAM,wBAAwB,CAAC;AAEhC,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,WAAW,CAAC,EAAE,OAAO,CAAC;IAC/B,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,YAAY,CAAa;IAEjC;;OAEG;IACH,kBAAkB,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,GAAE,kBAAuB,GAAG,MAAM;IAuGlF;;OAEG;IACH,SAAS,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM;IA8BrC;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM;IAmB3D;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,SAAS,UAAU,EAAE,GAAG,MAAM;IAuCrD;;OAEG;IACH,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,GAAE,QAAQ,GAAG,QAAmB,GAAG,MAAM;IAOhF;;OAEG;IACH,aAAa,CAAC,KAAK,GAAE,QAAQ,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAmB,GAAG,MAAM;IAajF;;OAEG;IACH,yBAAyB,CAAC,CAAC,SAAS;QAAE,QAAQ,CAAC,EAAE,QAAQ,CAAA;KAAE,EAAE,QAAQ,EAAE,SAAS,CAAC,EAAE,GAAG,CAAC,EAAE;IAgBzF;;OAEG;IACH,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM;IAe/D;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAE9B;CACF"}
@@ -0,0 +1,244 @@
1
+ export class LayoutParser {
2
+ tableCounter = 0;
3
+ /**
4
+ * Parse an advanced table with merged cells and styling
5
+ */
6
+ parseAdvancedTable(tableData, options = {}) {
7
+ if (!tableData.rows || tableData.rows.length === 0) {
8
+ return '';
9
+ }
10
+ const { preserveAlignment = true, showBorders = true, preserveColors = false } = options;
11
+ let markdown = '';
12
+ const rows = tableData.rows;
13
+ const colCount = Math.max(...rows.map(row => row.cells ? row.cells.length : 0));
14
+ // Process each row
15
+ for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
16
+ const row = rows[rowIndex];
17
+ let rowMarkdown = '|';
18
+ if (!row.cells)
19
+ continue;
20
+ // Process each cell
21
+ for (let colIndex = 0; colIndex < colCount; colIndex++) {
22
+ const cell = row.cells[colIndex];
23
+ if (!cell) {
24
+ rowMarkdown += ' |';
25
+ continue;
26
+ }
27
+ let cellContent = cell.text || '';
28
+ // Handle merged cells
29
+ if (cell.merged) {
30
+ if (cell.colSpan > 1) {
31
+ // For horizontal merge, add extra columns
32
+ cellContent += ' '.repeat(Math.max(0, cell.colSpan - 1) * 3);
33
+ }
34
+ // Note: Markdown doesn't support rowspan, so we approximate
35
+ }
36
+ // Apply text formatting
37
+ if (cell.bold)
38
+ cellContent = `**${cellContent}**`;
39
+ if (cell.italic)
40
+ cellContent = `*${cellContent}*`;
41
+ // Apply alignment (approximate with spaces)
42
+ if (preserveAlignment && cell.alignment) {
43
+ const cellWidth = Math.max(cellContent.length, 10);
44
+ switch (cell.alignment) {
45
+ case 'center': {
46
+ const padding = Math.floor((cellWidth - cellContent.length) / 2);
47
+ cellContent = ' '.repeat(padding) + cellContent + ' '.repeat(padding);
48
+ break;
49
+ }
50
+ case 'right': {
51
+ cellContent = cellContent.padStart(cellWidth);
52
+ break;
53
+ }
54
+ // 'left' and 'justify' use default formatting
55
+ }
56
+ }
57
+ // Add background color note if enabled
58
+ if (preserveColors && cell.backgroundColor) {
59
+ cellContent += ` <!-- bg:${cell.backgroundColor} -->`;
60
+ }
61
+ rowMarkdown += ` ${cellContent} |`;
62
+ }
63
+ markdown += rowMarkdown + '\n';
64
+ // Add header separator after first row
65
+ if (rowIndex === 0) {
66
+ let separator = '|';
67
+ for (let i = 0; i < colCount; i++) {
68
+ const cell = rows[0]?.cells?.[i];
69
+ let sepContent = ' --- ';
70
+ // Apply alignment in separator
71
+ if (preserveAlignment && cell?.alignment) {
72
+ switch (cell.alignment) {
73
+ case 'center':
74
+ sepContent = ':---:';
75
+ break;
76
+ case 'right':
77
+ sepContent = ' ---:';
78
+ break;
79
+ case 'left':
80
+ default:
81
+ sepContent = ':--- ';
82
+ break;
83
+ }
84
+ }
85
+ separator += sepContent + '|';
86
+ }
87
+ markdown += separator + '\n';
88
+ }
89
+ }
90
+ return markdown;
91
+ }
92
+ /**
93
+ * Parse lists with proper nesting
94
+ */
95
+ parseList(listData) {
96
+ if (!listData.items || listData.items.length === 0)
97
+ return '';
98
+ let markdown = '';
99
+ const processListItems = (items, level = 0) => {
100
+ let result = '';
101
+ for (const item of items) {
102
+ const indent = ' '.repeat(level);
103
+ const marker = listData.isOrdered ? '1.' : '-';
104
+ let itemText = item.text || '';
105
+ // Apply formatting
106
+ if (item.bold)
107
+ itemText = `**${itemText}**`;
108
+ if (item.italic)
109
+ itemText = `*${itemText}*`;
110
+ result += `${indent}${marker} ${itemText}\n`;
111
+ // Handle nested lists
112
+ if (item.children && item.children.length > 0) {
113
+ result += processListItems(item.children, level + 1);
114
+ }
115
+ }
116
+ return result;
117
+ };
118
+ return processListItems(listData.items);
119
+ }
120
+ /**
121
+ * Create text box representation
122
+ */
123
+ createTextBox(content, position) {
124
+ let markdown = '';
125
+ if (position && (position.x || position.y)) {
126
+ markdown += `<!-- Position: x=${position.x || 0}, y=${position.y || 0} -->\n`;
127
+ }
128
+ markdown += '> **Text Box**\n';
129
+ markdown += '> \n';
130
+ // Split content into lines and add blockquote formatting
131
+ const lines = content.split('\n');
132
+ for (const line of lines) {
133
+ markdown += `> ${line}\n`;
134
+ }
135
+ return markdown + '\n';
136
+ }
137
+ /**
138
+ * Create multi-column layout approximation
139
+ */
140
+ createColumns(columns) {
141
+ if (!columns || columns.length <= 1) {
142
+ return columns[0]?.content || '';
143
+ }
144
+ let markdown = '<!-- Multi-column layout -->\n\n';
145
+ // Create a table to approximate columns
146
+ markdown += '|';
147
+ for (let i = 0; i < columns.length; i++) {
148
+ markdown += ` Column ${i + 1} |`;
149
+ }
150
+ markdown += '\n';
151
+ markdown += '|';
152
+ for (let i = 0; i < columns.length; i++) {
153
+ markdown += ' --- |';
154
+ }
155
+ markdown += '\n';
156
+ // Find the maximum number of paragraphs in any column
157
+ const maxParagraphs = Math.max(...columns.map(col => col.content ? col.content.split('\n\n').length : 0));
158
+ // Create rows for each paragraph level
159
+ for (let p = 0; p < maxParagraphs; p++) {
160
+ markdown += '|';
161
+ for (const column of columns) {
162
+ const paragraphs = column.content ? column.content.split('\n\n') : [];
163
+ const paragraph = paragraphs[p] || '';
164
+ markdown += ` ${paragraph.replace(/\n/g, '<br>')} |`;
165
+ }
166
+ markdown += '\n';
167
+ }
168
+ return markdown + '\n';
169
+ }
170
+ /**
171
+ * Parse headers and footers
172
+ */
173
+ parseHeaderFooter(content, type = 'header') {
174
+ if (!content)
175
+ return '';
176
+ const marker = type === 'header' ? 'šŸ”' : 'šŸ”»';
177
+ return `<!-- Document ${type} -->\n> ${marker} ${content}\n\n`;
178
+ }
179
+ /**
180
+ * Create divider/separator
181
+ */
182
+ createDivider(style = 'simple') {
183
+ switch (style) {
184
+ case 'thick':
185
+ return '\n═══════════════════════════════════════\n\n';
186
+ case 'dashed':
187
+ return '\n---\n\n';
188
+ case 'dotted':
189
+ return '\n• • • • • • • • • • • • • • • • • • • • •\n\n';
190
+ default:
191
+ return '\n---\n\n';
192
+ }
193
+ }
194
+ /**
195
+ * Calculate relative positioning for layout elements
196
+ */
197
+ calculateRelativePosition(elements) {
198
+ // Sort elements by their Y position, then X position
199
+ return [...elements].sort((a, b) => {
200
+ const aY = a.position?.y || 0;
201
+ const bY = b.position?.y || 0;
202
+ const aX = a.position?.x || 0;
203
+ const bX = b.position?.x || 0;
204
+ const yDiff = aY - bY;
205
+ if (Math.abs(yDiff) < 50) { // Same "row"
206
+ return aX - bX;
207
+ }
208
+ return yDiff;
209
+ });
210
+ }
211
+ /**
212
+ * Format text with approximate font sizes using headers
213
+ */
214
+ formatWithSize(text, fontSize) {
215
+ if (!fontSize || fontSize === 'normal')
216
+ return text;
217
+ const size = typeof fontSize === 'string' ? parseFloat(fontSize) : fontSize;
218
+ // Map font sizes to markdown headers (approximate)
219
+ if (size >= 24)
220
+ return `# ${text}`;
221
+ if (size >= 20)
222
+ return `## ${text}`;
223
+ if (size >= 16)
224
+ return `### ${text}`;
225
+ if (size >= 14)
226
+ return `#### ${text}`;
227
+ if (size <= 10)
228
+ return `<small>${text}</small>`;
229
+ return text;
230
+ }
231
+ /**
232
+ * Reset internal counters
233
+ */
234
+ reset() {
235
+ this.tableCounter = 0;
236
+ }
237
+ /**
238
+ * Get current table counter
239
+ */
240
+ get currentTableCount() {
241
+ return this.tableCounter;
242
+ }
243
+ }
244
+ //# sourceMappingURL=layout-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"layout-parser.js","sourceRoot":"","sources":["../../src/utils/layout-parser.ts"],"names":[],"mappings":"AAqBA,MAAM,OAAO,YAAY;IACf,YAAY,GAAW,CAAC,CAAC;IAEjC;;OAEG;IACH,kBAAkB,CAAC,SAAoB,EAAE,UAA8B,EAAE;QACvE,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,SAAS,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnD,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,EACJ,iBAAiB,GAAG,IAAI,EACxB,WAAW,GAAG,IAAI,EAClB,cAAc,GAAG,KAAK,EACvB,GAAG,OAAO,CAAC;QAEZ,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;QAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhF,mBAAmB;QACnB,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,EAAE,CAAC;YAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3B,IAAI,WAAW,GAAG,GAAG,CAAC;YAEtB,IAAI,CAAC,GAAG,CAAC,KAAK;gBAAE,SAAS;YAEzB,oBAAoB;YACpB,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,GAAG,QAAQ,EAAE,QAAQ,EAAE,EAAE,CAAC;gBACvD,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBACjC,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,WAAW,IAAI,KAAK,CAAC;oBACrB,SAAS;gBACX,CAAC;gBAED,IAAI,WAAW,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;gBAElC,sBAAsB;gBACtB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;oBAChB,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;wBACrB,0CAA0C;wBAC1C,WAAW,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;oBAC/D,CAAC;oBACD,4DAA4D;gBAC9D,CAAC;gBAED,wBAAwB;gBACxB,IAAI,IAAI,CAAC,IAAI;oBAAE,WAAW,GAAG,KAAK,WAAW,IAAI,CAAC;gBAClD,IAAI,IAAI,CAAC,MAAM;oBAAE,WAAW,GAAG,IAAI,WAAW,GAAG,CAAC;gBAElD,4CAA4C;gBAC5C,IAAI,iBAAiB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oBACxC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;oBACnD,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;wBACvB,KAAK,QAAQ,CAAC,CAAC,CAAC;4BACd,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,GAAG,WAAW,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;4BACjE,WAAW,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,WAAW,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;4BACtE,MAAM;wBACR,C
AAC;wBACD,KAAK,OAAO,CAAC,CAAC,CAAC;4BACb,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;4BAC9C,MAAM;wBACR,CAAC;wBACD,8CAA8C;oBAChD,CAAC;gBACH,CAAC;gBAED,uCAAuC;gBACvC,IAAI,cAAc,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;oBAC3C,WAAW,IAAI,YAAY,IAAI,CAAC,eAAe,MAAM,CAAC;gBACxD,CAAC;gBAED,WAAW,IAAI,IAAI,WAAW,IAAI,CAAC;YACrC,CAAC;YAED,QAAQ,IAAI,WAAW,GAAG,IAAI,CAAC;YAE/B,uCAAuC;YACvC,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;gBACnB,IAAI,SAAS,GAAG,GAAG,CAAC;gBACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;oBAClC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;oBACjC,IAAI,UAAU,GAAG,OAAO,CAAC;oBAEzB,+BAA+B;oBAC/B,IAAI,iBAAiB,IAAI,IAAI,EAAE,SAAS,EAAE,CAAC;wBACzC,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;4BACvB,KAAK,QAAQ;gCACX,UAAU,GAAG,OAAO,CAAC;gCACrB,MAAM;4BACR,KAAK,OAAO;gCACV,UAAU,GAAG,OAAO,CAAC;gCACrB,MAAM;4BACR,KAAK,MAAM,CAAC;4BACZ;gCACE,UAAU,GAAG,OAAO,CAAC;gCACrB,MAAM;wBACV,CAAC;oBACH,CAAC;oBACD,SAAS,IAAI,UAAU,GAAG,GAAG,CAAC;gBAChC,CAAC;gBACD,QAAQ,IAAI,SAAS,GAAG,IAAI,CAAC;YAC/B,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,QAAkB;QAC1B,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE9D,IAAI,QAAQ,GAAG,EAAE,CAAC;QAElB,MAAM,gBAAgB,GAAG,CAAC,KAA0B,EAAE,QAAgB,CAAC,EAAU,EAAE;YACjF,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;gBAE/C,IAAI,QAAQ,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;gBAE/B,mBAAmB;gBACnB,IAAI,IAAI,CAAC,IAAI;oBAAE,QAAQ,GAAG,KAAK,QAAQ,IAAI,CAAC;gBAC5C,IAAI,IAAI,CAAC,MAAM;oBAAE,QAAQ,GAAG,IAAI,QAAQ,GAAG,CAAC;gBAE5C,MAAM,IAAI,GAAG,MAAM,GAAG,MAAM,IAAI,QAAQ,IAAI,CAAC;gBAE7C,sBAAsB;gBACtB,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC9C,MAAM,IAAI,gBAAgB,CAAC,IAAI,CAAC,QAAQ,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;gBACvD,CAAC;YACH,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC;QAEF,OAAO,gBAAgB,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC1C,CAAC;
IAED;;OAEG;IACH,aAAa,CAAC,OAAe,EAAE,QAAmB;QAChD,IAAI,QAAQ,GAAG,EAAE,CAAC;QAElB,IAAI,QAAQ,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3C,QAAQ,IAAI,oBAAoB,QAAQ,CAAC,CAAC,IAAI,CAAC,OAAO,QAAQ,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;QAChF,CAAC;QAED,QAAQ,IAAI,kBAAkB,CAAC;QAC/B,QAAQ,IAAI,MAAM,CAAC;QAEnB,yDAAyD;QACzD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,QAAQ,IAAI,KAAK,IAAI,IAAI,CAAC;QAC5B,CAAC;QAED,OAAO,QAAQ,GAAG,IAAI,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,OAA8B;QAC1C,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACpC,OAAO,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;QACnC,CAAC;QAED,IAAI,QAAQ,GAAG,kCAAkC,CAAC;QAElD,wCAAwC;QACxC,QAAQ,IAAI,GAAG,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,QAAQ,IAAI,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC;QACnC,CAAC;QACD,QAAQ,IAAI,IAAI,CAAC;QAEjB,QAAQ,IAAI,GAAG,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,QAAQ,IAAI,QAAQ,CAAC;QACvB,CAAC;QACD,QAAQ,IAAI,IAAI,CAAC;QAEjB,sDAAsD;QACtD,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAClD,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CACnD,CAAC,CAAC;QAEH,uCAAuC;QACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,QAAQ,IAAI,GAAG,CAAC;YAChB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACtE,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBACtC,QAAQ,IAAI,IAAI,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC;YACvD,CAAC;YACD,QAAQ,IAAI,IAAI,CAAC;QACnB,CAAC;QAED,OAAO,QAAQ,GAAG,IAAI,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,iBAAiB,CAAC,OAAe,EAAE,OAA4B,QAAQ;QACrE,IAAI,CAAC,OAAO;YAAE,OAAO,EAAE,CAAC;QAExB,MAAM,MAAM,GAAG,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QAC/C,OAAO,iBAAiB,IAAI,WAAW,MAAM,IAAI,OAAO,MAAM,CAAC
;IACjE,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,QAAkD,QAAQ;QACtE,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO;gBACV,OAAO,+CAA+C,CAAC;YACzD,KAAK,QAAQ;gBACX,OAAO,WAAW,CAAC;YACrB,KAAK,QAAQ;gBACX,OAAO,iDAAiD,CAAC;YAC3D;gBACE,OAAO,WAAW,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,yBAAyB,CAAoC,QAAsB;QACjF,qDAAqD;QACrD,OAAO,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACjC,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC;YAE9B,MAAM,KAAK,GAAG,EAAE,GAAG,EAAE,CAAC;YACtB,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,aAAa;gBACvC,OAAO,EAAE,GAAG,EAAE,CAAC;YACjB,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,IAAY,EAAE,QAAyB;QACpD,IAAI,CAAC,QAAQ,IAAI,QAAQ,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC;QAEpD,MAAM,IAAI,GAAG,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE5E,mDAAmD;QACnD,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,KAAK,IAAI,EAAE,CAAC;QACnC,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,MAAM,IAAI,EAAE,CAAC;QACpC,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,OAAO,IAAI,EAAE,CAAC;QACrC,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,QAAQ,IAAI,EAAE,CAAC;QACtC,IAAI,IAAI,IAAI,EAAE;YAAE,OAAO,UAAU,IAAI,UAAU,CAAC;QAEhD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,IAAI,iBAAiB;QACnB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;CACF"}
@@ -0,0 +1,46 @@
1
+ import type { Buffer } from 'node:buffer';
2
+ import type { PageData } from '../types/interfaces.js';
3
+ import type { ImageExtractor } from './image-extractor.js';
4
+ export interface PDFParseOptions { // Optional PDF parsing switches. NOTE(review): no declaration in this file consumes the type; the consumer is not visible here.
5
+ readonly maxPages?: number; // presumably an upper bound on the number of pages processed - TODO confirm against the implementation
6
+ readonly preserveLayout?: boolean; // presumably toggles layout-aware text formatting - TODO confirm against the implementation
7
+ }
8
+ export interface PDFParseResult { // Read-only result record. NOTE(review): the producer is not visible in this declaration file.
9
+ readonly markdown: string; // presumably the converted document text - TODO confirm
10
+ readonly images: readonly import('../types/interfaces.js').ImageData[]; // Read-only list of ImageData records (type declared in ../types/interfaces.js).
11
+ readonly pageCount: number; // presumably the number of pages in the source PDF - TODO confirm
12
+ readonly metadata: Record<string, unknown>; // String-keyed values of unknown type; callers must narrow each value before use.
13
+ }
14
+ export declare class PDFExtractor {
15
+ private readonly imageExtractor; // presumably the ImageExtractor handed to the constructor - TODO confirm in pdf-extractor.js
16
+ private pageCounter; // Backs currentPageCount, judging by the names; the implementation is not visible here.
17
+ constructor(imageExtractor: ImageExtractor); // Takes the ImageExtractor this instance works with.
18
+ /**
19
+ * Extract images from PDF by converting pages to images. Resolves to a read-only list of PageData records.
20
+ */
21
+ extractImagesFromPDF(buffer: Buffer): Promise<readonly PageData[]>;
22
+ /**
23
+ * Enhance text with layout detection. Resolves to the enhanced text; pdfData is optional and typed unknown. NOTE(review): how pdfData is used is not visible here.
24
+ */
25
+ enhanceTextWithLayout(text: string, pdfData?: unknown): Promise<string>;
26
+ private isLikelyHeading; // Private helpers below are declared without signatures in this file.
27
+ private determineHeadingLevel; // NOTE(review): their names suggest heading, table and list detection for enhanceTextWithLayout - verify in pdf-extractor.js.
28
+ private isLikelyTableRow;
29
+ private parseTableRow;
30
+ private formatTableRows;
31
+ private isListItem;
32
+ private formatListItem;
33
+ /**
34
+ * Create page breaks with images. Takes a read-only list of PageData and resolves to a string.
35
+ */
36
+ createPageBreaks(pageImages: readonly PageData[]): Promise<string>;
37
+ /**
38
+ * Reset internal counters
39
+ */
40
+ reset(): void;
41
+ /**
42
+ * Get current page counter
43
+ */
44
+ get currentPageCount(): number;
45
+ }
46
+ //# sourceMappingURL=pdf-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pdf-extractor.d.ts","sourceRoot":"","sources":["../../src/utils/pdf-extractor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAEvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAW3D,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,SAAS,OAAO,wBAAwB,EAAE,SAAS,EAAE,CAAC;IACvE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC5C;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,WAAW,CAAa;gBAEpB,cAAc,EAAE,cAAc;IAI1C;;OAEG;IACG,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,QAAQ,EAAE,CAAC;IAmCxE;;OAEG;IACG,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAiE7E,OAAO,CAAC,eAAe;IAoBvB,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,gBAAgB;IAaxB,OAAO,CAAC,aAAa;IAgBrB,OAAO,CAAC,eAAe;IAgCvB,OAAO,CAAC,UAAU;IAYlB,OAAO,CAAC,cAAc;IAatB;;OAEG;IACG,gBAAgB,CAAC,UAAU,EAAE,SAAS,QAAQ,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAiBxE;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;OAEG;IACH,IAAI,gBAAgB,IAAI,MAAM,CAE7B;CACF"}