file2md 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/LICENSE +21 -0
  2. package/README.md +293 -0
  3. package/dist/index.d.ts +33 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +153 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/parsers/docx-parser.d.ts +20 -0
  8. package/dist/parsers/docx-parser.d.ts.map +1 -0
  9. package/dist/parsers/docx-parser.js +237 -0
  10. package/dist/parsers/docx-parser.js.map +1 -0
  11. package/dist/parsers/pdf-parser.d.ts +8 -0
  12. package/dist/parsers/pdf-parser.d.ts.map +1 -0
  13. package/dist/parsers/pdf-parser.js +98 -0
  14. package/dist/parsers/pdf-parser.js.map +1 -0
  15. package/dist/parsers/pptx-parser.d.ts +21 -0
  16. package/dist/parsers/pptx-parser.d.ts.map +1 -0
  17. package/dist/parsers/pptx-parser.js +264 -0
  18. package/dist/parsers/pptx-parser.js.map +1 -0
  19. package/dist/parsers/xlsx-parser.d.ts +19 -0
  20. package/dist/parsers/xlsx-parser.d.ts.map +1 -0
  21. package/dist/parsers/xlsx-parser.js +267 -0
  22. package/dist/parsers/xlsx-parser.js.map +1 -0
  23. package/dist/types/errors.d.ts +52 -0
  24. package/dist/types/errors.d.ts.map +1 -0
  25. package/dist/types/errors.js +76 -0
  26. package/dist/types/errors.js.map +1 -0
  27. package/dist/types/index.d.ts +5 -0
  28. package/dist/types/index.d.ts.map +1 -0
  29. package/dist/types/index.js +5 -0
  30. package/dist/types/index.js.map +1 -0
  31. package/dist/types/interfaces.d.ts +228 -0
  32. package/dist/types/interfaces.d.ts.map +1 -0
  33. package/dist/types/interfaces.js +10 -0
  34. package/dist/types/interfaces.js.map +1 -0
  35. package/dist/utils/chart-extractor.d.ts +44 -0
  36. package/dist/utils/chart-extractor.d.ts.map +1 -0
  37. package/dist/utils/chart-extractor.js +258 -0
  38. package/dist/utils/chart-extractor.js.map +1 -0
  39. package/dist/utils/image-extractor.d.ts +50 -0
  40. package/dist/utils/image-extractor.d.ts.map +1 -0
  41. package/dist/utils/image-extractor.js +136 -0
  42. package/dist/utils/image-extractor.js.map +1 -0
  43. package/dist/utils/layout-parser.d.ts +55 -0
  44. package/dist/utils/layout-parser.d.ts.map +1 -0
  45. package/dist/utils/layout-parser.js +244 -0
  46. package/dist/utils/layout-parser.js.map +1 -0
  47. package/dist/utils/pdf-extractor.d.ts +46 -0
  48. package/dist/utils/pdf-extractor.d.ts.map +1 -0
  49. package/dist/utils/pdf-extractor.js +235 -0
  50. package/dist/utils/pdf-extractor.js.map +1 -0
  51. package/package.json +70 -0
@@ -0,0 +1,228 @@
1
+ import type { Buffer } from 'node:buffer';
2
+ /**
3
+ * Options for document conversion. Every field is optional and read-only; the default named in each field comment is documentation only and is not enforced by this type.
4
+ */
5
+ export interface ConvertOptions {
6
+ /** Directory to save extracted images. Defaults to 'images' */
7
+ readonly imageDir?: string;
8
+ /** Whether to preserve document layout as much as possible. Defaults to true */
9
+ readonly preserveLayout?: boolean;
10
+ /** Whether to extract charts and convert them to markdown tables. Defaults to true */
11
+ readonly extractCharts?: boolean;
12
+ /** Whether to extract images from documents. Defaults to true */
13
+ readonly extractImages?: boolean;
14
+ /** Maximum number of pages to process for PDFs. Defaults to unlimited. NOTE(review): whether non-PDF formats honour this is not visible here - confirm */
15
+ readonly maxPages?: number;
16
+ }
17
+ /**
18
+ * Metadata about the converted document; exposed to callers as ConversionResult.metadata. All fields are read-only.
19
+ */
20
+ export interface DocumentMetadata {
21
+ /** Original file type detected */
22
+ readonly fileType: string;
23
+ /** MIME type of the original file */
24
+ readonly mimeType: string;
25
+ /** Number of pages/sheets/slides processed */
26
+ readonly pageCount: number;
27
+ /** Number of images extracted */
28
+ readonly imageCount: number;
29
+ /** Number of charts extracted */
30
+ readonly chartCount: number;
31
+ /** Processing time in milliseconds */
32
+ readonly processingTime: number;
33
+ /** Additional format-specific metadata; values are typed unknown, so narrow them before use */
34
+ readonly additional?: Record<string, unknown>;
35
+ }
36
+ /**
37
+ * Information about an extracted image. NOTE(review): this name is also a DOM global (lib.dom `ImageData`); import this type explicitly so the global is not picked up by accident.
38
+ */
39
+ export interface ImageData {
40
+ /** Original path/reference in the document */
41
+ readonly originalPath: string;
42
+ /** Path where the image was saved */
43
+ readonly savedPath: string;
44
+ /** Base path for relative references */
45
+ readonly basePath?: string;
46
+ /** Image dimensions if available (units are not stated in this declaration) */
47
+ readonly dimensions?: {
48
+ readonly width: number;
49
+ readonly height: number;
50
+ };
51
+ /** Image format (png, jpg, etc.) */
52
+ readonly format?: string;
53
+ /** Size of the image file in bytes */
54
+ readonly size?: number;
55
+ }
56
+ /**
57
+ * Information about an extracted chart. This is the shape accepted by ChartExtractor.formatChartAsMarkdown.
58
+ */
59
+ export interface ChartData {
60
+ /** Type of chart (bar, line, pie, scatter, etc.) */
61
+ readonly type: ChartType;
62
+ /** Chart title; the chart parser leaves this as an empty string when no title text is found */
63
+ readonly title: string;
64
+ /** Chart data series */
65
+ readonly series: readonly ChartSeries[];
66
+ /** Category labels; the chart parser takes these from the first series that carries cached categories */
67
+ readonly categories: readonly string[];
68
+ /** Additional chart metadata */
69
+ readonly metadata?: Record<string, unknown>;
70
+ }
71
+ /**
72
+ * Supported chart types. 'unknown' is the value the chart parser starts from and keeps when no plot element is recognised.
73
+ */
74
+ export type ChartType = 'bar' | 'line' | 'pie' | 'scatter' | 'area' | 'column' | 'unknown';
75
+ /**
76
+ * Data series in a chart (one element of ChartData.series)
77
+ */
78
+ export interface ChartSeries {
79
+ /** Series name; may be an empty string when the chart stores no name */
80
+ readonly name: string;
81
+ /** Data values; the chart parser converts cached text with parseFloat and stores 0 for anything non-numeric */
82
+ readonly values: readonly number[];
83
+ /** Categories for this series (if different from chart categories) */
84
+ readonly categories?: readonly string[];
85
+ }
86
+ /**
87
+ * Cell data in a table. Unlike the result types in this file, these fields are not marked readonly, so cells can be mutated after creation.
88
+ */
89
+ export interface CellData {
90
+ /** Cell text content */
91
+ text: string;
92
+ /** Whether the cell text is bold */
93
+ bold: boolean;
94
+ /** Whether the cell text is italic */
95
+ italic: boolean;
96
+ /** Text alignment */
97
+ alignment: TextAlignment;
98
+ /** Background color (hex code) */
99
+ backgroundColor?: string;
100
+ /** Number of columns this cell spans (required; presumably 1 for an unmerged cell - confirm) */
101
+ colSpan: number;
102
+ /** Number of rows this cell spans (required; presumably 1 for an unmerged cell - confirm) */
103
+ rowSpan: number;
104
+ /** Whether this cell is part of a merged cell */
105
+ merged?: boolean;
106
+ }
107
+ /**
108
+ * Row data in a table (one element of TableData.rows). Fields are mutable.
109
+ */
110
+ export interface RowData {
111
+ /** Cells in this row */
112
+ cells: CellData[];
113
+ /** Row height if available (units are not stated in this declaration) */
114
+ height?: number;
115
+ }
116
+ /**
117
+ * Table structure; one of the content variants allowed in LayoutElement.content. Fields are mutable.
118
+ */
119
+ export interface TableData {
120
+ /** All rows in the table */
121
+ rows: RowData[];
122
+ /** Table caption/title */
123
+ caption?: string;
124
+ /** Table width if available (units are not stated in this declaration) */
125
+ width?: number;
126
+ }
127
+ /**
128
+ * Text alignment options (the type of CellData.alignment)
129
+ */
130
+ export type TextAlignment = 'left' | 'center' | 'right' | 'justify';
131
+ /**
132
+ * Position information for layout elements. x and y are required, width and height optional. NOTE(review): units and coordinate origin are not stated here - confirm against the layout parser.
133
+ */
134
+ export interface Position {
135
+ /** X coordinate */
136
+ x: number;
137
+ /** Y coordinate */
138
+ y: number;
139
+ /** Width of the element */
140
+ width?: number;
141
+ /** Height of the element */
142
+ height?: number;
143
+ }
144
+ /**
145
+ * Layout element types (the type of LayoutElement.type)
146
+ */
147
+ export type ElementType = 'text' | 'image' | 'table' | 'chart' | 'shape' | 'unknown';
148
+ /**
149
+ * Layout element with positioning
150
+ */
151
+ export interface LayoutElement {
152
+ /** Type of element */
153
+ readonly type: ElementType;
154
+ /** Element content. The union is not discriminated by `type` at the type level, so consumers must narrow it themselves (for example with typeof or an `in` check). */
155
+ readonly content: string | TableData | ImageData | ChartData;
156
+ /** Position of the element */
157
+ readonly position?: Position;
158
+ /** Additional formatting information; values are typed unknown, so narrow them before use */
159
+ readonly formatting?: Record<string, unknown>;
160
+ }
161
+ /**
162
+ * List item data. The type is recursive: nested items are carried in `children`.
163
+ */
164
+ export interface ListItem {
165
+ /** List item text */
166
+ readonly text: string;
167
+ /** Nesting level (0-based) */
168
+ readonly level: number;
169
+ /** Whether the text is bold */
170
+ readonly bold?: boolean;
171
+ /** Whether the text is italic */
172
+ readonly italic?: boolean;
173
+ /** Child list items */
174
+ readonly children?: readonly ListItem[];
175
+ }
176
+ /**
177
+ * List data structure: an ordered/unordered flag plus the top-level items
178
+ */
179
+ export interface ListData {
180
+ /** Whether this is an ordered (numbered) list */
181
+ readonly isOrdered: boolean;
182
+ /** List items */
183
+ readonly items: readonly ListItem[];
184
+ }
185
+ /**
186
+ * Page information for PDFs
187
+ */
188
+ export interface PageData {
189
+ /** Page number (1-based) */
190
+ readonly pageNumber: number;
191
+ /** Path to the page image. NOTE(review): how this differs from fullPath (presumably relative vs. absolute) is not visible here - confirm */
192
+ readonly imagePath: string;
193
+ /** Full path to the image file */
194
+ readonly fullPath: string;
195
+ /** Page dimensions (units are not stated in this declaration) */
196
+ readonly dimensions?: {
197
+ readonly width: number;
198
+ readonly height: number;
199
+ };
200
+ }
201
+ /**
202
+ * Result of document conversion. All fields are required and read-only; `images` and `charts` are read-only arrays.
203
+ */
204
+ export interface ConversionResult {
205
+ /** Generated markdown content */
206
+ readonly markdown: string;
207
+ /** Extracted images */
208
+ readonly images: readonly ImageData[];
209
+ /** Extracted charts */
210
+ readonly charts: readonly ChartData[];
211
+ /** Document metadata */
212
+ readonly metadata: DocumentMetadata;
213
+ }
214
+ /**
215
+ * Input type for conversion function: a string or a Node.js Buffer (type imported from 'node:buffer'). NOTE(review): the string form is presumably a file path - confirm against the converter.
216
+ */
217
+ export type ConvertInput = string | Buffer;
218
+ /**
219
+ * Supported MIME types, keyed by format name (PDF, DOCX, XLSX, PPTX). Each member is typed as its exact string literal and declared readonly.
220
+ */
221
+ export declare const SUPPORTED_MIME_TYPES: {
222
+ readonly PDF: "application/pdf";
223
+ readonly DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
224
+ readonly XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
225
+ readonly PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation";
226
+ };
227
+ export type SupportedMimeType = typeof SUPPORTED_MIME_TYPES[keyof typeof SUPPORTED_MIME_TYPES]; // Union of the four MIME string literals held by SUPPORTED_MIME_TYPES, derived from the constant so the two cannot drift apart.
228
+ //# sourceMappingURL=interfaces.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.d.ts","sourceRoot":"","sources":["../../src/types/interfaces.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,+DAA+D;IAC/D,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,gFAAgF;IAChF,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC;IAClC,sFAAsF;IACtF,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,iEAAiE;IACjE,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,yEAAyE;IACzE,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,kCAAkC;IAClC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,qCAAqC;IACrC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,8CAA8C;IAC9C,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,iCAAiC;IACjC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,iCAAiC;IACjC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,sCAAsC;IACtC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,0CAA0C;IAC1C,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC/C;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,8CAA8C;IAC9C,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,qCAAqC;IACrC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,wCAAwC;IACxC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,oCAAoC;IACpC,QAAQ,CAAC,UAAU,CAAC,EAAE;QACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;QACvB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,oCAAoC;IACpC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,sCAAsC;IACtC,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,oDAAoD;IACpD,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,kBAAkB;IAClB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,wBAAwB;IACxB,QAAQ,CAAC,MAAM,EAAE,SAAS,WAAW,EAAE,CAAC;IACxC,sBAAsB;IACtB,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;IACvC,gCAAgC;IAChC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,SAAS,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC;AAE3F;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,kBAAkB;IAClB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,kBAAkB;IAClB,QAAQ,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,sEAAsE;IACtE,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,wBAA
wB;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,oCAAoC;IACpC,IAAI,EAAE,OAAO,CAAC;IACd,sCAAsC;IACtC,MAAM,EAAE,OAAO,CAAC;IAChB,qBAAqB;IACrB,SAAS,EAAE,aAAa,CAAC;IACzB,kCAAkC;IAClC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,wCAAwC;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,qCAAqC;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,iDAAiD;IACjD,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,OAAO;IACtB,wBAAwB;IACxB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,8BAA8B;IAC9B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,4BAA4B;IAC5B,IAAI,EAAE,OAAO,EAAE,CAAC;IAChB,0BAA0B;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,SAAS,CAAC;AAEpE;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,mBAAmB;IACnB,CAAC,EAAE,MAAM,CAAC;IACV,mBAAmB;IACnB,CAAC,EAAE,MAAM,CAAC;IACV,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,SAAS,CAAC;AAErF;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,sBAAsB;IACtB,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC;IAC3B,sBAAsB;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAC;IAC7D,8BAA8B;IAC9B,QAAQ,CAAC,QAAQ,CAAC,EAAE,QAAQ,CAAC;IAC7B,wCAAwC;IACxC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC/C;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,qBAAqB;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,8BAA8B;IAC9B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,+BAA+B;IAC/B,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,CAAC;IACxB,iCAAiC;IACjC,QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC;IAC1B,uBAAuB;IACvB,QAAQ,CAAC,QAAQ,CAAC,EAAE,SAAS,QAAQ,EAAE,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,iDAAiD;IACjD,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC;IAC5B,iBAAiB;IACjB,QAAQ,CAAC,KAAK,EAAE,SAAS,QAAQ,EAAE,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,4BAA4B;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,6BAA6B;IAC7B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,kCAAkC;IAClC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,sBAAsB;IACtB,QAAQ,CAAC,UAAU,CAAC,EAAE;QACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;QACvB,QA
AQ,CAAC,MAAM,EAAE,MAAM,CAAC;KACzB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,iCAAiC;IACjC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,uBAAuB;IACvB,QAAQ,CAAC,MAAM,EAAE,SAAS,SAAS,EAAE,CAAC;IACtC,uBAAuB;IACvB,QAAQ,CAAC,MAAM,EAAE,SAAS,SAAS,EAAE,CAAC;IACtC,wBAAwB;IACxB,QAAQ,CAAC,QAAQ,EAAE,gBAAgB,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,MAAM,CAAC;AAE3C;;GAEG;AACH,eAAO,MAAM,oBAAoB;;;;;CAKvB,CAAC;AAEX,MAAM,MAAM,iBAAiB,GAAG,OAAO,oBAAoB,CAAC,MAAM,OAAO,oBAAoB,CAAC,CAAC"}
@@ -0,0 +1,10 @@
1
/**
 * Supported MIME types, keyed by format name.
 *
 * The accompanying declaration file types every member as `readonly`, so the
 * object is frozen here to give the same guarantee at runtime: an accidental
 * write can no longer change the lookup table for every importer.
 */
export const SUPPORTED_MIME_TYPES = Object.freeze({
    PDF: 'application/pdf',
    DOCX: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    XLSX: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    PPTX: 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
});
10
+ //# sourceMappingURL=interfaces.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.js","sourceRoot":"","sources":["../../src/types/interfaces.ts"],"names":[],"mappings":"AA4OA;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,GAAG,EAAE,iBAAiB;IACtB,IAAI,EAAE,yEAAyE;IAC/E,IAAI,EAAE,mEAAmE;IACzE,IAAI,EAAE,2EAA2E;CACzE,CAAC"}
@@ -0,0 +1,44 @@
1
+ import type JSZip from 'jszip';
2
+ import type { ChartData } from '../types/interfaces.js';
3
+ import type { ImageExtractor } from './image-extractor.js';
4
+ interface ExtractedChart { // One element of the array resolved by ChartExtractor.extractChartsFromZip; module-private (this file ends with `export {}`).
5
+ readonly originalPath: string; // Path of the chart XML entry inside the ZIP archive, as reported by the archive iteration.
6
+ readonly data: ChartData; // Parsed chart content.
7
+ readonly basePath: string; // The basePath argument given to extractChartsFromZip, copied through unchanged.
8
+ }
9
+ export declare class ChartExtractor { // Finds chart XML parts in an OOXML archive, parses them with xml2js and renders the result as markdown.
10
+ private readonly imageExtractor; // Stored by the constructor; no method of the compiled class in this package reads it.
11
+ private chartCounter; // Starts at 0; incremented by formatChartAsMarkdown, zeroed by reset().
12
+ constructor(imageExtractor: ImageExtractor);
13
+ /**
14
+ * Extract charts from a ZIP archive (DOCX, XLSX, PPTX). Candidate entries are those whose path contains '/charts/' and ends with '.xml'. basePath defaults to '' and is copied onto every result. A chart that fails to parse is reported with console.warn and left out of the result; the returned promise does not reject for it.
15
+ */
16
+ extractChartsFromZip(zip: JSZip, basePath?: string): Promise<readonly ExtractedChart[]>;
17
+ /**
18
+ * Parse a chart XML file: reads the ZIP entry as a string and parses it with xml2js. Any failure is rethrown as ChartExtractionError with the message prefix 'Failed to parse chart: '.
19
+ */
20
+ private parseChart;
21
+ private extractTextFromTitle; // Reads title text from the rich-text paragraphs of a c:tx node; returns '' when nothing can be read.
22
+ private extractBarChartData; // Delegates to extractGenericChartData.
23
+ private extractLineChartData; // Delegates to extractGenericChartData.
24
+ private extractPieChartData; // Delegates to extractGenericChartData.
25
+ private extractScatterChartData; // Delegates to extractGenericChartData.
26
+ private extractGenericChartData; // Walks the c:ser children and returns { series, categories } built from their cached points.
27
+ /**
28
+ * Format chart data as markdown. Not a pure function: every call increments the internal chart counter, which is printed in the '#### Chart N: ...' heading. A chart with no series yields the heading followed by '*No chart data available*'.
29
+ */
30
+ formatChartAsMarkdown(chartData: ChartData): string;
31
+ private formatBarLineChart; // Category-by-series markdown table.
32
+ private formatPieChart; // Category / Value / Percentage table built from the first series only.
33
+ private formatGenericChart; // Plain per-series listing of values and categories.
34
+ /**
35
+ * Reset internal counters (sets the chart counter back to 0, so the next formatted chart is numbered 1)
36
+ */
37
+ reset(): void;
38
+ /**
39
+ * Get current chart counter, i.e. the number of formatChartAsMarkdown calls since construction or the last reset()
40
+ */
41
+ get currentChartCount(): number;
42
+ }
43
+ export {};
44
+ //# sourceMappingURL=chart-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chart-extractor.d.ts","sourceRoot":"","sources":["../../src/utils/chart-extractor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAE/B,OAAO,KAAK,EAAE,SAAS,EAA0B,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAE3D,UAAU,cAAc;IACtB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAyDD,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,YAAY,CAAa;gBAErB,cAAc,EAAE,cAAc;IAI1C;;OAEG;IACG,oBAAoB,CAAC,GAAG,EAAE,KAAK,EAAE,QAAQ,GAAE,MAAW,GAAG,OAAO,CAAC,SAAS,cAAc,EAAE,CAAC;IAiCjG;;OAEG;YACW,UAAU;IA8DxB,OAAO,CAAC,oBAAoB;IAoB5B,OAAO,CAAC,mBAAmB;IAI3B,OAAO,CAAC,oBAAoB;IAI5B,OAAO,CAAC,mBAAmB;IAI3B,OAAO,CAAC,uBAAuB;IAI/B,OAAO,CAAC,uBAAuB;IA+C/B;;OAEG;IACH,qBAAqB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM;IAuBnD,OAAO,CAAC,kBAAkB;IAqC1B,OAAO,CAAC,cAAc;IAqBtB,OAAO,CAAC,kBAAkB;IAgB1B;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAE9B;CACF"}
@@ -0,0 +1,258 @@
1
+ import { parseStringPromise } from 'xml2js';
2
+ import { ChartExtractionError } from '../types/errors.js';
3
+ export class ChartExtractor {
4
+ imageExtractor;
5
+ chartCounter = 0;
6
+ constructor(imageExtractor) {
7
+ this.imageExtractor = imageExtractor;
8
+ }
9
+ /**
10
+ * Extract charts from a ZIP archive (DOCX, XLSX, PPTX)
11
+ */
12
+ async extractChartsFromZip(zip, basePath = '') {
13
+ const charts = [];
14
+ zip.forEach((relativePath, file) => {
15
+ // Look for chart files
16
+ if (relativePath.includes('/charts/') && relativePath.endsWith('.xml')) {
17
+ charts.push({
18
+ path: relativePath,
19
+ file: file,
20
+ basePath: basePath
21
+ });
22
+ }
23
+ });
24
+ const extractedCharts = [];
25
+ for (const chart of charts) {
26
+ try {
27
+ const chartData = await this.parseChart(chart.file);
28
+ if (chartData) {
29
+ extractedCharts.push({
30
+ originalPath: chart.path,
31
+ data: chartData,
32
+ basePath: chart.basePath
33
+ });
34
+ }
35
+ }
36
+ catch (error) {
37
+ console.warn(`Failed to extract chart ${chart.path}:`, error instanceof Error ? error.message : 'Unknown error');
38
+ }
39
+ }
40
+ return extractedCharts;
41
+ }
42
+ /**
43
+ * Parse a chart XML file
44
+ */
45
+ async parseChart(chartFile) {
46
+ try {
47
+ const xmlContent = await chartFile.async('string');
48
+ const result = await parseStringPromise(xmlContent);
49
+ const chartData = {
50
+ type: 'unknown',
51
+ title: '',
52
+ series: [],
53
+ categories: []
54
+ };
55
+ // Extract chart type
56
+ if (result['c:chartSpace']) {
57
+ const chart = result['c:chartSpace'][0]['c:chart'][0];
58
+ // Extract title
59
+ if (chart['c:title']?.[0]?.['c:tx']) {
60
+ chartData.title = this.extractTextFromTitle(chart['c:title'][0]['c:tx'][0]);
61
+ }
62
+ // Extract plot area
63
+ if (chart['c:plotArea']) {
64
+ const plotArea = chart['c:plotArea'][0];
65
+ // Determine chart type and extract data
66
+ if (plotArea['c:barChart']) {
67
+ chartData.type = 'bar';
68
+ const { series, categories } = this.extractBarChartData(plotArea['c:barChart'][0]);
69
+ chartData.series = series;
70
+ chartData.categories = categories;
71
+ }
72
+ else if (plotArea['c:lineChart']) {
73
+ chartData.type = 'line';
74
+ const { series, categories } = this.extractLineChartData(plotArea['c:lineChart'][0]);
75
+ chartData.series = series;
76
+ chartData.categories = categories;
77
+ }
78
+ else if (plotArea['c:pieChart']) {
79
+ chartData.type = 'pie';
80
+ const { series, categories } = this.extractPieChartData(plotArea['c:pieChart'][0]);
81
+ chartData.series = series;
82
+ chartData.categories = categories;
83
+ }
84
+ else if (plotArea['c:scatterChart']) {
85
+ chartData.type = 'scatter';
86
+ const { series, categories } = this.extractScatterChartData(plotArea['c:scatterChart'][0]);
87
+ chartData.series = series;
88
+ chartData.categories = categories;
89
+ }
90
+ }
91
+ }
92
+ return chartData;
93
+ }
94
+ catch (error) {
95
+ const message = error instanceof Error ? error.message : 'Unknown error';
96
+ throw new ChartExtractionError(`Failed to parse chart: ${message}`, error);
97
+ }
98
+ }
99
+ extractTextFromTitle(titleData) {
100
+ // Simplified title extraction - in a real implementation, this would need more robust typing
101
+ try {
102
+ const title = titleData;
103
+ if (title?.['c:rich']?.[0]?.['a:p']) {
104
+ const paragraphs = title['c:rich'][0]['a:p'];
105
+ let titleText = '';
106
+ for (const para of paragraphs) {
107
+ if (para?.['a:r']?.[0]?.['a:t']?.[0]) {
108
+ titleText += para['a:r'][0]['a:t'][0] + ' ';
109
+ }
110
+ }
111
+ return titleText.trim();
112
+ }
113
+ }
114
+ catch {
115
+ // Ignore parsing errors for title
116
+ }
117
+ return '';
118
+ }
119
+ extractBarChartData(barChart) {
120
+ return this.extractGenericChartData(barChart);
121
+ }
122
+ extractLineChartData(lineChart) {
123
+ return this.extractGenericChartData(lineChart);
124
+ }
125
+ extractPieChartData(pieChart) {
126
+ return this.extractGenericChartData(pieChart);
127
+ }
128
+ extractScatterChartData(scatterChart) {
129
+ return this.extractGenericChartData(scatterChart);
130
+ }
131
+ extractGenericChartData(chartData) {
132
+ const series = [];
133
+ let allCategories = [];
134
+ if (chartData['c:ser']) {
135
+ for (const seriesData of chartData['c:ser']) {
136
+ const seriesInfo = {
137
+ name: '',
138
+ values: [],
139
+ categories: undefined
140
+ };
141
+ // Extract series name
142
+ if (seriesData['c:tx']?.[0]?.['c:strRef']?.[0]?.['c:strCache']?.[0]?.['c:pt']?.[0]) {
143
+ seriesInfo.name = seriesData['c:tx'][0]['c:strRef'][0]['c:strCache'][0]['c:pt'][0]['c:v'][0];
144
+ }
145
+ // Extract values
146
+ if (seriesData['c:val']?.[0]?.['c:numRef']?.[0]?.['c:numCache']?.[0]?.['c:pt']) {
147
+ for (const pt of seriesData['c:val'][0]['c:numRef'][0]['c:numCache'][0]['c:pt']) {
148
+ seriesInfo.values.push(parseFloat(pt['c:v'][0]) || 0);
149
+ }
150
+ }
151
+ // Extract categories for this series
152
+ if (seriesData['c:cat']?.[0]?.['c:strRef']?.[0]?.['c:strCache']?.[0]?.['c:pt']) {
153
+ const categories = [];
154
+ for (const pt of seriesData['c:cat'][0]['c:strRef'][0]['c:strCache'][0]['c:pt']) {
155
+ categories.push(pt['c:v'][0]);
156
+ }
157
+ seriesInfo.categories = categories;
158
+ if (allCategories.length === 0) {
159
+ allCategories = categories;
160
+ }
161
+ }
162
+ series.push(seriesInfo);
163
+ }
164
+ }
165
+ return { series, categories: allCategories };
166
+ }
167
+ /**
168
+ * Format chart data as markdown
169
+ */
170
+ formatChartAsMarkdown(chartData) {
171
+ this.chartCounter++;
172
+ let markdown = `#### Chart ${this.chartCounter}: ${chartData.title || chartData.type.toUpperCase() + ' Chart'}\n\n`;
173
+ if (chartData.series.length === 0) {
174
+ return markdown + '*No chart data available*\n\n';
175
+ }
176
+ switch (chartData.type) {
177
+ case 'bar':
178
+ case 'line':
179
+ markdown += this.formatBarLineChart(chartData);
180
+ break;
181
+ case 'pie':
182
+ markdown += this.formatPieChart(chartData);
183
+ break;
184
+ default:
185
+ markdown += this.formatGenericChart(chartData);
186
+ }
187
+ return markdown + '\n';
188
+ }
189
+ formatBarLineChart(chartData) {
190
+ let markdown = '| Category |';
191
+ // Add series headers
192
+ for (const series of chartData.series) {
193
+ markdown += ` ${series.name || 'Series'} |`;
194
+ }
195
+ markdown += '\n';
196
+ // Add separator
197
+ markdown += '| --- |';
198
+ for (let i = 0; i < chartData.series.length; i++) {
199
+ markdown += ' --- |';
200
+ }
201
+ markdown += '\n';
202
+ // Find maximum number of categories
203
+ const maxCategories = Math.max(chartData.categories.length, ...chartData.series.map(s => s.categories?.length || 0));
204
+ // Add data rows
205
+ for (let i = 0; i < maxCategories; i++) {
206
+ const category = chartData.categories[i] || chartData.series[0]?.categories?.[i] || `Item ${i + 1}`;
207
+ markdown += `| ${category} |`;
208
+ for (const series of chartData.series) {
209
+ const value = series.values[i] || 0;
210
+ markdown += ` ${value} |`;
211
+ }
212
+ markdown += '\n';
213
+ }
214
+ return markdown;
215
+ }
216
+ formatPieChart(chartData) {
217
+ const series = chartData.series[0];
218
+ if (!series)
219
+ return '*No pie chart data*\n';
220
+ let markdown = '| Category | Value | Percentage |\n';
221
+ markdown += '| --- | --- | --- |\n';
222
+ const total = series.values.reduce((sum, val) => sum + val, 0);
223
+ const categories = series.categories || chartData.categories;
224
+ for (let i = 0; i < Math.min(categories.length, series.values.length); i++) {
225
+ const category = categories[i];
226
+ const value = series.values[i] || 0;
227
+ const percentage = total > 0 ? ((value / total) * 100).toFixed(1) : '0';
228
+ markdown += `| ${category} | ${value} | ${percentage}% |\n`;
229
+ }
230
+ return markdown;
231
+ }
232
+ formatGenericChart(chartData) {
233
+ let markdown = `*${chartData.type.toUpperCase()} chart with ${chartData.series.length} series*\n\n`;
234
+ for (let i = 0; i < chartData.series.length; i++) {
235
+ const series = chartData.series[i];
236
+ markdown += `**Series ${i + 1}: ${series.name}**\n`;
237
+ markdown += `Values: ${series.values.join(', ')}\n`;
238
+ if (series.categories && series.categories.length > 0) {
239
+ markdown += `Categories: ${series.categories.join(', ')}\n`;
240
+ }
241
+ markdown += '\n';
242
+ }
243
+ return markdown;
244
+ }
245
+ /**
246
+ * Reset internal counters
247
+ */
248
+ reset() {
249
+ this.chartCounter = 0;
250
+ }
251
+ /**
252
+ * Get current chart counter
253
+ */
254
+ get currentChartCount() {
255
+ return this.chartCounter;
256
+ }
257
+ }
258
+ //# sourceMappingURL=chart-extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chart-extractor.js","sourceRoot":"","sources":["../../src/utils/chart-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAI5C,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAgE1D,MAAM,OAAO,cAAc;IACR,cAAc,CAAiB;IACxC,YAAY,GAAW,CAAC,CAAC;IAEjC,YAAY,cAA8B;QACxC,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,oBAAoB,CAAC,GAAU,EAAE,WAAmB,EAAE;QAC1D,MAAM,MAAM,GAAoB,EAAE,CAAC;QAEnC,GAAG,CAAC,OAAO,CAAC,CAAC,YAAY,EAAE,IAAI,EAAE,EAAE;YACjC,uBAAuB;YACvB,IAAI,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBACvE,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,YAAY;oBAClB,IAAI,EAAE,IAAI;oBACV,QAAQ,EAAE,QAAQ;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,eAAe,GAAqB,EAAE,CAAC;QAC7C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,CAAC;gBACH,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,IAAI,SAAS,EAAE,CAAC;oBACd,eAAe,CAAC,IAAI,CAAC;wBACnB,YAAY,EAAE,KAAK,CAAC,IAAI;wBACxB,IAAI,EAAE,SAAS;wBACf,QAAQ,EAAE,KAAK,CAAC,QAAQ;qBACzB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,OAAO,CAAC,IAAI,CAAC,2BAA2B,KAAK,CAAC,IAAI,GAAG,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;YACnH,CAAC;QACH,CAAC;QAED,OAAO,eAAe,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,SAA4B;QACnD,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACnD,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,UAAU,CAAmB,CAAC;YAEtE,MAAM,SAAS,GAKX;gBACF,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,EAAE;gBACT,MAAM,EAAE,EAAE;gBACV,UAAU,EAAE,EAAE;aACf,CAAC;YAEF,qBAAqB;YACrB,IAAI,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC;gBAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBAEtD,gBAAgB;gBAChB,IAAI,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;oBACpC,SAAS,CAAC,KAAK,GAAG,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9E,CAAC;gBAED,oBAAoB;gBACpB,IAAI,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC;oBACx
B,MAAM,QAAQ,GAAG,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;oBAExC,wCAAwC;oBACxC,IAAI,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;wBAC3B,SAAS,CAAC,IAAI,GAAG,KAAK,CAAC;wBACvB,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAoB,CAAC,CAAC;wBACtG,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;wBAC1B,SAAS,CAAC,UAAU,GAAG,UAAU,CAAC;oBACpC,CAAC;yBAAM,IAAI,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;wBACnC,SAAS,CAAC,IAAI,GAAG,MAAM,CAAC;wBACxB,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,CAAoB,CAAC,CAAC;wBACxG,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;wBAC1B,SAAS,CAAC,UAAU,GAAG,UAAU,CAAC;oBACpC,CAAC;yBAAM,IAAI,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;wBAClC,SAAS,CAAC,IAAI,GAAG,KAAK,CAAC;wBACvB,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAoB,CAAC,CAAC;wBACtG,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;wBAC1B,SAAS,CAAC,UAAU,GAAG,UAAU,CAAC;oBACpC,CAAC;yBAAM,IAAI,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;wBACtC,SAAS,CAAC,IAAI,GAAG,SAAS,CAAC;wBAC3B,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAoB,CAAC,CAAC;wBAC9G,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;wBAC1B,SAAS,CAAC,UAAU,GAAG,UAAU,CAAC;oBACpC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,MAAM,IAAI,oBAAoB,CAAC,0BAA0B,OAAO,EAAE,EAAE,KAAc,CAAC,CAAC;QACtF,CAAC;IACH,CAAC;IAEO,oBAAoB,CAAC,SAAkB;QAC7C,6FAA6F;QAC7F,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,SAAgB,CAAC;YAC/B,IAAI,KAAK,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC;gBACpC,MAAM,UAAU,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;gBAC7C,IAAI,SAAS,GAAG,EAAE,CAAC;gBACnB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;oBAC9B,IAAI,IAAI,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACrC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;oBAC9C,CAAC;gBACH,CAAC;gBACD,OAAO,SAAS,CAAC,IAAI,E
AAE,CAAC;YAC1B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;QACpC,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAEO,mBAAmB,CAAC,QAAyB;QACnD,OAAO,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IAChD,CAAC;IAEO,oBAAoB,CAAC,SAA0B;QACrD,OAAO,IAAI,CAAC,uBAAuB,CAAC,SAAS,CAAC,CAAC;IACjD,CAAC;IAEO,mBAAmB,CAAC,QAAyB;QACnD,OAAO,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IAChD,CAAC;IAEO,uBAAuB,CAAC,YAA6B;QAC3D,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;IACpD,CAAC;IAEO,uBAAuB,CAAC,SAA0B;QACxD,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,IAAI,aAAa,GAAa,EAAE,CAAC;QAEjC,IAAI,SAAS,CAAC,OAAO,CAAC,EAAE,CAAC;YACvB,KAAK,MAAM,UAAU,IAAI,SAAS,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC5C,MAAM,UAAU,GAIZ;oBACF,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,EAAE;oBACV,UAAU,EAAE,SAAS;iBACtB,CAAC;gBAEF,sBAAsB;gBACtB,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnF,UAAU,CAAC,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC/F,CAAC;gBAED,iBAAiB;gBACjB,IAAI,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC/E,KAAK,MAAM,EAAE,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;wBAChF,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;oBACxD,CAAC;gBACH,CAAC;gBAED,qCAAqC;gBACrC,IAAI,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC/E,MAAM,UAAU,GAAa,EAAE,CAAC;oBAChC,KAAK,MAAM,EAAE,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;wBAChF,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBAChC,CAAC;oBACD,UAAU
,CAAC,UAAU,GAAG,UAAU,CAAC;oBACnC,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;wBAC/B,aAAa,GAAG,UAAU,CAAC;oBAC7B,CAAC;gBACH,CAAC;gBAED,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,aAAa,EAAE,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,qBAAqB,CAAC,SAAoB;QACxC,IAAI,CAAC,YAAY,EAAE,CAAC;QACpB,IAAI,QAAQ,GAAG,cAAc,IAAI,CAAC,YAAY,KAAK,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,IAAI,CAAC,WAAW,EAAE,GAAG,QAAQ,MAAM,CAAC;QAEpH,IAAI,SAAS,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,OAAO,QAAQ,GAAG,+BAA+B,CAAC;QACpD,CAAC;QAED,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;YACvB,KAAK,KAAK,CAAC;YACX,KAAK,MAAM;gBACT,QAAQ,IAAI,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,CAAC;gBAC/C,MAAM;YACR,KAAK,KAAK;gBACR,QAAQ,IAAI,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;gBAC3C,MAAM;YACR;gBACE,QAAQ,IAAI,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,CAAC;QACnD,CAAC;QAED,OAAO,QAAQ,GAAG,IAAI,CAAC;IACzB,CAAC;IAEO,kBAAkB,CAAC,SAAoB;QAC7C,IAAI,QAAQ,GAAG,cAAc,CAAC;QAE9B,qBAAqB;QACrB,KAAK,MAAM,MAAM,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;YACtC,QAAQ,IAAI,IAAI,MAAM,CAAC,IAAI,IAAI,QAAQ,IAAI,CAAC;QAC9C,CAAC;QACD,QAAQ,IAAI,IAAI,CAAC;QAEjB,gBAAgB;QAChB,QAAQ,IAAI,SAAS,CAAC;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,QAAQ,IAAI,QAAQ,CAAC;QACvB,CAAC;QACD,QAAQ,IAAI,IAAI,CAAC;QAEjB,oCAAoC;QACpC,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAC5B,SAAS,CAAC,UAAU,CAAC,MAAM,EAC3B,GAAG,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC,CACxD,CAAC;QAEF,gBAAgB;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACpG,QAAQ,IAAI,KAAK,QAAQ,IAAI,CAAC;YAE9B,KAAK,MAAM,MAAM,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;gBACtC,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBACpC,QAAQ,IAAI,IAAI,KAAK,IAAI,CAAC;YAC5B,CAAC;YACD,QAAQ,IAAI,IAAI,CAAC;QACnB,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,SAAoB;QACzC,MAAM,MAAM,GAAG,
SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACnC,IAAI,CAAC,MAAM;YAAE,OAAO,uBAAuB,CAAC;QAE5C,IAAI,QAAQ,GAAG,qCAAqC,CAAC;QACrD,QAAQ,IAAI,uBAAuB,CAAC;QAEpC,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;QAC/D,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,IAAI,SAAS,CAAC,UAAU,CAAC;QAE7D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3E,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACpC,MAAM,UAAU,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YAExE,QAAQ,IAAI,KAAK,QAAQ,MAAM,KAAK,MAAM,UAAU,OAAO,CAAC;QAC9D,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,kBAAkB,CAAC,SAAoB;QAC7C,IAAI,QAAQ,GAAG,IAAI,SAAS,CAAC,IAAI,CAAC,WAAW,EAAE,eAAe,SAAS,CAAC,MAAM,CAAC,MAAM,cAAc,CAAC;QAEpG,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YACnC,QAAQ,IAAI,YAAY,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC;YACpD,QAAQ,IAAI,WAAW,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;YACpD,IAAI,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtD,QAAQ,IAAI,eAAe,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;YAC9D,CAAC;YACD,QAAQ,IAAI,IAAI,CAAC;QACnB,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,IAAI,iBAAiB;QACnB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;CACF"}
@@ -0,0 +1,50 @@
1
+ import type JSZip from 'jszip';
2
+ import type { Buffer } from 'node:buffer';
3
+ import type { ImageData } from '../types/interfaces.js';
4
+ export declare class ImageExtractor {
5
+ private readonly outputDir;
6
+ private imageCounter;
7
+ private readonly extractedImages;
8
+ constructor(outputDir?: string);
9
+ /**
10
+ * Extract images from a ZIP archive (DOCX, XLSX, PPTX)
11
+ */
12
+ extractImagesFromZip(zip: JSZip, basePath?: string): Promise<readonly ImageData[]>;
13
+ /**
14
+ * Save an image buffer to disk
15
+ */
16
+ saveImage(buffer: Buffer, originalPath: string, basePath?: string): Promise<string | null>;
17
+ /**
18
+ * Check if a file path represents an image
19
+ */
20
+ isImageFile(filePath: string): boolean;
21
+ /**
22
+ * Get image format from file extension
23
+ */
24
+ private getImageFormat;
25
+ /**
26
+ * Get markdown reference for an image by its original path
27
+ */
28
+ getImageReference(originalPath: string, basePath?: string): string | null;
29
+ /**
30
+ * Create markdown image reference
31
+ */
32
+ getImageMarkdown(description?: string, imagePath?: string): string;
33
+ /**
34
+ * Reset the image counter and clear extracted images map
35
+ */
36
+ reset(): void;
37
+ /**
38
+ * Get the output directory for images
39
+ */
40
+ get imageDirectory(): string;
41
+ /**
42
+ * Get the current image counter
43
+ */
44
+ get currentImageCount(): number;
45
+ /**
46
+ * Get all extracted image mappings
47
+ */
48
+ get extractedImageMappings(): ReadonlyMap<string, string>;
49
+ }
50
+ //# sourceMappingURL=image-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image-extractor.d.ts","sourceRoot":"","sources":["../../src/utils/image-extractor.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAC/B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAGxD,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,YAAY,CAAa;IACjC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAA6B;gBAEjD,SAAS,GAAE,MAAiB;IASxC;;OAEG;IACG,oBAAoB,CAAC,GAAG,EAAE,KAAK,EAAE,QAAQ,GAAE,MAAW,GAAG,OAAO,CAAC,SAAS,SAAS,EAAE,CAAC;IAwC5F;;OAEG;IACG,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,EAAE,QAAQ,GAAE,MAAW,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAoBpG;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAUtC;;OAEG;IACH,OAAO,CAAC,cAAc;IAKtB;;OAEG;IACH,iBAAiB,CAAC,YAAY,EAAE,MAAM,EAAE,QAAQ,GAAE,MAAW,GAAG,MAAM,GAAG,IAAI;IAS7E;;OAEG;IACH,gBAAgB,CAAC,WAAW,GAAE,MAAgB,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM;IAO3E;;OAEG;IACH,KAAK,IAAI,IAAI;IAKb;;OAEG;IACH,IAAI,cAAc,IAAI,MAAM,CAE3B;IAED;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAE9B;IAED;;OAEG;IACH,IAAI,sBAAsB,IAAI,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAExD;CACF"}